From f863c5b7e077cc2d3033ade02a1a124e1d0d5775 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Fri, 8 Nov 2024 13:50:32 +0000 Subject: [PATCH 001/126] retry missing docsums to all other nodes --- .../dispatch/rpc/RpcConnectionPool.java | 2 +- .../dispatch/rpc/RpcProtobufFillInvoker.java | 107 ++++++++++++++++-- .../search/dispatch/rpc/RpcResourcePool.java | 9 +- 3 files changed, 102 insertions(+), 16 deletions(-) diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java index d31bd1f08fe8..7679705a1361 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java @@ -16,7 +16,6 @@ public interface RpcConnectionPool extends AutoCloseable { /** Returns a connection to the given node id. */ Client.NodeConnection getConnection(int nodeId); - /** Will return a list of items that need a delayed close when updating node set. */ default Collection updateNodes(DispatchNodesConfig nodesConfig) { return List.of(); } @@ -24,4 +23,5 @@ public interface RpcConnectionPool extends AutoCloseable { @Override void close(); + default Collection knownNodeIds() { return List.of(); } } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java index fe6dc9abe199..6e8927423840 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java @@ -22,6 +22,7 @@ import com.yahoo.slime.BinaryFormat; import com.yahoo.slime.BinaryView; +import java.util.ArrayList; import java.util.List; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; @@ -71,11 +72,15 @@ enum DecodePolicy {EAGER, ONDEMAND} @Override protected void sendFillRequest(Result result, String summaryClass) { ListMap hitsByNode = hitsByNode(result); + int queueSize = Math.max(hitsByNode.size(), resourcePool.knownNodeIds().size()); + responses = new LinkedBlockingQueue<>(queueSize); + sendFillRequestByNode(result, summaryClass, hitsByNode); + } + void sendFillRequestByNode(Result result, String summaryClass, ListMap hitsByNode) { result.getQuery().trace(false, 5, "Sending ", hitsByNode.size(), " summary fetch requests with jrt/protobuf"); outstandingResponses = hitsByNode.size(); - responses = new LinkedBlockingQueue<>(outstandingResponses); var timeout = TimeoutHelper.calculateTimeout(result.getQuery()); if (timeout.timedOut()) { @@ -144,7 +149,9 @@ private void sendDocsumsRequest(int nodeId, List hits, byte[] payload, private void processResponses(Result result, String summaryClass) throws TimeoutException { try { - int skippedHits = 0; + List alternates = new ArrayList<>(); + alternates.addAll(resourcePool.knownNodeIds()); + List skippedHits = new ArrayList<>(); while (outstandingResponses > 0) { long timeLeftMs = result.getQuery().getTimeLeft(); if (timeLeftMs <= 0) { @@ -159,10 +166,46 @@ private void processResponses(Result result, String summaryClass) throws Timeout throwTimeout(); } var hitsContext = responseAndHits.getSecond(); - skippedHits += processResponse(result, response, hitsContext, summaryClass); + skippedHits.addAll(processResponse(result, response, hitsContext, summaryClass)); 
outstandingResponses--; } - if (skippedHits != 0) { + + if (! skippedHits.isEmpty()) { + ListMap retryMap = new ListMap<>(); + while (alternates.size() > 0) { + int nodeId = alternates.remove(0); + for (var hit : skippedHits) { + if (hit.getDistributionKey() != nodeId) { + retryMap.put(nodeId, hit); + } + } + } + if (retryMap.size() > 0) { + sendFillRequestByNode(result, summaryClass, retryMap); + } + while (outstandingResponses > 0) { + long timeLeftMs = result.getQuery().getTimeLeft(); + if (timeLeftMs <= 0) { + log.log(Level.WARNING, "Timed out waiting for summary data. " + outstandingResponses + " responses outstanding."); + break; + } + var responseAndHits = responses.poll(timeLeftMs, TimeUnit.MILLISECONDS); + if (responseAndHits == null) { + log.log(Level.WARNING, "Timed out waiting for summary data. " + outstandingResponses + " responses outstanding."); + break; + } + var response = responseAndHits.getFirst(); + if (response.timeout()) { + log.log(Level.WARNING, "Timed out waiting for summary data. " + outstandingResponses + " responses outstanding."); + break; + } + var hitsContext = responseAndHits.getSecond(); + speculativeProcessResponse(result, response, hitsContext, summaryClass); + outstandingResponses--; + } + skippedHits.removeIf(hit -> hit.isFilled(summaryClass)); + } + if (! skippedHits.isEmpty()) { result.hits().addError(ErrorMessage .createEmptyDocsums("Missing hit summary data for summary " + summaryClass + " for " + skippedHits + " hits")); } @@ -171,11 +214,11 @@ private void processResponses(Result result, String summaryClass) throws Timeout } } - private int processResponse(Result result, Client.ResponseOrError responseOrError, List hitsContext, + private List processResponse(Result result, Client.ResponseOrError responseOrError, List hitsContext, String summaryClass) { if (responseOrError.error().isPresent()) { if (hasReportedError) { - return 0; + return List.of(); } String error = responseOrError.error().get(); result.hits().addError(ErrorMessage.createBackendCommunicationError(error)); @@ -186,7 +229,21 @@ private int processResponse(Result result, Client.ResponseOrError responseOrError, + List hitsContext, + String summaryClass) + { + if (responseOrError.error().isPresent()) { + return; + } + Client.ProtobufResponse response = responseOrError.response().get(); + byte[] responseBytes = compressor.decompress(response); + speculativeFill(result, hitsContext, summaryClass, responseBytes); } private void addErrors(Result result, com.yahoo.slime.Inspector errors) { @@ -202,7 +259,7 @@ private void convertErrorsFromDocsumReply(Result target, List hits, String summaryClass, byte[] payload) { + private List fill(Result result, List hits, String summaryClass, byte[] payload) { try { var protobuf = SearchProtocol.DocsumReply.parseFrom(payload); var root = (decodePolicy == DecodePolicy.ONDEMAND) @@ -217,9 +274,9 @@ private int fill(Result result, List hits, String summaryClass, byte[] Inspector summaries = new SlimeAdapter(root.field("docsums")); if (!summaries.valid()) { - return 0; // No summaries; Perhaps we requested a non-existing summary class + return List.of(); // No summaries; Perhaps we requested a non-existing summary class } - int skippedHits = 0; + List skippedHits = new ArrayList<>(); for (int i = 0; i < hits.size(); i++) { Inspector summary = summaries.entry(i).field("docsum"); if (summary.valid()) { @@ -227,14 +284,40 @@ private int fill(Result result, List hits, String summaryClass, byte[] 
hits.get(i).addSummary(documentDb.getDocsumDefinitionSet().getDocsum(summaryClass), summary); hits.get(i).setFilled(summaryClass); } else { - skippedHits++; + skippedHits.add(hits.get(i)); } } return skippedHits; } catch (InvalidProtocolBufferException ex) { log.log(Level.WARNING, "Invalid response to docsum request", ex); result.hits().addError(ErrorMessage.createInternalServerError("Invalid response to docsum request from backend")); - return 0; + return List.of(); + } + } + + private void speculativeFill(Result result, List hits, String summaryClass, byte[] payload) { + try { + var protobuf = SearchProtocol.DocsumReply.parseFrom(payload); + var root = (decodePolicy == DecodePolicy.ONDEMAND) + ? BinaryView.inspect(protobuf.getSlimeSummaries().toByteArray()) + : BinaryFormat.decode(protobuf.getSlimeSummaries().toByteArray()).get(); + Inspector summaries = new SlimeAdapter(root.field("docsums")); + if (!summaries.valid()) { + return; // No summaries; Perhaps we requested a non-existing summary class + } + for (int i = 0; i < hits.size(); i++) { + Inspector summary = summaries.entry(i).field("docsum"); + if (summary.valid()) { + FastHit hit = hits.get(i); + if (! hit.getFilled().contains(summaryClass)) { + hit.setField(Hit.SDDOCNAME_FIELD, documentDb.schema().name()); + hit.addSummary(documentDb.getDocsumDefinitionSet().getDocsum(summaryClass), summary); + hit.setFilled(summaryClass); + } + } + } + } catch (InvalidProtocolBufferException ex) { + log.log(Level.WARNING, "Invalid response to docsum request", ex); } } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java index d127ed69df5d..0f82c05658d6 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java @@ -1,7 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.search.dispatch.rpc; -import com.yahoo.search.dispatch.FillInvoker; import com.yahoo.search.dispatch.rpc.Client.NodeConnection; import com.yahoo.search.dispatch.rpc.RpcClient.RpcNodeConnection; import com.yahoo.vespa.config.search.DispatchConfig; @@ -16,8 +15,7 @@ import java.util.concurrent.ThreadLocalRandom; /** - * RpcResourcePool constructs {@link FillInvoker} objects that communicate with content nodes over RPC. It also contains - * the RPC connection pool. + * RpcResourcePool contains the RPC connection pool. 
* * @author ollivir */ @@ -86,6 +84,11 @@ public void close() { } } + @Override + public Collection knownNodeIds() { + return nodeConnectionPools.keySet(); + } + private static class NodeConnectionPool implements AutoCloseable { private final List connections; From bd1fc94bacfab2b2bf153eade94fb1e3de1a5a4c Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Mon, 11 Nov 2024 13:42:25 +0000 Subject: [PATCH 002/126] refactor after review --- .../dispatch/rpc/RpcProtobufFillInvoker.java | 137 ++++++------------ 1 file changed, 47 insertions(+), 90 deletions(-) diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java index 6e8927423840..2449f2375aca 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java @@ -4,7 +4,6 @@ import ai.vespa.searchlib.searchprotocol.protobuf.SearchProtocol; import com.google.protobuf.InvalidProtocolBufferException; import com.yahoo.collections.ListMap; -import com.yahoo.collections.Pair; import com.yahoo.compress.Compressor; import com.yahoo.container.protect.Error; import com.yahoo.data.access.Inspector; @@ -51,7 +50,9 @@ enum DecodePolicy {EAGER, ONDEMAND} private final CompressPayload compressor; private final DecodePolicy decodePolicy; - private BlockingQueue, List>> responses; + private record ResponseAndHits(Client.ResponseOrError response, List hits) {} + + private BlockingQueue responses; /** Whether we have already logged/notified about an error - to avoid spamming */ private boolean hasReportedError = false; @@ -116,7 +117,7 @@ protected void release() { /** Called by a thread belonging to the client when a valid response becomes available */ public void receive(Client.ResponseOrError response, List hitsContext) { - responses.add(new Pair<>(response, hitsContext)); + responses.add(new ResponseAndHits(response, hitsContext)); } /** Return a map of hits by their search node (partition) id */ @@ -140,40 +141,38 @@ private void sendDocsumsRequest(int nodeId, List hits, byte[] payload, log.warning("Got hits with node id " + nodeId + ", which is not included in the current dispatch config"); return; } - Query query = result.getQuery(); Compressor.Compression compressionResult = compressor.compress(query, payload); node.request(RPC_METHOD, compressionResult.type(), payload.length, compressionResult.data(), roe -> receive(roe, hits), clientTimeout); } + private ResponseAndHits getNextResponse(Result result) throws InterruptedException { + long timeLeftMs = result.getQuery().getTimeLeft(); + if (timeLeftMs <= 0) { + return null; + } + var responseAndHits = responses.poll(timeLeftMs, TimeUnit.MILLISECONDS); + if (responseAndHits == null || responseAndHits.response().timeout()) { + return null; + } + return responseAndHits; + } + private void processResponses(Result result, String summaryClass) throws TimeoutException { try { - List alternates = new ArrayList<>(); - alternates.addAll(resourcePool.knownNodeIds()); List skippedHits = new ArrayList<>(); while (outstandingResponses > 0) { - long timeLeftMs = result.getQuery().getTimeLeft(); - if (timeLeftMs <= 0) { - throwTimeout(); - } - var responseAndHits = responses.poll(timeLeftMs, TimeUnit.MILLISECONDS); + var responseAndHits = getNextResponse(result); if (responseAndHits == null) { throwTimeout(); } - var response = responseAndHits.getFirst(); - if 
(response.timeout()) { - throwTimeout(); - } - var hitsContext = responseAndHits.getSecond(); - skippedHits.addAll(processResponse(result, response, hitsContext, summaryClass)); + skippedHits.addAll(processOneResponse(result, responseAndHits, summaryClass, false)); outstandingResponses--; } - if (! skippedHits.isEmpty()) { ListMap retryMap = new ListMap<>(); - while (alternates.size() > 0) { - int nodeId = alternates.remove(0); + for (Integer nodeId : resourcePool.knownNodeIds()) { for (var hit : skippedHits) { if (hit.getDistributionKey() != nodeId) { retryMap.put(nodeId, hit); @@ -184,23 +183,12 @@ private void processResponses(Result result, String summaryClass) throws Timeout sendFillRequestByNode(result, summaryClass, retryMap); } while (outstandingResponses > 0) { - long timeLeftMs = result.getQuery().getTimeLeft(); - if (timeLeftMs <= 0) { - log.log(Level.WARNING, "Timed out waiting for summary data. " + outstandingResponses + " responses outstanding."); - break; - } - var responseAndHits = responses.poll(timeLeftMs, TimeUnit.MILLISECONDS); + var responseAndHits = getNextResponse(result); if (responseAndHits == null) { log.log(Level.WARNING, "Timed out waiting for summary data. " + outstandingResponses + " responses outstanding."); break; } - var response = responseAndHits.getFirst(); - if (response.timeout()) { - log.log(Level.WARNING, "Timed out waiting for summary data. " + outstandingResponses + " responses outstanding."); - break; - } - var hitsContext = responseAndHits.getSecond(); - speculativeProcessResponse(result, response, hitsContext, summaryClass); + processOneResponse(result, responseAndHits, summaryClass, true); outstandingResponses--; } skippedHits.removeIf(hit -> hit.isFilled(summaryClass)); @@ -214,10 +202,15 @@ private void processResponses(Result result, String summaryClass) throws Timeout } } - private List processResponse(Result result, Client.ResponseOrError responseOrError, List hitsContext, - String summaryClass) { + private List processOneResponse( + Result result, + ResponseAndHits responseAndHits, + String summaryClass, + boolean ignoreErrors) + { + var responseOrError = responseAndHits.response(); if (responseOrError.error().isPresent()) { - if (hasReportedError) { + if (hasReportedError || ignoreErrors) { return List.of(); } String error = responseOrError.error().get(); @@ -227,25 +220,11 @@ private List processResponse(Result result, Client.ResponseOrError responseOrError, - List hitsContext, - String summaryClass) - { - if (responseOrError.error().isPresent()) { - return; - } - Client.ProtobufResponse response = responseOrError.response().get(); - byte[] responseBytes = compressor.decompress(response); - speculativeFill(result, hitsContext, summaryClass, responseBytes); - } - private void addErrors(Result result, com.yahoo.slime.Inspector errors) { errors.traverse((ArrayTraverser) (index, value) -> { int errorCode = ("timeout".equalsIgnoreCase(value.field("type").asString())) ? Error.TIMEOUT.code : Error.UNSPECIFIED.code; @@ -259,19 +238,20 @@ private void convertErrorsFromDocsumReply(Result target, List fill(Result result, List hits, String summaryClass, byte[] payload) { + private List fill(Result result, List hits, String summaryClass, byte[] payload, boolean ignoreErrors) { try { var protobuf = SearchProtocol.DocsumReply.parseFrom(payload); var root = (decodePolicy == DecodePolicy.ONDEMAND) ? 
BinaryView.inspect(protobuf.getSlimeSummaries().toByteArray()) : BinaryFormat.decode(protobuf.getSlimeSummaries().toByteArray()).get(); - var errors = root.field("errors"); - boolean hasErrors = errors.valid() && (errors.entries() > 0); - if (hasErrors) { - addErrors(result, errors); + if (! ignoreErrors) { + var errors = root.field("errors"); + boolean hasErrors = errors.valid() && (errors.entries() > 0); + if (hasErrors) { + addErrors(result, errors); + } + convertErrorsFromDocsumReply(result, protobuf.getErrorsList()); } - convertErrorsFromDocsumReply(result, protobuf.getErrorsList()); - Inspector summaries = new SlimeAdapter(root.field("docsums")); if (!summaries.valid()) { return List.of(); // No summaries; Perhaps we requested a non-existing summary class @@ -279,46 +259,23 @@ private List fill(Result result, List hits, String summaryClas List skippedHits = new ArrayList<>(); for (int i = 0; i < hits.size(); i++) { Inspector summary = summaries.entry(i).field("docsum"); + FastHit hit = hits.get(i); if (summary.valid()) { - hits.get(i).setField(Hit.SDDOCNAME_FIELD, documentDb.schema().name()); - hits.get(i).addSummary(documentDb.getDocsumDefinitionSet().getDocsum(summaryClass), summary); - hits.get(i).setFilled(summaryClass); + hit.setField(Hit.SDDOCNAME_FIELD, documentDb.schema().name()); + hit.addSummary(documentDb.getDocsumDefinitionSet().getDocsum(summaryClass), summary); + hit.setFilled(summaryClass); } else { - skippedHits.add(hits.get(i)); + skippedHits.add(hit); } } return skippedHits; } catch (InvalidProtocolBufferException ex) { - log.log(Level.WARNING, "Invalid response to docsum request", ex); - result.hits().addError(ErrorMessage.createInternalServerError("Invalid response to docsum request from backend")); - return List.of(); - } - } - - private void speculativeFill(Result result, List hits, String summaryClass, byte[] payload) { - try { - var protobuf = SearchProtocol.DocsumReply.parseFrom(payload); - var root = (decodePolicy == DecodePolicy.ONDEMAND) - ? BinaryView.inspect(protobuf.getSlimeSummaries().toByteArray()) - : BinaryFormat.decode(protobuf.getSlimeSummaries().toByteArray()).get(); - Inspector summaries = new SlimeAdapter(root.field("docsums")); - if (!summaries.valid()) { - return; // No summaries; Perhaps we requested a non-existing summary class + if (! ignoreErrors) { + log.log(Level.WARNING, "Invalid response to docsum request", ex); + result.hits().addError(ErrorMessage.createInternalServerError("Invalid response to docsum request from backend")); } - for (int i = 0; i < hits.size(); i++) { - Inspector summary = summaries.entry(i).field("docsum"); - if (summary.valid()) { - FastHit hit = hits.get(i); - if (! 
hit.getFilled().contains(summaryClass)) { - hit.setField(Hit.SDDOCNAME_FIELD, documentDb.schema().name()); - hit.addSummary(documentDb.getDocsumDefinitionSet().getDocsum(summaryClass), summary); - hit.setFilled(summaryClass); - } - } - } - } catch (InvalidProtocolBufferException ex) { - log.log(Level.WARNING, "Invalid response to docsum request", ex); } + return List.of(); } private void throwTimeout() throws TimeoutException { From 06da90e9af1e857c26dfc508e6f2f5a5698e9ca6 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Mon, 11 Nov 2024 18:34:11 +0000 Subject: [PATCH 003/126] add tunable limits --- container-search/abi-spec.json | 2 ++ .../src/main/java/com/yahoo/search/Query.java | 4 +++ .../dispatch/rpc/RpcProtobufFillInvoker.java | 29 +++++++++++++++---- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index 5c70d3902c8e..c3a2172c572e 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -2174,6 +2174,8 @@ "public static final com.yahoo.processing.request.CompoundName NO_CACHE", "public static final com.yahoo.processing.request.CompoundName GROUPING_SESSION_CACHE", "public static final com.yahoo.processing.request.CompoundName TIMEOUT", + "public static final com.yahoo.processing.request.CompoundName DOCSUM_RETRY_LIMIT", + "public static final com.yahoo.processing.request.CompoundName DOCSUM_RETRY_FACTOR", "public static final com.yahoo.processing.request.CompoundName TRACE_LEVEL", "public static final com.yahoo.processing.request.CompoundName EXPLAIN_LEVEL", "public static final java.util.List nativeProperties" diff --git a/container-search/src/main/java/com/yahoo/search/Query.java b/container-search/src/main/java/com/yahoo/search/Query.java index 8e0897b866f5..ee2caf6005a6 100644 --- a/container-search/src/main/java/com/yahoo/search/Query.java +++ b/container-search/src/main/java/com/yahoo/search/Query.java @@ -188,6 +188,8 @@ public static Type getType(String typeString) { public static final CompoundName NO_CACHE = CompoundName.from("noCache"); public static final CompoundName GROUPING_SESSION_CACHE = CompoundName.from("groupingSessionCache"); public static final CompoundName TIMEOUT = CompoundName.from("timeout"); + public static final CompoundName DOCSUM_RETRY_LIMIT = CompoundName.from("docsumRetryLimit"); + public static final CompoundName DOCSUM_RETRY_FACTOR = CompoundName.from("docsumRetryFactor"); /** @deprecated use Trace.LEVEL */ @Deprecated // TODO: Remove on Vespa 9 @@ -211,6 +213,8 @@ public static Type getType(String typeString) { argumentType.addField(new FieldDescription(NO_CACHE.toString(), "boolean", "nocache")); argumentType.addField(new FieldDescription(GROUPING_SESSION_CACHE.toString(), "boolean", "groupingSessionCache")); argumentType.addField(new FieldDescription(TIMEOUT.toString(), "string", "timeout")); + argumentType.addField(new FieldDescription(DOCSUM_RETRY_LIMIT.toString(), "integer")); + argumentType.addField(new FieldDescription(DOCSUM_RETRY_FACTOR.toString(), "double")); argumentType.addField(new FieldDescription(FederationSearcher.SOURCENAME.toString(),"string")); argumentType.addField(new FieldDescription(FederationSearcher.PROVIDERNAME.toString(),"string")); argumentType.addField(new FieldDescription(Model.MODEL, new QueryProfileFieldType(Model.getArgumentType()))); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java 
b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java index 2449f2375aca..075b60eedaf5 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java @@ -45,7 +45,7 @@ enum DecodePolicy {EAGER, ONDEMAND} private final DocumentDatabase documentDb; private final RpcConnectionPool resourcePool; - private final boolean summaryNeedsQuery; + private boolean summaryNeedsQuery; private final String serverId; private final CompressPayload compressor; private final DecodePolicy decodePolicy; @@ -59,6 +59,7 @@ private record ResponseAndHits(Client.ResponseOrError response /** The number of responses we should receive (and process) before this is complete */ private int outstandingResponses; + private int hitsFilledOk = 0; RpcProtobufFillInvoker(RpcConnectionPool resourcePool, CompressPayload compressor, DocumentDatabase documentDb, String serverId, DecodePolicy decodePolicy, boolean summaryNeedsQuery) { @@ -147,8 +148,8 @@ private void sendDocsumsRequest(int nodeId, List hits, byte[] payload, roe -> receive(roe, hits), clientTimeout); } - private ResponseAndHits getNextResponse(Result result) throws InterruptedException { - long timeLeftMs = result.getQuery().getTimeLeft(); + private ResponseAndHits getNextResponse(Query query) throws InterruptedException { + long timeLeftMs = query.getTimeLeft(); if (timeLeftMs <= 0) { return null; } @@ -163,14 +164,28 @@ private void processResponses(Result result, String summaryClass) throws Timeout try { List skippedHits = new ArrayList<>(); while (outstandingResponses > 0) { - var responseAndHits = getNextResponse(result); + var responseAndHits = getNextResponse(result.getQuery()); if (responseAndHits == null) { throwTimeout(); } skippedHits.addAll(processOneResponse(result, responseAndHits, summaryClass, false)); outstandingResponses--; } - if (! skippedHits.isEmpty()) { + /* + if (skippedHits.isEmpty()) { + // all done OK + return; + } + */ + int numSkipped = skippedHits.size(); + int numTotal = numSkipped + hitsFilledOk; + log.log(Level.WARNING, "total hits: " + numTotal + " ok: " + hitsFilledOk + " skipped: " + numSkipped); + double absoluteRetryLimit = result.getQuery().properties().getInteger(Query.DOCSUM_RETRY_LIMIT, 10); + double retryLimitFactor = result.getQuery().properties().getDouble(Query.DOCSUM_RETRY_FACTOR, 0.5); + log.log(Level.WARNING, "retry limit: " + absoluteRetryLimit + " factor: " + retryLimitFactor); + double retryLimit = Math.min(absoluteRetryLimit, retryLimitFactor * numTotal); + // maybe retry: + if (numSkipped < retryLimit) { ListMap retryMap = new ListMap<>(); for (Integer nodeId : resourcePool.knownNodeIds()) { for (var hit : skippedHits) { @@ -180,10 +195,11 @@ private void processResponses(Result result, String summaryClass) throws Timeout } } if (retryMap.size() > 0) { + summaryNeedsQuery = true; sendFillRequestByNode(result, summaryClass, retryMap); } while (outstandingResponses > 0) { - var responseAndHits = getNextResponse(result); + var responseAndHits = getNextResponse(result.getQuery()); if (responseAndHits == null) { log.log(Level.WARNING, "Timed out waiting for summary data. 
" + outstandingResponses + " responses outstanding."); break; @@ -264,6 +280,7 @@ private List fill(Result result, List hits, String summaryClas hit.setField(Hit.SDDOCNAME_FIELD, documentDb.schema().name()); hit.addSummary(documentDb.getDocsumDefinitionSet().getDocsum(summaryClass), summary); hit.setFilled(summaryClass); + ++hitsFilledOk; } else { skippedHits.add(hit); } From b106f3c9c8d9cad715d96b6dfc970747c6e0c070 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Tue, 12 Nov 2024 10:10:36 +0000 Subject: [PATCH 004/126] move tunables inside "dispatch" namespace --- container-search/abi-spec.json | 2 -- .../src/main/java/com/yahoo/search/Query.java | 4 ---- .../java/com/yahoo/search/dispatch/Dispatcher.java | 6 ++++++ .../search/dispatch/rpc/RpcProtobufFillInvoker.java | 12 ++++++------ 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index c3a2172c572e..5c70d3902c8e 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -2174,8 +2174,6 @@ "public static final com.yahoo.processing.request.CompoundName NO_CACHE", "public static final com.yahoo.processing.request.CompoundName GROUPING_SESSION_CACHE", "public static final com.yahoo.processing.request.CompoundName TIMEOUT", - "public static final com.yahoo.processing.request.CompoundName DOCSUM_RETRY_LIMIT", - "public static final com.yahoo.processing.request.CompoundName DOCSUM_RETRY_FACTOR", "public static final com.yahoo.processing.request.CompoundName TRACE_LEVEL", "public static final com.yahoo.processing.request.CompoundName EXPLAIN_LEVEL", "public static final java.util.List nativeProperties" diff --git a/container-search/src/main/java/com/yahoo/search/Query.java b/container-search/src/main/java/com/yahoo/search/Query.java index ee2caf6005a6..8e0897b866f5 100644 --- a/container-search/src/main/java/com/yahoo/search/Query.java +++ b/container-search/src/main/java/com/yahoo/search/Query.java @@ -188,8 +188,6 @@ public static Type getType(String typeString) { public static final CompoundName NO_CACHE = CompoundName.from("noCache"); public static final CompoundName GROUPING_SESSION_CACHE = CompoundName.from("groupingSessionCache"); public static final CompoundName TIMEOUT = CompoundName.from("timeout"); - public static final CompoundName DOCSUM_RETRY_LIMIT = CompoundName.from("docsumRetryLimit"); - public static final CompoundName DOCSUM_RETRY_FACTOR = CompoundName.from("docsumRetryFactor"); /** @deprecated use Trace.LEVEL */ @Deprecated // TODO: Remove on Vespa 9 @@ -213,8 +211,6 @@ public static Type getType(String typeString) { argumentType.addField(new FieldDescription(NO_CACHE.toString(), "boolean", "nocache")); argumentType.addField(new FieldDescription(GROUPING_SESSION_CACHE.toString(), "boolean", "groupingSessionCache")); argumentType.addField(new FieldDescription(TIMEOUT.toString(), "string", "timeout")); - argumentType.addField(new FieldDescription(DOCSUM_RETRY_LIMIT.toString(), "integer")); - argumentType.addField(new FieldDescription(DOCSUM_RETRY_FACTOR.toString(), "double")); argumentType.addField(new FieldDescription(FederationSearcher.SOURCENAME.toString(),"string")); argumentType.addField(new FieldDescription(FederationSearcher.PROVIDERNAME.toString(),"string")); argumentType.addField(new FieldDescription(Model.MODEL, new QueryProfileFieldType(Model.getArgumentType()))); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java 
b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java index 1689f6d246e5..6c3a1251cc74 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java @@ -51,10 +51,14 @@ public class Dispatcher extends AbstractComponent { public static final String DISPATCH = "dispatch"; private static final String TOP_K_PROBABILITY = "topKProbability"; + private static final String DOCSUM_RETRY_LIMIT = "docsumRetryLimit"; + private static final String DOCSUM_RETRY_FACTOR = "docsumRetryFactor"; private static final int MAX_GROUP_SELECTION_ATTEMPTS = 3; /** If set will control computation of how many hits will be fetched from each partition.*/ public static final CompoundName topKProbability = CompoundName.from(DISPATCH + "." + TOP_K_PROBABILITY); + public static final CompoundName docsumRetryLimit = CompoundName.from(DISPATCH + "." + DOCSUM_RETRY_LIMIT); + public static final CompoundName docsumRetryFactor = CompoundName.from(DISPATCH + "." + DOCSUM_RETRY_FACTOR); private final InvokerFactoryFactory invokerFactories; private final DispatchConfig dispatchConfig; @@ -101,6 +105,8 @@ T register(T invoker) { argumentType.setStrict(true); argumentType.setBuiltin(true); argumentType.addField(new FieldDescription(TOP_K_PROBABILITY, FieldType.doubleType)); + argumentType.addField(new FieldDescription(DOCSUM_RETRY_LIMIT, FieldType.integerType)); + argumentType.addField(new FieldDescription(DOCSUM_RETRY_FACTOR, FieldType.doubleType)); argumentType.freeze(); } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java index 075b60eedaf5..c3dac98ce4b9 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java @@ -13,6 +13,7 @@ import com.yahoo.prelude.fastsearch.TimeoutException; import com.yahoo.search.Query; import com.yahoo.search.Result; +import com.yahoo.search.dispatch.Dispatcher; import com.yahoo.search.dispatch.FillInvoker; import com.yahoo.search.dispatch.rpc.Client.ProtobufResponse; import com.yahoo.search.result.ErrorMessage; @@ -171,21 +172,18 @@ private void processResponses(Result result, String summaryClass) throws Timeout skippedHits.addAll(processOneResponse(result, responseAndHits, summaryClass, false)); outstandingResponses--; } - /* if (skippedHits.isEmpty()) { // all done OK return; } - */ int numSkipped = skippedHits.size(); int numTotal = numSkipped + hitsFilledOk; - log.log(Level.WARNING, "total hits: " + numTotal + " ok: " + hitsFilledOk + " skipped: " + numSkipped); - double absoluteRetryLimit = result.getQuery().properties().getInteger(Query.DOCSUM_RETRY_LIMIT, 10); - double retryLimitFactor = result.getQuery().properties().getDouble(Query.DOCSUM_RETRY_FACTOR, 0.5); - log.log(Level.WARNING, "retry limit: " + absoluteRetryLimit + " factor: " + retryLimitFactor); + double absoluteRetryLimit = result.getQuery().properties().getInteger(Dispatcher.docsumRetryLimit, 10); + double retryLimitFactor = result.getQuery().properties().getDouble(Dispatcher.docsumRetryFactor, 0.5); double retryLimit = Math.min(absoluteRetryLimit, retryLimitFactor * numTotal); // maybe retry: if (numSkipped < retryLimit) { + log.log(Level.WARNING, "Retry docsum fetch for " + numSkipped + " hits (" + hitsFilledOk + " was ok)"); ListMap 
retryMap = new ListMap<>(); for (Integer nodeId : resourcePool.knownNodeIds()) { for (var hit : skippedHits) { @@ -208,6 +206,8 @@ private void processResponses(Result result, String summaryClass) throws Timeout outstandingResponses--; } skippedHits.removeIf(hit -> hit.isFilled(summaryClass)); + } else { + log.log(Level.WARNING, "Docsum fetch failed for " + numSkipped + " hits (" + hitsFilledOk + " was ok), no retry"); } if (! skippedHits.isEmpty()) { result.hits().addError(ErrorMessage From 5750233282c241f9fe7a09367a647a23835fabba Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Wed, 6 Nov 2024 09:31:43 +0000 Subject: [PATCH 005/126] Add a Count-Min-based probabilistic relative frequency sketch Adds an implementation of a probabilistic frequency sketch that allows for estimating the relative frequency of of elements from a stream of events. That is, the sketch does not capture the _absolute_ frequency of a given element over time. To reduce the requirement for the number of bits used for the sketch's underlying counters, this sketch uses automatic decaying of counter values once the number of recorded samples reaches a certain point (relative to the sketch's size). Decaying divides all counters by 2. The underlying data structure is a Count-Min sketch [0][1] with automatic decaying of counters based on TinyLFU [2]. This implementation has certain changes from a "textbook" CM sketch, inspired by the approach used in [3]. In particular, instead of having `d` logical rows each with width `w` that are accessed with hash-derived indexes (and thus likely triggering `d` cache misses for large values of `w`) we subdivide into w/64 blocks each with fixed number d=4 rows of 32 4-bit counters, i.e. each block is exactly 64 bytes. Counter updates or reads always happen within the scope of a single block. We also ensure the block array is allocated with at least a 64-byte alignment. This ensures that a given sketch update will touch exactly 1 cache line of the underlying sketch buffer (not counting cache lines occupied by the sketch object itself, as we assume these are already present in the cache). Similarly, comparing the frequency of two elements will always touch at most 2 cache lines. The Count-Min sketch (and its cousin, the Counting Bloom Filter) using `k` counters is usually described as requiring `k` pairwise independent hash functions. This implementation assumes this requirement is unnecessary assuming a hash function with good entropy; we instead extract non-overlapping subsets of bits of a single hash value and use these as indices into our data structure components. 
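A minimal, hypothetical usage sketch of the class added below, wired up as a
TinyLFU-style admission filter (the element type, cache size and the
`on_cache_access`/`admit` wrappers are illustrative assumptions, not part of
this patch; the header path and member functions are the ones introduced here):

    #include <vespa/vespalib/util/relative_frequency_sketch.h>
    #include <compare>
    #include <string>

    // Sized for a cache holding at most 1000 entries; the decay window is ~10x that.
    vespalib::RelativeFrequencySketch<std::string, std::hash<std::string>> sketch(1000);

    void on_cache_access(const std::string& key) {
        sketch.add(key); // bump the saturated [0, 15] frequency estimate for this key
    }

    // Admit a candidate only if it is estimated to be at least as frequent as the
    // cache entry it would displace.
    bool admit(const std::string& candidate, const std::string& victim) {
        return sketch.estimate_relative_frequency(candidate, victim) != std::weak_ordering::less;
    }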
References: [0]: The Count-Min Sketch and its Applications (2003) [1]: https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch [2]: TinyLFU: A Highly Efficient Cache Admission Policy (2015) [3]: https://github.com/ben-manes/caffeine/blob/master/caffeine/ src/main/java/com/github/benmanes/caffeine/cache/FrequencySketch.java --- vespalib/src/tests/util/CMakeLists.txt | 1 + .../util/relative_frequency_sketch_test.cpp | 90 ++++++++++ .../src/vespa/vespalib/util/CMakeLists.txt | 1 + .../util/relative_frequency_sketch.cpp | 161 ++++++++++++++++++ .../vespalib/util/relative_frequency_sketch.h | 134 +++++++++++++++ 5 files changed, 387 insertions(+) create mode 100644 vespalib/src/tests/util/relative_frequency_sketch_test.cpp create mode 100644 vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp create mode 100644 vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h diff --git a/vespalib/src/tests/util/CMakeLists.txt b/vespalib/src/tests/util/CMakeLists.txt index fef048cc99c7..c750aeb5c129 100644 --- a/vespalib/src/tests/util/CMakeLists.txt +++ b/vespalib/src/tests/util/CMakeLists.txt @@ -25,6 +25,7 @@ vespa_add_executable(vespalib_util_gtest_runner_test_app TEST random_test.cpp rcuvector_test.cpp ref_counted_test.cpp + relative_frequency_sketch_test.cpp require_test.cpp size_literals_test.cpp small_vector_test.cpp diff --git a/vespalib/src/tests/util/relative_frequency_sketch_test.cpp b/vespalib/src/tests/util/relative_frequency_sketch_test.cpp new file mode 100644 index 000000000000..8acb894f5b28 --- /dev/null +++ b/vespalib/src/tests/util/relative_frequency_sketch_test.cpp @@ -0,0 +1,90 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include + +namespace vespalib { + +using namespace ::testing; + +namespace { + +struct Identity { + template + constexpr T operator()(T v) const noexcept { return v; } +}; + +} + +struct RelativeFrequencySketchTest : Test { + // Note: although the sketch is inherently _probabilistic_, the below tests are fully + // deterministic as long as the underlying hash function remains the same. This is also why + // we explicitly do _not_ use std::hash here, but defer entirely to (deterministic) XXH3. 
+ using U32FrequencySketch = RelativeFrequencySketch; +}; + +TEST_F(RelativeFrequencySketchTest, frequency_estimates_are_initially_zero) { + U32FrequencySketch sketch(2); + EXPECT_EQ(sketch.count_min(0), 0); + EXPECT_EQ(sketch.count_min(12345), 0); + EXPECT_EQ(sketch.estimate_relative_frequency(123, 456), std::weak_ordering::equivalent); +} + +TEST_F(RelativeFrequencySketchTest, frequency_is_counted_up_to_and_saturated_at_15) { + U32FrequencySketch sketch(1); + for (uint32_t i = 1; i <= 20; ++i) { + sketch.add(7); + // With only one entry we're guaranteed to be exact up to the saturation point + if (i < 15) { + EXPECT_EQ(sketch.count_min(7), i); + } else { + EXPECT_EQ(sketch.count_min(7), 15); + } + } +} + +TEST_F(RelativeFrequencySketchTest, can_track_frequency_of_multiple_elements) { + U32FrequencySketch sketch(3); + sketch.add(100); + sketch.add(200); + sketch.add(300); + sketch.add(200); + + EXPECT_EQ(sketch.count_min(100), 1); + EXPECT_EQ(sketch.count_min(200), 2); + EXPECT_EQ(sketch.count_min(300), 1); + EXPECT_EQ(sketch.count_min(400), 0); + + EXPECT_EQ(sketch.estimate_relative_frequency(0, 100), std::weak_ordering::less); + EXPECT_EQ(sketch.estimate_relative_frequency(100, 0), std::weak_ordering::greater); + EXPECT_EQ(sketch.estimate_relative_frequency(100, 100), std::weak_ordering::equivalent); + EXPECT_EQ(sketch.estimate_relative_frequency(100, 300), std::weak_ordering::equivalent); + EXPECT_EQ(sketch.estimate_relative_frequency(300, 100), std::weak_ordering::equivalent); + EXPECT_EQ(sketch.estimate_relative_frequency(100, 200), std::weak_ordering::less); + EXPECT_EQ(sketch.estimate_relative_frequency(200, 100), std::weak_ordering::greater); +} + +TEST_F(RelativeFrequencySketchTest, counters_are_divided_by_2_once_window_size_reached) { + U32FrequencySketch sketch(8); + const auto ws = sketch.window_size(); + std::vector truth(8); + ASSERT_GT(ws, 0); + for (size_t i = 0; i < ws - 1; ++i) { // don't trigger decay just yet + uint32_t elem = i % 8; + sketch.add(elem); + truth[elem]++; + } + std::vector c_before(8); + for (uint32_t i = 0; i < 8; ++i) { + c_before[i] = sketch.count_min(i); + EXPECT_GE(c_before[i], truth[i]); + // No counters should be saturated yet + EXPECT_LT(c_before[i], 15); + } + // Edge triggered sample ==> should divide all counters + sketch.add(9); + for (uint32_t i = 0; i < 8; ++i) { + EXPECT_EQ(sketch.count_min(i), c_before[i] / 2); + } +} + +} diff --git a/vespalib/src/vespa/vespalib/util/CMakeLists.txt b/vespalib/src/vespa/vespalib/util/CMakeLists.txt index 406ea68a08a1..98b0ba3ca437 100644 --- a/vespalib/src/vespa/vespalib/util/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/util/CMakeLists.txt @@ -68,6 +68,7 @@ vespa_add_library(vespalib_vespalib_util OBJECT rcuvector.cpp ref_counted.cpp regexp.cpp + relative_frequency_sketch.cpp require.cpp resource_limits.cpp round_up_to_page_size.cpp diff --git a/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp new file mode 100644 index 000000000000..3ec8458f11aa --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp @@ -0,0 +1,161 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "relative_frequency_sketch.h" +#include +#include +#include + +namespace vespalib { + +/** + * Cf. the description of the Caffeine sketch in [2][3] we use 8 bytes per cache entry and + * a sample (window) size W that is 10x the cache size (C). 
It is not immediately clear why + * W/C = 10 rather than 16 since we use 4 bits and log2(10) = 3.321..., but surely the + * underlying reason must be very exciting. + * + * Note: `Alloc` currently does not support < 512 byte alignment, which is suboptimal if + * the allocation is small enough to end up on the heap (FIXME). + */ +RawRelativeFrequencySketch::RawRelativeFrequencySketch(size_t count) + : _buf(alloc::Alloc::alloc_aligned(roundUp2inN(std::max(size_t(64U), count * 8)), 512)), + _samples_since_decay(0), + _window_size((_buf.size() / 8) * 10), + _block_mask_bits(_buf.size() > 64 ? Optimized::msbIdx(_buf.size() / 64) : 0) +{ + assert(_block_mask_bits <= 48); // Will always be the case in practice, but it's an invariant... + memset(_buf.get(), 0, _buf.size()); +} + +RawRelativeFrequencySketch::~RawRelativeFrequencySketch() = default; + +/** + * Add an element by its hash. This involves incrementing 4 distinct counters based on the hash. + * + * Our sketch buffer is logically divided into buf_size/64 distinct 64-byte blocks. Each + * block is in turn logically divided into 4 rows x 32 4-bit counters, laid out sequentially. + * Each counter is saturated at 15, i.e. there is no overflow. + * + * We first select the block based on the B LSBs of the hash, where B is log2(buffer_size/64) + * and buffer_size is always a power of two. These B bits are considered consumed and are not + * used for anything else. + * + * Within the block we always update exactly 1 counter in each logical row. Use 5 distinct + * bits from the hash for each of the 4 row updates (4 bits to select a byte out of 16, 1 for + * selecting either the high or low in-byte nibble). To make a nice round number, round up to + * consuming 8 bits per row (the 3 remaining bits are unused). + * + * We use the same conditional decay trigger as the Caffeine sketch, in that we only bump + * the observed sample count (and possibly decay the counters) iff we actually increment at + * least one counter (i.e. not all counters are pre-saturated). The rationale for this is not + * stated outright in the code, but it makes sense as a way to gracefully handle repeated + * insertions of a small set of very high frequency elements. If we always counted these as + * distinct samples we would eventually decay the counters until we have forgotten _all_ + * elements that are not similarly frequent. + */ +void RawRelativeFrequencySketch::add_by_hash(uint64_t hash) noexcept { + const uint64_t block = hash & ((1u << _block_mask_bits) - 1); + hash >>= _block_mask_bits; + assert(block*64 + 64 <= _buf.size()); + auto* block_ptr = static_cast(_buf.get()) + (block * 64); + uint16_t old_counter_bits = 0; + // The compiler will happily and easily unroll this loop. + for (uint8_t i = 0; i < 4; ++i) { + uint8_t h = hash >> (i*8); // Note: we only use 5 out of the 8 bits + uint8_t* vp = block_ptr + (i * 16) + (h & 0xf); // row #i byte select + const uint8_t v = *vp; + h >>= 4; + const uint8_t nib_shift = (h & 1) * 4; // High or low nibble shift factor (4 or 0) + const uint8_t nib_mask = 0xf << nib_shift; + const uint8_t nib_old = (v & nib_mask) >> nib_shift; + const uint8_t nib_new = nib_old < 15 ? 
nib_old + 1 : 15; // Saturated add + const uint8_t nib_rem = v & ~nib_mask; // Untouched nibble that should be preserved + old_counter_bits |= nib_old << (i * 4); + *vp = (nib_new << nib_shift) | nib_rem; + } + if (old_counter_bits != 0xffff && (++_samples_since_decay >= _window_size)) [[unlikely]] { + div_all_by_2(); + _samples_since_decay /= 2; + } +} + +/** + * Estimates the count associated with the given hash. This uses the exact same counter + * addressing as `add_by_hash()`, so refer to that function for a description on the + * semantics. As the name Count-Min implies we take the _minimum_ of the observed counters + * and return this value to the caller. + * + * This will over-estimate the true frequency iff _all_ counters overlap with at least one + * other element, but it will never under-estimate (here casually ignoring the effects of + * counter decaying). + */ +uint8_t RawRelativeFrequencySketch::count_min_by_hash(uint64_t hash) const noexcept { + const uint64_t block = hash & ((1u << _block_mask_bits) - 1); + hash >>= _block_mask_bits; + const uint8_t* block_ptr = static_cast(_buf.get()) + (block * 64); + uint8_t cm[4]; + for (uint8_t i = 0; i < 4; ++i) { + uint8_t h = hash >> (i*8); + const uint8_t* vp = block_ptr + (i * 16) + (h & 0xf); // row #i byte select + h >>= 4; + const uint8_t nib_shift = (h & 1) * 4; // 4 or 0 + const uint8_t nib_mask = 0xf << nib_shift; + cm[i] = (*vp & nib_mask) >> nib_shift; + } + return std::min(std::min(cm[0], cm[1]), std::min(cm[2], cm[3])); +} + +std::weak_ordering +RawRelativeFrequencySketch::estimate_relative_frequency_by_hash(uint64_t lhs_hash, uint64_t rhs_hash) const noexcept { + return count_min_by_hash(lhs_hash) <=> count_min_by_hash(rhs_hash); +} + +/** + * Divides all the 4-bit counters in the sketch by 2. Since this integral division, we + * inherently lose some precision for odd-numbered counter values. + * + * We speed up the division by treating each 64-byte block as 8x u64 values that can + * logically be processed in parallel. The compiler will unroll and auto-vectorize the u64 + * fixed-count inner-loop as expected (verified via Godbolt). + * + * Each u64 value is right-shifted by 1. This shifts the LSB of all 16 4-bit nibbles (except + * the last one) into the MSB of the next nibble. We want the semantics as-if each nibble + * were in its own register, which would mean shifting in a zero bit in the MSB instead. + * We emulate this by explicitly clearing all nibble MSBs. This effectively divides all + * nibbles by 2. This should be entirely endian-agnostic. + * + * Example: + * + * Before: + * nibble#: [ 15 ][ 14 ][ 13 ][ 12 ][ ... + * bits: 1111 0011 0000 1100 ... + * value: 15 3 0 12 ... + * + * After shift (_uncorrected_ prior to masking) + * nibble#: [ 15 ][ 14 ][ 13 ][ 12 ][ ... + * bits: 0111 1001 1000 0110 0... + * value: 7 9 8 6 ... + * + * We will then apply the following per-nibble mask: + * mask: 0111 0111 0111 0111 0... + * + * After shift (corrected by masking off nibble MSBs) + * nibble#: [ 15 ][ 14 ][ 13 ][ 12 ][ ... + * bits: 0111 0001 0000 0110 0... + * value: 7 1 0 6 ... + */ +void RawRelativeFrequencySketch::div_all_by_2() noexcept { + const uint64_t n_blocks = _buf.size() / 64; + auto* block_ptr = static_cast(_buf.get()); + for (uint64_t i = 0; i < n_blocks; ++i) { + for (uint32_t j = 0; j < 8; ++j) { + uint64_t chunk; + // Compiler will optimize away memcpys (avoids aliasing). 
+ memcpy(&chunk, block_ptr + (sizeof(uint64_t) * j), sizeof(uint64_t)); + chunk >>= 1; + chunk &= 0x7777777777777777ULL; // nibble ~MSB mask + memcpy(block_ptr + (sizeof(uint64_t) * j), &chunk, sizeof(uint64_t)); + } + block_ptr += 64; + } +} + +} // vespalib diff --git a/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h new file mode 100644 index 000000000000..db008768792a --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h @@ -0,0 +1,134 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "alloc.h" +#include +#include + +namespace vespalib { + +/** + * Adds an implementation of a probabilistic frequency sketch that allows for estimating the + * relative frequency of of elements from a stream of events. That is, the sketch does not + * capture the _absolute_ frequency of a given element over time. + * + * To reduce the requirement for the number of bits used for the sketch's underlying counters, + * this sketch uses automatic decaying of counter values once the number of recorded samples + * reaches a certain point (relative to the sketch's size). Decaying divides all counters by 2. + * + * The underlying data structure is a Count-Min sketch [0][1] with automatic decaying of + * counters based on TinyLFU [2]. + * + * This implementation has certain changes from a "textbook" CM sketch, inspired by the + * approach used in [3]. In particular, instead of having `d` logical rows each with width `w` + * that are accessed with hash-derived indexes (and thus likely triggering `d` cache misses + * for large values of `w`) we subdivide into w/64 blocks each with fixed number d=4 rows of + * 32 4-bit counters, i.e. each block is exactly 64 bytes. Counter updates or reads always + * happen within the scope of a single block. We also ensure the block array is allocated with + * at least a 64-byte alignment. This ensures that a given sketch access will touch exactly 1 + * cache line of the underlying sketch buffer (not counting cache lines occupied by the sketch + * object itself, as we assume these are already present in the cache). + * Similarly, comparing the frequency of two elements will always touch at most 2 cache lines. + * + * Unlike [3] we use byte-wise counter accesses and only using a single hash computation per + * distinct sketch lookup instead of explicitly re-mixing hash bits. We also always divide the + * decay counter by 2 instead of subtracting the number of odd counters found (TODO reconsider?). + * + * The Count-Min sketch (and its cousin, the Counting Bloom Filter) using `k` counters is + * usually described as requiring k pairwise independent hash functions. This implementation + * assumes this requirement is unnecessary assuming a hash function with good entropy; we + * instead extract non-overlapping subsets of bits of a single hash value and use these as + * indices into our data structure components. + * + * Important: this frequency sketch _requires_ a good hash function, i.e. high entropy. + * Use `RelativeFrequencySketch` with HasGoodEntropyHash=false (default) if this is not the + * case for the type being counted, as it implicitly mixes the hash bits using XXH3. + * + * Thread safety: as thread safe as a std::vector. 
+ * + * References: + * [0]: The Count-Min Sketch and its Applications (2003) + * [1]: https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch + * [2]: TinyLFU: A Highly Efficient Cache Admission Policy (2015) + * [3]: https://github.com/ben-manes/caffeine/blob/master/caffeine/ + * src/main/java/com/github/benmanes/caffeine/cache/FrequencySketch.java + */ +class RawRelativeFrequencySketch { + alloc::Alloc _buf; + size_t _samples_since_decay; + size_t _window_size; + uint32_t _block_mask_bits; +public: + explicit RawRelativeFrequencySketch(size_t count); + ~RawRelativeFrequencySketch(); + + void add_by_hash(uint64_t hash) noexcept; + [[nodiscard]] std::weak_ordering estimate_relative_frequency_by_hash(uint64_t lhs_hash, uint64_t rhs_hash) const noexcept; + + // Gets the raw underlying counter value saturated in [0, 15] for a given hash. + [[nodiscard]] uint8_t count_min_by_hash(uint64_t hash) const noexcept; + + void div_all_by_2() noexcept; + + [[nodiscard]] size_t window_size() const noexcept { return _window_size; } +}; + +/** + * Wrapper of RawRelativeFrequencySketch for an arbitrary hashable type. + * + * Only set HasGoodEntropyHash=true if you know that the underlying hash function is + * of good quality. This _excludes_ std::hash<> hashes, especially those for integers, + * as the hash function for those is more often than not the identity function. + * + * See `RawRelativeFrequencySketch` for algorithm details. + */ +template , bool HasGoodEntropyHash = false> +requires requires(Hash h, T t) { noexcept(noexcept(h(t))); } +class RelativeFrequencySketch { + RawRelativeFrequencySketch _impl; + Hash _hash; +public: + // Initializes a sketch used for estimating frequencies for an underlying cache + // (or similar datastructure) that can hold a maximum of `count` entries. + explicit RelativeFrequencySketch(size_t count, Hash hash = Hash{}) + : _impl(count), + _hash(hash) + {} + ~RelativeFrequencySketch() = default; +private: + [[nodiscard]] uint64_t hash_elem(const T& elem) const noexcept { + uint64_t hash = _hash(elem); + if constexpr (!HasGoodEntropyHash) { + hash = xxhash::xxh3_64(hash); // Mix it up! + } + return hash; + } +public: + // Increments the estimated frequency for the given element, identified by its hash. + // Frequency is saturated at 15. + void add(const T& elem) noexcept { + _impl.add_by_hash(hash_elem(elem)); + } + // Returns a frequency estimate for the given element, saturated at 15. Since this is + // a probabilistic sketch, the frequency may be overestimated. Note that automatic counter + // decaying will over time reduce the reported frequency of elements that are no longer + // added to the sketch. + [[nodiscard]] uint8_t count_min(const T& elem) const noexcept { + return _impl.count_min_by_hash(hash_elem(elem)); + } + [[nodiscard]] std::weak_ordering estimate_relative_frequency(const T& lhs, const T& rhs) const noexcept { + const uint64_t lhs_hash = hash_elem(lhs); + const uint64_t rhs_hash = hash_elem(rhs); + return _impl.estimate_relative_frequency_by_hash(lhs_hash, rhs_hash); + } + // Manually trigger counter decay; divides all count estimates by 2 + void div_all_by_2() { + _impl.div_all_by_2(); + } + // Sample count required before all counters are automatically divided by 2. + // Note that invoking `add(v)` for an element `v` whose counters are _all_ fully + // saturated prior to the invocation will _not_ count towards the sample count. 
+ [[nodiscard]] size_t window_size() const noexcept { return _impl.window_size(); } +}; + +} // vespalib From 7086003a009720b79a82d370648e4548e8f0c451 Mon Sep 17 00:00:00 2001 From: Arnstein Ressem Date: Wed, 13 Nov 2024 08:50:02 +0100 Subject: [PATCH 006/126] Revert "Revert "Switch to api.factory.vespa.ai"" --- ...-new-factory.sh => factory-command-old.sh} | 39 +++++++++++++++---- .buildkite/factory-command.sh | 39 ++++--------------- 2 files changed, 39 insertions(+), 39 deletions(-) rename .buildkite/{factory-command-new-factory.sh => factory-command-old.sh} (63%) diff --git a/.buildkite/factory-command-new-factory.sh b/.buildkite/factory-command-old.sh similarity index 63% rename from .buildkite/factory-command-new-factory.sh rename to .buildkite/factory-command-old.sh index 5d49121db313..ce9ecc70ef58 100755 --- a/.buildkite/factory-command-new-factory.sh +++ b/.buildkite/factory-command-old.sh @@ -8,24 +8,47 @@ if (( $# < 1 )); then fi COMMAND=$1 -FACTORY_API="https://api.factory.vespa.ai/factory/v1" +FACTORY_API="https://factory.vespa.aws-us-east-1a.vespa.oath.cloud/api/factory/v1" +COOKIEJAR=$(pwd)/jar.txt +# shellcheck disable=2064 +trap "rm -f $COOKIEJAR" EXIT -CURL="curl -sL --key /workspace/identity/key --cert /workspace/identity/cert" -TOKEN=$(curl -sL --key /workspace/identity/key --cert /workspace/identity/cert -X POST -H "Content-Type: application/x-www-form-urlencoded" -d"grant_type=client_credentials&scope=vespa.factory%3Adomain" "https://zts.athenz.vespa-cloud.com:4443/zts/v1/oauth2/token" | jq -re '.access_token') +SESSION_TOKEN=null +WAIT_UNTIL=$(( $(date +%s) + 120 )) +set +e +while [[ $SESSION_TOKEN == null ]]; do + SESSION_TOKEN=$(curl -s -H 'Content-Type: application/json' -H 'Accept: application/json' -d "{ \"username\": \"svc-okta-vespa-factory\", \"password\": \"$SVC_OKTA_VESPA_FACTORY_TOKEN\" }" https://ouryahoo.okta.com/api/v1/authn | jq -re '.sessionToken') + + if [[ $SESSION_TOKEN == null ]]; then + if [[ $(date +%s) -ge $WAIT_UNTIL ]]; then + echo "Could not fetch session token from Okta: SESSION_TOKEN=$SESSION_TOKEN" + exit 1 + else + echo "Invalid SESSION_TOKEN=$SESSION_TOKEN . Trying again ..." 
>&2 + sleep 3 + fi + fi +done +set -e + +LOCATION=$(curl -s -i -c "$COOKIEJAR" "https://factory.vespa.aws-us-east-1a.vespa.oath.cloud/login" | grep location | awk '{print $2}' | tr -d '\r') +curl -sL -b "$COOKIEJAR" -c "$COOKIEJAR" "$LOCATION&sessionToken=$SESSION_TOKEN" &> /dev/null + +CURL="curl -sL -b $COOKIEJAR" shift case $COMMAND in get-version) VERSION=$1 if [[ -z $VERSION ]]; then echo "Usage: $0 $COMMAND "; exit 1; fi - $CURL -H "Authorization: Bearer $TOKEN" "$FACTORY_API/versions/$VERSION" + $CURL "$FACTORY_API/versions/$VERSION" ;; create-build) FACTORY_PIPELINE_ID=$1 FACTORY_PLATFORM=$2 if [[ -z $FACTORY_PIPELINE_ID ]]; then echo "Usage: $0 $COMMAND [factory platform]"; exit 1; fi if [[ -z $FACTORY_PLATFORM ]]; then FACTORY_PLATFORM="opensource_centos7"; fi - $CURL -H "Authorization: Bearer $TOKEN" -d "{ + $CURL -d "{ \"startSeconds\": $(date +%s), \"sdApiUrl\": \"https://api.buildkite.com/\", \"pipelineId\": $FACTORY_PIPELINE_ID, @@ -36,7 +59,7 @@ case $COMMAND in "$FACTORY_API/builds" ;; create-release) - $CURL -H "Authorization: Bearer $TOKEN" -d "{ + $CURL -d "{ \"startSeconds\": $(date +%s), \"systemName\": \"opensource\" }" \ @@ -51,7 +74,7 @@ case $COMMAND in echo "Usage: $0 $COMMAND " exit 1 fi - $CURL -H "Authorization: Bearer $TOKEN" -d "{ + $CURL -d "{ \"updatedSeconds\": $(date +%s), \"sdApiUrl\": \"https://api.buildkite.com/\", \"pipelineId\": $FACTORY_PIPELINE_ID, @@ -65,7 +88,7 @@ case $COMMAND in update-released-time) VERSION=$1 if [[ -z $VERSION ]]; then echo "Usage: $0 $COMMAND "; exit 1; fi - $CURL -H "Authorization: Bearer $TOKEN" -d "{ + $CURL -d "{ \"releasedSeconds\": $(date +%s), \"systemName\": \"opensource\" }" \ diff --git a/.buildkite/factory-command.sh b/.buildkite/factory-command.sh index ce9ecc70ef58..5d49121db313 100755 --- a/.buildkite/factory-command.sh +++ b/.buildkite/factory-command.sh @@ -8,47 +8,24 @@ if (( $# < 1 )); then fi COMMAND=$1 -FACTORY_API="https://factory.vespa.aws-us-east-1a.vespa.oath.cloud/api/factory/v1" -COOKIEJAR=$(pwd)/jar.txt -# shellcheck disable=2064 -trap "rm -f $COOKIEJAR" EXIT +FACTORY_API="https://api.factory.vespa.ai/factory/v1" -SESSION_TOKEN=null -WAIT_UNTIL=$(( $(date +%s) + 120 )) -set +e -while [[ $SESSION_TOKEN == null ]]; do - SESSION_TOKEN=$(curl -s -H 'Content-Type: application/json' -H 'Accept: application/json' -d "{ \"username\": \"svc-okta-vespa-factory\", \"password\": \"$SVC_OKTA_VESPA_FACTORY_TOKEN\" }" https://ouryahoo.okta.com/api/v1/authn | jq -re '.sessionToken') - - if [[ $SESSION_TOKEN == null ]]; then - if [[ $(date +%s) -ge $WAIT_UNTIL ]]; then - echo "Could not fetch session token from Okta: SESSION_TOKEN=$SESSION_TOKEN" - exit 1 - else - echo "Invalid SESSION_TOKEN=$SESSION_TOKEN . Trying again ..." 
>&2 - sleep 3 - fi - fi -done -set -e - -LOCATION=$(curl -s -i -c "$COOKIEJAR" "https://factory.vespa.aws-us-east-1a.vespa.oath.cloud/login" | grep location | awk '{print $2}' | tr -d '\r') -curl -sL -b "$COOKIEJAR" -c "$COOKIEJAR" "$LOCATION&sessionToken=$SESSION_TOKEN" &> /dev/null - -CURL="curl -sL -b $COOKIEJAR" +CURL="curl -sL --key /workspace/identity/key --cert /workspace/identity/cert" +TOKEN=$(curl -sL --key /workspace/identity/key --cert /workspace/identity/cert -X POST -H "Content-Type: application/x-www-form-urlencoded" -d"grant_type=client_credentials&scope=vespa.factory%3Adomain" "https://zts.athenz.vespa-cloud.com:4443/zts/v1/oauth2/token" | jq -re '.access_token') shift case $COMMAND in get-version) VERSION=$1 if [[ -z $VERSION ]]; then echo "Usage: $0 $COMMAND "; exit 1; fi - $CURL "$FACTORY_API/versions/$VERSION" + $CURL -H "Authorization: Bearer $TOKEN" "$FACTORY_API/versions/$VERSION" ;; create-build) FACTORY_PIPELINE_ID=$1 FACTORY_PLATFORM=$2 if [[ -z $FACTORY_PIPELINE_ID ]]; then echo "Usage: $0 $COMMAND [factory platform]"; exit 1; fi if [[ -z $FACTORY_PLATFORM ]]; then FACTORY_PLATFORM="opensource_centos7"; fi - $CURL -d "{ + $CURL -H "Authorization: Bearer $TOKEN" -d "{ \"startSeconds\": $(date +%s), \"sdApiUrl\": \"https://api.buildkite.com/\", \"pipelineId\": $FACTORY_PIPELINE_ID, @@ -59,7 +36,7 @@ case $COMMAND in "$FACTORY_API/builds" ;; create-release) - $CURL -d "{ + $CURL -H "Authorization: Bearer $TOKEN" -d "{ \"startSeconds\": $(date +%s), \"systemName\": \"opensource\" }" \ @@ -74,7 +51,7 @@ case $COMMAND in echo "Usage: $0 $COMMAND " exit 1 fi - $CURL -d "{ + $CURL -H "Authorization: Bearer $TOKEN" -d "{ \"updatedSeconds\": $(date +%s), \"sdApiUrl\": \"https://api.buildkite.com/\", \"pipelineId\": $FACTORY_PIPELINE_ID, @@ -88,7 +65,7 @@ case $COMMAND in update-released-time) VERSION=$1 if [[ -z $VERSION ]]; then echo "Usage: $0 $COMMAND "; exit 1; fi - $CURL -d "{ + $CURL -H "Authorization: Bearer $TOKEN" -d "{ \"releasedSeconds\": $(date +%s), \"systemName\": \"opensource\" }" \ From 9a2a55b4128a15c8b17a081826dcc966d9488b68 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Wed, 13 Nov 2024 10:49:42 +0000 Subject: [PATCH 007/126] refactor, avoid excessive logging --- .../dispatch/rpc/RpcProtobufFillInvoker.java | 122 ++++++++++++------ 1 file changed, 83 insertions(+), 39 deletions(-) diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java index c3dac98ce4b9..fc7a219cc3fe 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java @@ -27,6 +27,7 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Level; import java.util.logging.Logger; @@ -60,7 +61,7 @@ private record ResponseAndHits(Client.ResponseOrError response /** The number of responses we should receive (and process) before this is complete */ private int outstandingResponses; - private int hitsFilledOk = 0; + private int numOkFilledHits = 0; RpcProtobufFillInvoker(RpcConnectionPool resourcePool, CompressPayload compressor, DocumentDatabase documentDb, String serverId, DecodePolicy decodePolicy, boolean summaryNeedsQuery) { @@ -149,8 +150,7 @@ private void 
sendDocsumsRequest(int nodeId, List hits, byte[] payload, roe -> receive(roe, hits), clientTimeout); } - private ResponseAndHits getNextResponse(Query query) throws InterruptedException { - long timeLeftMs = query.getTimeLeft(); + private ResponseAndHits getNextResponse(long timeLeftMs) throws InterruptedException { if (timeLeftMs <= 0) { return null; } @@ -165,7 +165,7 @@ private void processResponses(Result result, String summaryClass) throws Timeout try { List skippedHits = new ArrayList<>(); while (outstandingResponses > 0) { - var responseAndHits = getNextResponse(result.getQuery()); + var responseAndHits = getNextResponse(result.getQuery().getTimeLeft()); if (responseAndHits == null) { throwTimeout(); } @@ -176,39 +176,7 @@ private void processResponses(Result result, String summaryClass) throws Timeout // all done OK return; } - int numSkipped = skippedHits.size(); - int numTotal = numSkipped + hitsFilledOk; - double absoluteRetryLimit = result.getQuery().properties().getInteger(Dispatcher.docsumRetryLimit, 10); - double retryLimitFactor = result.getQuery().properties().getDouble(Dispatcher.docsumRetryFactor, 0.5); - double retryLimit = Math.min(absoluteRetryLimit, retryLimitFactor * numTotal); - // maybe retry: - if (numSkipped < retryLimit) { - log.log(Level.WARNING, "Retry docsum fetch for " + numSkipped + " hits (" + hitsFilledOk + " was ok)"); - ListMap retryMap = new ListMap<>(); - for (Integer nodeId : resourcePool.knownNodeIds()) { - for (var hit : skippedHits) { - if (hit.getDistributionKey() != nodeId) { - retryMap.put(nodeId, hit); - } - } - } - if (retryMap.size() > 0) { - summaryNeedsQuery = true; - sendFillRequestByNode(result, summaryClass, retryMap); - } - while (outstandingResponses > 0) { - var responseAndHits = getNextResponse(result.getQuery()); - if (responseAndHits == null) { - log.log(Level.WARNING, "Timed out waiting for summary data. " + outstandingResponses + " responses outstanding."); - break; - } - processOneResponse(result, responseAndHits, summaryClass, true); - outstandingResponses--; - } - skippedHits.removeIf(hit -> hit.isFilled(summaryClass)); - } else { - log.log(Level.WARNING, "Docsum fetch failed for " + numSkipped + " hits (" + hitsFilledOk + " was ok), no retry"); - } + maybeRetry(skippedHits, result, summaryClass); if (! skippedHits.isEmpty()) { result.hits().addError(ErrorMessage .createEmptyDocsums("Missing hit summary data for summary " + summaryClass + " for " + skippedHits + " hits")); @@ -254,6 +222,8 @@ private void convertErrorsFromDocsumReply(Result target, List fill(Result result, List hits, String summaryClass, byte[] payload, boolean ignoreErrors) { try { var protobuf = SearchProtocol.DocsumReply.parseFrom(payload); @@ -276,11 +246,11 @@ private List fill(Result result, List hits, String summaryClas for (int i = 0; i < hits.size(); i++) { Inspector summary = summaries.entry(i).field("docsum"); FastHit hit = hits.get(i); - if (summary.valid()) { + if (((++monkey % 42) != 0) && summary.valid() && ! hit.isFilled(summaryClass)) { hit.setField(Hit.SDDOCNAME_FIELD, documentDb.schema().name()); hit.addSummary(documentDb.getDocsumDefinitionSet().getDocsum(summaryClass), summary); hit.setFilled(summaryClass); - ++hitsFilledOk; + ++numOkFilledHits; } else { skippedHits.add(hit); } @@ -299,4 +269,78 @@ private void throwTimeout() throws TimeoutException { throw new TimeoutException("Timed out waiting for summary data. 
" + outstandingResponses + " responses outstanding."); } + /* + * The content layer may return some empty docsums when redistribution is in progress, + * and in that case the document should be present on some other node, and we should + * be able to get the docsum from that node if we retry. But we don't know where + * that would be, so we need to try all possible nodes. + * To avoid overloading the content layer, we only retry if the number of skipped hits + * is below a tunable limit, and if the ratio of failed to ok hits is below another + * tunable limit (if too much failed on first try, it's likely not helpful to retry). + */ + private void maybeRetry(List skippedHits, Result result, String summaryClass) throws InterruptedException { + int numSkipped = skippedHits.size(); + int numTotal = numSkipped + numOkFilledHits; + var query = result.getQuery(); + double absoluteRetryLimit = query.properties().getInteger(Dispatcher.docsumRetryLimit, 10); + double retryLimitFactor = query.properties().getDouble(Dispatcher.docsumRetryFactor, 0.5); + double retryLimit = Math.min(absoluteRetryLimit, retryLimitFactor * numTotal); + if (numSkipped < retryLimit) { + ListMap retryMap = new ListMap<>(); + for (Integer nodeId : resourcePool.knownNodeIds()) { + for (var hit : skippedHits) { + if (hit.getDistributionKey() != nodeId) { + retryMap.put(nodeId, hit); + } + } + } + // no retry if there is only one node + if (retryMap.size() > 0) { + if (shouldLogRetry()) { + log.log(Level.WARNING, "Retry docsum fetch for " + numSkipped + " hits (" + numOkFilledHits + " ok hits)"); + } + summaryNeedsQuery = true; + sendFillRequestByNode(result, summaryClass, retryMap); + while (outstandingResponses > 0 && numOkFilledHits < numTotal) { + var responseAndHits = getNextResponse(query.getTimeLeft()); + if (responseAndHits == null) { + if (shouldLogRetryTimeout()) { + log.log(Level.WARNING, "Timed out waiting for summary data. 
" + outstandingResponses + " responses outstanding."); + } + break; + } + processOneResponse(result, responseAndHits, summaryClass, true); + outstandingResponses--; + } + skippedHits.removeIf(hit -> hit.isFilled(summaryClass)); + } + } else { + if (shouldLogNoRetry()) { + log.log(Level.WARNING, "Docsum fetch failed for " + numSkipped + " hits (" + numOkFilledHits + " ok hits), no retry"); + } + } + } + + private static boolean shouldLogForCount(int count) { + if (count < 100) return true; + if (count < 1000) return (count % 100) == 0; + if (count < 100000) return (count % 1000) == 0; + return (count % 10000) == 0; + } + private static final AtomicInteger retryCounter = new AtomicInteger(); + private static final AtomicInteger noRetryCounter = new AtomicInteger(); + private static final AtomicInteger retryTimeoutCounter = new AtomicInteger(); + private static boolean shouldLogRetry() { + int count = retryCounter.getAndAdd(1); + return shouldLogForCount(count); + } + private static boolean shouldLogNoRetry() { + int count = noRetryCounter.getAndAdd(1); + return shouldLogForCount(count); + } + private static boolean shouldLogRetryTimeout() { + int count = retryTimeoutCounter.getAndAdd(1); + return shouldLogForCount(count); + } + } From 70da2c3e03118b18a09d2b31429e3e3cfc69dcc3 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Wed, 13 Nov 2024 10:51:15 +0000 Subject: [PATCH 008/126] remove chaos monkey --- .../com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java index fc7a219cc3fe..db1d4588eebe 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java @@ -222,8 +222,6 @@ private void convertErrorsFromDocsumReply(Result target, List fill(Result result, List hits, String summaryClass, byte[] payload, boolean ignoreErrors) { try { var protobuf = SearchProtocol.DocsumReply.parseFrom(payload); @@ -246,7 +244,7 @@ private List fill(Result result, List hits, String summaryClas for (int i = 0; i < hits.size(); i++) { Inspector summary = summaries.entry(i).field("docsum"); FastHit hit = hits.get(i); - if (((++monkey % 42) != 0) && summary.valid() && ! hit.isFilled(summaryClass)) { + if (summary.valid() && ! 
hit.isFilled(summaryClass)) { hit.setField(Hit.SDDOCNAME_FIELD, documentDb.schema().name()); hit.addSummary(documentDb.getDocsumDefinitionSet().getDocsum(summaryClass), summary); hit.setFilled(summaryClass); From 6a5665752093c39fd4cc3a3c96b169d218106a89 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Thu, 14 Nov 2024 08:01:24 +0000 Subject: [PATCH 009/126] add some tracing --- .../dispatch/rpc/RpcProtobufFillInvoker.java | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java index db1d4588eebe..8e3b64d1fba9 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java @@ -62,6 +62,7 @@ private record ResponseAndHits(Client.ResponseOrError response /** The number of responses we should receive (and process) before this is complete */ private int outstandingResponses; private int numOkFilledHits = 0; + private int numHitsToFill = 0; RpcProtobufFillInvoker(RpcConnectionPool resourcePool, CompressPayload compressor, DocumentDatabase documentDb, String serverId, DecodePolicy decodePolicy, boolean summaryNeedsQuery) { @@ -124,12 +125,14 @@ public void receive(Client.ResponseOrError response, List hitsByNode(Result result) { + private final ListMap hitsByNode(Result result) { ListMap hitsByNode = new ListMap<>(); - for (Hit hit : (Iterable) result.hits()::unorderedDeepIterator) - if (hit instanceof FastHit fastHit) + for (Hit hit : (Iterable) result.hits()::unorderedDeepIterator) { + if (hit instanceof FastHit fastHit) { + ++numHitsToFill; hitsByNode.put(fastHit.getDistributionKey(), fastHit); - + } + } return hitsByNode; } @@ -278,12 +281,12 @@ private void throwTimeout() throws TimeoutException { */ private void maybeRetry(List skippedHits, Result result, String summaryClass) throws InterruptedException { int numSkipped = skippedHits.size(); - int numTotal = numSkipped + numOkFilledHits; var query = result.getQuery(); double absoluteRetryLimit = query.properties().getInteger(Dispatcher.docsumRetryLimit, 10); double retryLimitFactor = query.properties().getDouble(Dispatcher.docsumRetryFactor, 0.5); - double retryLimit = Math.min(absoluteRetryLimit, retryLimitFactor * numTotal); + double retryLimit = Math.min(absoluteRetryLimit, retryLimitFactor * numHitsToFill); if (numSkipped < retryLimit) { + result.getQuery().trace(false, 1, "Retry summary fetching for " + numSkipped + " empty docsums (of " + numHitsToFill + " hits)"); ListMap retryMap = new ListMap<>(); for (Integer nodeId : resourcePool.knownNodeIds()) { for (var hit : skippedHits) { @@ -299,7 +302,7 @@ private void maybeRetry(List skippedHits, Result result, String summary } summaryNeedsQuery = true; sendFillRequestByNode(result, summaryClass, retryMap); - while (outstandingResponses > 0 && numOkFilledHits < numTotal) { + while (outstandingResponses > 0 && numOkFilledHits < numHitsToFill) { var responseAndHits = getNextResponse(query.getTimeLeft()); if (responseAndHits == null) { if (shouldLogRetryTimeout()) { @@ -313,6 +316,7 @@ private void maybeRetry(List skippedHits, Result result, String summary skippedHits.removeIf(hit -> hit.isFilled(summaryClass)); } } else { + result.getQuery().trace(false, 1, "Summary fetching got " + numSkipped + " empty docsums (of " + numHitsToFill + " hits), no retry"); 
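+            // The query trace above is always emitted; the WARNING below is rate limited by
+            // shouldLogNoRetry() (every call while the count is below 100, then every 100th up
+            // to 1000, every 1000th up to 100000, and every 10000th after that) to avoid log spam.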
if (shouldLogNoRetry()) { log.log(Level.WARNING, "Docsum fetch failed for " + numSkipped + " hits (" + numOkFilledHits + " ok hits), no retry"); } From 23228cdbf7808568aee9a08cdaea4bdaadebc3cd Mon Sep 17 00:00:00 2001 From: Theodor Kvalsvik Lauritzen Date: Thu, 14 Nov 2024 09:50:55 +0100 Subject: [PATCH 010/126] fix: struct dot syntax to one level --- .../language-server/src/main/ccc/yqlplus/YQLPlus.ccc | 2 +- .../java/ai/vespa/schemals/schemadocument/YQLDocument.java | 2 +- .../src/test/java/ai/vespa/schemals/YQLParserTest.java | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc b/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc index 027192fce509..4448c372c87a 100644 --- a/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc +++ b/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc @@ -609,7 +609,7 @@ primary_expression: ( expression(in_select) ) | constant_expression | ( - (SCAN 2 => call_expression(in_select)) + (SCAN 4 => call_expression(in_select)) // WARNING: The scan number could be very large. This will catch all Myfield.MyChild. However nested structs will not parse | fieldref ) ) diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java index 7a9d5bd3ddae..d4a23ec004bb 100644 --- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java +++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java @@ -190,7 +190,7 @@ public static ParseResult parseContent(ParseContext context) { charsRead = newOffset; } - // YQLUtils.printTree(context.logger(), ret); + YQLUtils.printTree(context.logger(), ret); return new ParseResult(diagnostics, Optional.of(ret)); } diff --git a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java index cf4dea61e3e5..a97bcea5d415 100644 --- a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java +++ b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java @@ -79,7 +79,8 @@ Stream generateGoodTests() { "select * from music where title contains \"madonna\" and !(title contains \"saint\")", "select * from music where text contains phrase(\"st\", \"louis\", \"blues\")", "select * from music where persons contains sameElement(first_name contains 'Joe', last_name contains 'Smith', year_of_birth < 1940)", - // "select * from music where identities contains sameElement(key contains 'father', value.first_name contains 'Joe', value.last_name contains 'Smith', value.year_of_birth < 1940)", + "select * from music where identities contains sameElement(key contains 'father', value.first_name contains 'Joe', value.last_name contains 'Smith', value.year_of_birth < 1940)", + // "select * from music where gradparentStruct.parentStruct.childField contains 'madonna'", "select * from music where fieldName contains equiv(\"A\",\"B\")", "select * from music where myUrlField contains uri(\"vespa.ai/foo\")", "select * from music where 
myStringAttribute contains ({prefixLength:1, maxEditDistance:2}fuzzy(\"parantesis\"))", From d0a3e8bee4888482b223d3df61c5175e243cedf0 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Thu, 14 Nov 2024 10:31:30 +0100 Subject: [PATCH 011/126] Add back original constructor (needed by older config models) --- .../java/com/yahoo/config/provision/CapacityPolicies.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java b/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java index 3b2fa0df14ad..6c856baeb3e9 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java @@ -19,6 +19,11 @@ public class CapacityPolicies { public record Tuning(Architecture adminClusterArchitecture, double logserverMemoryGiB, double clusterControllerMemoryGiB) { + + public Tuning(Architecture adminClusterArchitecture, double logserverMemoryGiB) { + this(adminClusterArchitecture, logserverMemoryGiB, 0.0); + } + double logserverMem(double v) { double override = logserverMemoryGiB(); return (override > 0) ? override : v; From fce7cd187efab53511050cb8d9156dab82412678 Mon Sep 17 00:00:00 2001 From: gjoranv Date: Tue, 12 Nov 2024 16:56:08 +0100 Subject: [PATCH 012/126] Remove temporary code for debug logging --- .../vespa/config/server/session/SessionRepository.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java index 4787b76e4060..f01cb1f3cc47 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java @@ -14,7 +14,6 @@ import com.yahoo.config.model.application.provider.DeployData; import com.yahoo.config.model.application.provider.FilesApplicationPackage; import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.CloudName; import com.yahoo.config.provision.TenantName; import com.yahoo.config.provision.Zone; import com.yahoo.container.jdisc.secretstore.SecretStore; @@ -578,11 +577,6 @@ private void childEvent(CuratorFramework ignored, PathChildrenCacheEvent event) private void write(Session existingSession, LocalSession session, ApplicationId applicationId, Instant created) { - // TODO: remove when tenant secret store integration test passes - var tenantSecretStores = existingSession.getTenantSecretStores(); - if (! 
tenantSecretStores.isEmpty() && zone.system().isPublic() && zone.cloud().name().equals(CloudName.AWS)) { - tenantSecretStores.forEach(ss -> log.info("Existing tenant secret store:\n" + ss)); - } SessionSerializer sessionSerializer = new SessionSerializer(); sessionSerializer.write(session.getSessionZooKeeperClient(), applicationId, @@ -592,7 +586,7 @@ private void write(Session existingSession, LocalSession session, ApplicationId existingSession.getVespaVersion(), existingSession.getAthenzDomain(), existingSession.getQuota(), - tenantSecretStores, + existingSession.getTenantSecretStores(), existingSession.getOperatorCertificates(), existingSession.getCloudAccount(), existingSession.getDataplaneTokens(), From 7c3b35dd684b8b973019f99bb6476e196e9fceb6 Mon Sep 17 00:00:00 2001 From: gjoranv Date: Tue, 12 Nov 2024 15:43:44 +0100 Subject: [PATCH 013/126] Add a TenantVault with serializer for deployment data --- .../yahoo/config/model/api/TenantVault.java | 13 +++ .../server/tenant/TenantVaultSerializer.java | 84 +++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 config-model-api/src/main/java/com/yahoo/config/model/api/TenantVault.java create mode 100644 configserver/src/main/java/com/yahoo/vespa/config/server/tenant/TenantVaultSerializer.java diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/TenantVault.java b/config-model-api/src/main/java/com/yahoo/config/model/api/TenantVault.java new file mode 100644 index 000000000000..99a87f3936a5 --- /dev/null +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/TenantVault.java @@ -0,0 +1,13 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.config.model.api; + +import java.util.List; + +/** + * @author gjoranv + */ +public record TenantVault(String id, String name, String externalId, List secrets) { + + public record Secret(String id, String name) { } + +} diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/TenantVaultSerializer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/TenantVaultSerializer.java new file mode 100644 index 000000000000..c7432440157a --- /dev/null +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/TenantVaultSerializer.java @@ -0,0 +1,84 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.config.server.tenant; + +import com.yahoo.config.model.api.TenantVault; +import com.yahoo.slime.ArrayTraverser; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.Inspector; +import com.yahoo.slime.Slime; +import com.yahoo.slime.Type; + +import java.util.ArrayList; +import java.util.List; + +/** + * @author gjoranv + */ +public class TenantVaultSerializer { + + // WARNING: Since there are multiple servers in a ZooKeeper cluster, and they upgrade one by one + // (and rewrite all nodes on startup), changes to the serialized format must be made + // such that what is serialized on version N+1 can be read by version N: + // - ADDING FIELDS: Always ok + // - REMOVING FIELDS: Stop reading the field first. Stop writing it on a later version. + // - CHANGING THE FORMAT OF A FIELD: Don't do it, bro. 
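+    //
+    // For reference, the serialized form produced by this class looks roughly like
+    // (illustrative values only):
+    //   [ { "id": "vault-id", "name": "vault-name", "externalId": "ext-id",
+    //       "secrets": [ { "name": "secret-name", "id": "secret-id" } ] } ]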
+ + private static final String idField = "id"; + private static final String nameField = "name"; + private static final String externalIdField = "externalId"; + private static final String secretsArray = "secrets"; + + public static Slime toSlime(List vaults) { + Slime slime = new Slime(); + Cursor cursor = slime.setArray(); + toSlime(vaults, cursor); + return slime; + } + + public static void toSlime(List vaults, Cursor cursor) { + vaults.forEach(tenantVault -> toSlime(tenantVault, cursor.addObject())); + } + + private static void toSlime(TenantVault vault, Cursor object) { + object.setString(idField, vault.id()); + object.setString(nameField, vault.name()); + object.setString(externalIdField, vault.externalId()); + Cursor secrets = object.setArray(secretsArray); + vault.secrets().forEach(secret -> toSlime(secret, secrets.addObject())); + } + + private static void toSlime(TenantVault.Secret secret, Cursor object) { + object.setString("name", secret.name()); + object.setString("id", secret.id()); + } + + public static TenantVault fromSlime(Inspector inspector) { + if (inspector.type() == Type.OBJECT) { + return new TenantVault( + inspector.field(idField).asString(), + inspector.field(nameField).asString(), + inspector.field(externalIdField).asString(), + secretsFromSlime(inspector.field(secretsArray))); + } + throw new IllegalArgumentException("Unknown format encountered for tenant vaults!"); + } + + private static List secretsFromSlime(Inspector inspector) { + List secrets = new ArrayList<>(); + inspector.traverse(((ArrayTraverser)(idx, secret) -> secrets.add(secretFromSlime(secret)))); + return secrets; + } + + private static TenantVault.Secret secretFromSlime(Inspector inspector) { + return new TenantVault.Secret( + inspector.field("name").asString(), + inspector.field("id").asString()); + } + + public static List listFromSlime(Inspector inspector) { + List tenantVaults = new ArrayList<>(); + inspector.traverse(((ArrayTraverser)(idx, vault) -> tenantVaults.add(fromSlime(vault)))); + return tenantVaults; + } + +} From 5fb9fed7443b8939e7074f3ef9c94bd4515492ee Mon Sep 17 00:00:00 2001 From: gjoranv Date: Tue, 12 Nov 2024 17:17:57 +0100 Subject: [PATCH 014/126] Update abi spec --- config-model-api/abi-spec.json | 39 ++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/config-model-api/abi-spec.json b/config-model-api/abi-spec.json index fc534be94dab..e61c2a196ba8 100644 --- a/config-model-api/abi-spec.json +++ b/config-model-api/abi-spec.json @@ -1390,6 +1390,7 @@ "public java.util.Optional endpointCertificateSecrets()", "public java.util.Optional athenzDomain()", "public com.yahoo.config.model.api.Quota quota()", + "public java.util.List tenantVaults()", "public java.util.List tenantSecretStores()", "public java.lang.String jvmGCOptions()", "public abstract java.lang.String jvmGCOptions(java.util.Optional)", @@ -1798,6 +1799,44 @@ ], "fields" : [ ] }, + "com.yahoo.config.model.api.TenantVault$Secret" : { + "superClass" : "java.lang.Record", + "interfaces" : [ ], + "attributes" : [ + "public", + "final", + "record" + ], + "methods" : [ + "public void (java.lang.String, java.lang.String)", + "public final java.lang.String toString()", + "public final int hashCode()", + "public final boolean equals(java.lang.Object)", + "public java.lang.String name()", + "public java.lang.String id()" + ], + "fields" : [ ] + }, + "com.yahoo.config.model.api.TenantVault" : { + "superClass" : "java.lang.Record", + "interfaces" : [ ], + "attributes" : [ + "public", + "final", + 
"record" + ], + "methods" : [ + "public void (java.lang.String, java.lang.String, java.lang.String, java.util.List)", + "public final java.lang.String toString()", + "public final int hashCode()", + "public final boolean equals(java.lang.Object)", + "public java.lang.String id()", + "public java.lang.String name()", + "public java.lang.String externalId()", + "public java.util.List secrets()" + ], + "fields" : [ ] + }, "com.yahoo.config.model.api.ValidationParameters$CheckRouting" : { "superClass" : "java.lang.Enum", "interfaces" : [ ], From 44e4a2eaed777d56055b8544f411dfc5e16bb6ed Mon Sep 17 00:00:00 2001 From: gjoranv Date: Wed, 13 Nov 2024 11:48:01 +0100 Subject: [PATCH 015/126] Propagate info about tenant vaults to configserver --- .../yahoo/config/model/api/ModelContext.java | 2 ++ .../config/server/deploy/Deployment.java | 1 + .../server/deploy/ModelContextImpl.java | 9 ++++++ .../modelfactory/ActivatedModelsBuilder.java | 5 +-- .../modelfactory/PreparedModelsBuilder.java | 1 + .../config/server/session/PrepareParams.java | 27 +++++++++++++++- .../vespa/config/server/session/Session.java | 6 ++++ .../config/server/session/SessionData.java | 8 +++++ .../server/session/SessionPreparer.java | 5 ++- .../server/session/SessionRepository.java | 1 + .../server/session/SessionSerializer.java | 8 +++-- .../session/SessionZooKeeperClient.java | 22 +++++++++++++ .../server/tenant/TenantVaultSerializer.java | 4 +-- .../config/server/ModelContextImplTest.java | 1 + .../server/session/PrepareParamsTest.java | 32 +++++++++++++++++++ .../session/SessionZooKeeperClientTest.java | 18 ++++++++++- 16 files changed, 139 insertions(+), 11 deletions(-) diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java index ecb1212b4b4b..32e830d3e792 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java @@ -150,6 +150,8 @@ interface Properties { default Quota quota() { return Quota.unlimited(); } + default List tenantVaults() { return List.of(); } + default List tenantSecretStores() { return List.of(); } // Default setting for the gc-options attribute if not specified explicit by application diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java index aec6fd8f1fe5..6a5aabd0410f 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java @@ -255,6 +255,7 @@ private static Supplier createPrepareParams( .isInternalRedeployment(isInternalRedeployment) .force(force) .waitForResourcesInPrepare(waitForResourcesInPrepare) + .tenantVaults(session.getTenantVaults()) .tenantSecretStores(session.getTenantSecretStores()) .dataplaneTokens(session.getDataplaneTokens()); session.getDockerImageRepository().ifPresent(params::dockerImageRepository); diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java index c81a3a4c4475..ef4aefff52fa 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java @@ -18,6 +18,7 @@ import 
com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.Reindexing; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; import com.yahoo.config.provision.CloudAccount; @@ -350,6 +351,7 @@ public static class Properties implements ModelContext.Properties { private final Optional endpointCertificateSecrets; private final Optional athenzDomain; private final Quota quota; + private final List tenantVaults; private final List tenantSecretStores; private final SecretStore secretStore; private final StringFlag jvmGCOptionsFlag; @@ -376,6 +378,7 @@ public Properties(ApplicationId applicationId, Optional endpointCertificateSecrets, Optional athenzDomain, Optional maybeQuota, + List tenantVaults, List tenantSecretStores, SecretStore secretStore, List operatorCertificates, @@ -397,6 +400,7 @@ public Properties(ApplicationId applicationId, this.endpointCertificateSecrets = endpointCertificateSecrets; this.athenzDomain = athenzDomain; this.quota = maybeQuota.orElseGet(Quota::unlimited); + this.tenantVaults = tenantVaults; this.tenantSecretStores = tenantSecretStores; this.secretStore = secretStore; this.jvmGCOptionsFlag = PermanentFlags.JVM_GC_OPTIONS.bindTo(flagSource) @@ -469,6 +473,11 @@ public String athenzDnsSuffix() { @Override public Quota quota() { return quota; } + @Override + public List tenantVaults() { + return tenantVaults; + } + @Override public List tenantSecretStores() { return tenantSecretStores; diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ActivatedModelsBuilder.java b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ActivatedModelsBuilder.java index 5017f25b2f83..6fc5a0881872 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ActivatedModelsBuilder.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ActivatedModelsBuilder.java @@ -14,7 +14,6 @@ import com.yahoo.config.model.api.Provisioned; import com.yahoo.config.model.application.provider.MockFileRegistry; import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.TenantName; import com.yahoo.config.provision.Zone; @@ -35,9 +34,6 @@ import com.yahoo.vespa.config.server.tenant.TenantRepository; import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.model.VespaModel; -import com.yahoo.vespa.model.container.ApplicationContainerCluster; -import com.yahoo.vespa.model.content.cluster.ContentCluster; import java.util.Comparator; import java.util.List; @@ -168,6 +164,7 @@ private ModelContext.Properties createModelContextProperties(ApplicationId appli .flatMap(new EndpointCertificateRetriever(endpointCertificateSecretStores)::readEndpointCertificateSecrets), zkClient.readAthenzDomain(), zkClient.readQuota(), + zkClient.readTenantVaults(), zkClient.readTenantSecretStores(), secretStore, zkClient.readOperatorCertificates(), diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/PreparedModelsBuilder.java b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/PreparedModelsBuilder.java index fd8728ac655d..7a343143c843 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/PreparedModelsBuilder.java +++ 
b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/PreparedModelsBuilder.java @@ -222,6 +222,7 @@ private ModelContext.Properties createModelContextProperties(Version modelVersio endpointCertificateSecrets, params.athenzDomain(), params.quota(), + params.tenantVaults(), params.tenantSecretStores(), secretStore, params.operatorCertificates(), diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/PrepareParams.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/PrepareParams.java index ec24cc17284f..c46cc8044049 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/PrepareParams.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/PrepareParams.java @@ -6,6 +6,7 @@ import com.yahoo.config.model.api.EndpointCertificateMetadata; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; import com.yahoo.config.provision.CloudAccount; @@ -24,6 +25,7 @@ import com.yahoo.vespa.config.server.tenant.DataplaneTokenSerializer; import com.yahoo.vespa.config.server.tenant.EndpointCertificateMetadataSerializer; import com.yahoo.vespa.config.server.tenant.TenantSecretStoreSerializer; +import com.yahoo.vespa.config.server.tenant.TenantVaultSerializer; import java.security.cert.X509Certificate; import java.time.Clock; @@ -32,7 +34,6 @@ import java.util.Objects; import java.util.Optional; import java.util.function.Function; -import java.util.stream.Collectors; /** * Parameters for preparing an application. Immutable. @@ -52,6 +53,7 @@ public final class PrepareParams { static final String DOCKER_IMAGE_REPOSITORY = "dockerImageRepository"; static final String ATHENZ_DOMAIN = "athenzDomain"; static final String QUOTA_PARAM_NAME = "quota"; + static final String TENANT_VAULTS_PARAM_NAME = "tenantVaults"; static final String TENANT_SECRET_STORES_PARAM_NAME = "tenantSecretStores"; static final String FORCE_PARAM_NAME = "force"; static final String WAIT_FOR_RESOURCES_IN_PREPARE = "waitForResourcesInPrepare"; @@ -74,6 +76,7 @@ public final class PrepareParams { private final Optional dockerImageRepository; private final Optional athenzDomain; private final Optional quota; + private final List tenantVaults; private final List tenantSecretStores; private final List operatorCertificates; private final Optional cloudAccount; @@ -92,6 +95,7 @@ private PrepareParams(ApplicationId applicationId, Optional dockerImageRepository, Optional athenzDomain, Optional quota, + List tenantVaults, List tenantSecretStores, boolean force, boolean waitForResourcesInPrepare, @@ -111,6 +115,7 @@ private PrepareParams(ApplicationId applicationId, this.dockerImageRepository = dockerImageRepository; this.athenzDomain = athenzDomain; this.quota = quota; + this.tenantVaults = tenantVaults; this.tenantSecretStores = tenantSecretStores; this.force = force; this.waitForResourcesInPrepare = waitForResourcesInPrepare; @@ -136,6 +141,7 @@ public static class Builder { private Optional dockerImageRepository = Optional.empty(); private Optional athenzDomain = Optional.empty(); private Optional quota = Optional.empty(); + private List tenantVaults = List.of(); private List tenantSecretStores = List.of(); private List operatorCertificates = List.of(); private Optional cloudAccount = Optional.empty(); @@ -250,6 +256,18 @@ public Builder quota(String serialized) { 
return this; } + public Builder tenantVaults(String serialized) { + List vaults = (serialized == null) + ? List.of() + : TenantVaultSerializer.listFromSlime(SlimeUtils.jsonToSlime(serialized).get()); + return tenantVaults(vaults); + } + + public Builder tenantVaults(List tenantVaults) { + this.tenantVaults = tenantVaults; + return this; + } + public Builder tenantSecretStores(String serialized) { List secretStores = (serialized == null) ? List.of() @@ -301,6 +319,7 @@ public PrepareParams build() { dockerImageRepository, athenzDomain, quota, + tenantVaults, tenantSecretStores, force, waitForResourcesInPrepare, @@ -323,6 +342,7 @@ public static PrepareParams fromHttpRequest(HttpRequest request, TenantName tena .dockerImageRepository(request.getProperty(DOCKER_IMAGE_REPOSITORY)) .athenzDomain(request.getProperty(ATHENZ_DOMAIN)) .quota(request.getProperty(QUOTA_PARAM_NAME)) + .tenantVaults(request.getProperty(TENANT_VAULTS_PARAM_NAME)) .tenantSecretStores(request.getProperty(TENANT_SECRET_STORES_PARAM_NAME)) .force(request.getBooleanProperty(FORCE_PARAM_NAME)) .waitForResourcesInPrepare(request.getBooleanProperty(WAIT_FOR_RESOURCES_IN_PREPARE)) @@ -345,6 +365,7 @@ public static PrepareParams fromJson(byte[] json, TenantName tenant, Duration ba .dockerImageRepository(SlimeUtils.optionalString(params.field(DOCKER_IMAGE_REPOSITORY)).orElse(null)) .athenzDomain(SlimeUtils.optionalString(params.field(ATHENZ_DOMAIN)).orElse(null)) .quota(deserialize(params.field(QUOTA_PARAM_NAME), Quota::fromSlime)) + .tenantVaults(deserialize(params.field(TENANT_VAULTS_PARAM_NAME), TenantVaultSerializer::listFromSlime, List.of())) .tenantSecretStores(deserialize(params.field(TENANT_SECRET_STORES_PARAM_NAME), TenantSecretStoreSerializer::listFromSlime, List.of())) .force(booleanValue(params, FORCE_PARAM_NAME)) .waitForResourcesInPrepare(booleanValue(params, WAIT_FOR_RESOURCES_IN_PREPARE)) @@ -458,6 +479,10 @@ public Optional quota() { return quota; } + public List tenantVaults() { + return tenantVaults; + } + public List tenantSecretStores() { return tenantSecretStores; } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/Session.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/Session.java index 39025aa8374a..1afc0cbcd52a 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/Session.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/Session.java @@ -8,6 +8,7 @@ import com.yahoo.config.application.api.ApplicationPackage; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.AllocatedHosts; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; @@ -19,6 +20,7 @@ import com.yahoo.transaction.Transaction; import com.yahoo.vespa.config.server.application.ApplicationVersions; import com.yahoo.vespa.config.server.tenant.TenantRepository; + import java.security.cert.X509Certificate; import java.time.Instant; import java.util.List; @@ -142,6 +144,10 @@ public Transaction createDeactivateTransaction() { return createSetStatusTransaction(Status.DEACTIVATE); } + public List getTenantVaults() { + return sessionZooKeeperClient.readTenantVaults(); + } + public List getTenantSecretStores() { return sessionZooKeeperClient.readTenantSecretStores(); } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionData.java 
b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionData.java index 1757998882e4..a6180b37d5ac 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionData.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionData.java @@ -5,6 +5,7 @@ import com.yahoo.config.FileReference; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; import com.yahoo.config.provision.CloudAccount; @@ -16,6 +17,7 @@ import com.yahoo.vespa.config.server.tenant.DataplaneTokenSerializer; import com.yahoo.vespa.config.server.tenant.OperatorCertificateSerializer; import com.yahoo.vespa.config.server.tenant.TenantSecretStoreSerializer; +import com.yahoo.vespa.config.server.tenant.TenantVaultSerializer; import java.io.IOException; import java.security.cert.X509Certificate; @@ -38,6 +40,7 @@ public record SessionData(ApplicationId applicationId, Optional dockerImageRepository, Optional athenzDomain, Optional quota, + List tenantVaults, List tenantSecretStores, List operatorCertificates, Optional cloudAccount, @@ -52,6 +55,7 @@ public record SessionData(ApplicationId applicationId, static final String DOCKER_IMAGE_REPOSITORY_PATH = "dockerImageRepository"; static final String ATHENZ_DOMAIN = "athenzDomain"; static final String QUOTA_PATH = "quota"; + static final String TENANT_VAULTS_PATH = "tenantVaults"; static final String TENANT_SECRET_STORES_PATH = "tenantSecretStores"; static final String OPERATOR_CERTIFICATES_PATH = "operatorCertificates"; static final String CLOUD_ACCOUNT_PATH = "cloudAccount"; @@ -79,6 +83,9 @@ private void toSlime(Cursor object) { athenzDomain.ifPresent(domain -> object.setString(ATHENZ_DOMAIN, domain.value())); quota.ifPresent(q -> q.toSlime(object.setObject(QUOTA_PATH))); + Cursor tenantVaultArray = object.setArray(TENANT_VAULTS_PATH); + TenantVaultSerializer.toSlime(tenantVaults, tenantVaultArray); + Cursor tenantSecretStoresArray = object.setArray(TENANT_SECRET_STORES_PATH); TenantSecretStoreSerializer.toSlime(tenantSecretStores, tenantSecretStoresArray); @@ -104,6 +111,7 @@ static SessionData fromSlime(Slime slime) { SlimeUtils.isPresent(cursor.field(QUOTA_PATH)) ? 
Optional.of(Quota.fromSlime(cursor.field(QUOTA_PATH))) : Optional.empty(), + TenantVaultSerializer.listFromSlime(cursor.field(TENANT_VAULTS_PATH)), TenantSecretStoreSerializer.listFromSlime(cursor.field(TENANT_SECRET_STORES_PATH)), OperatorCertificateSerializer.fromSlime(cursor.field(OPERATOR_CERTIFICATES_PATH)), optionalString(cursor.field(CLOUD_ACCOUNT_PATH)).map(CloudAccount::from), diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionPreparer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionPreparer.java index 7f1d8678ed04..4c9cc7aca06e 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionPreparer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionPreparer.java @@ -21,11 +21,11 @@ import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.AllocatedHosts; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; import com.yahoo.config.provision.CloudAccount; -import com.yahoo.config.provision.CloudName; import com.yahoo.config.provision.DataplaneToken; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.InstanceName; @@ -362,6 +362,7 @@ void writeStateZK(FileReference filereference) { prepareResult.allocatedHosts(), athenzDomain, params.quota(), + params.tenantVaults(), params.tenantSecretStores(), params.operatorCertificates(), params.cloudAccount(), @@ -406,6 +407,7 @@ private void writeStateToZooKeeper(SessionZooKeeperClient zooKeeperClient, AllocatedHosts allocatedHosts, Optional athenzDomain, Optional quota, + List tenantVaults, List tenantSecretStores, List operatorCertificates, Optional cloudAccount, @@ -422,6 +424,7 @@ private void writeStateToZooKeeper(SessionZooKeeperClient zooKeeperClient, vespaVersion, athenzDomain, quota, + tenantVaults, tenantSecretStores, operatorCertificates, cloudAccount, diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java index f01cb1f3cc47..accf78fe82dd 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java @@ -586,6 +586,7 @@ private void write(Session existingSession, LocalSession session, ApplicationId existingSession.getVespaVersion(), existingSession.getAthenzDomain(), existingSession.getQuota(), + existingSession.getTenantVaults(), existingSession.getTenantSecretStores(), existingSession.getOperatorCertificates(), existingSession.getCloudAccount(), diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionSerializer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionSerializer.java index 438db91721f9..fc6ea76eae01 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionSerializer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionSerializer.java @@ -5,6 +5,7 @@ import com.yahoo.config.FileReference; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.ApplicationId; import 
com.yahoo.config.provision.AthenzDomain; import com.yahoo.config.provision.CloudAccount; @@ -32,8 +33,9 @@ public class SessionSerializer { void write(SessionZooKeeperClient zooKeeperClient, ApplicationId applicationId, Instant created, Optional fileReference, Optional dockerImageRepository, Version vespaVersion, Optional athenzDomain, Optional quota, - List tenantSecretStores, List operatorCertificates, - Optional cloudAccount, List dataplaneTokens, ActivationTriggers activationTriggers, + List tenantVaults, List tenantSecretStores, + List operatorCertificates, Optional cloudAccount, + List dataplaneTokens, ActivationTriggers activationTriggers, BooleanFlag writeSessionData) { zooKeeperClient.writeApplicationId(applicationId); zooKeeperClient.writeApplicationPackageReference(fileReference); @@ -54,6 +56,7 @@ void write(SessionZooKeeperClient zooKeeperClient, ApplicationId applicationId, dockerImageRepository, athenzDomain, quota, + tenantVaults, tenantSecretStores, operatorCertificates, cloudAccount, @@ -81,6 +84,7 @@ private static SessionData readSessionDataFromLegacyPaths(SessionZooKeeperClient zooKeeperClient.readDockerImageRepository(), zooKeeperClient.readAthenzDomain(), zooKeeperClient.readQuota(), + zooKeeperClient.readTenantVaults(), zooKeeperClient.readTenantSecretStores(), zooKeeperClient.readOperatorCertificates(), zooKeeperClient.readCloudAccount(), diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClient.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClient.java index 1f000bc5856f..fd76a07a7d65 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClient.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClient.java @@ -9,6 +9,7 @@ import com.yahoo.config.model.api.ConfigDefinitionRepo; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.AllocatedHosts; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; @@ -30,12 +31,14 @@ import com.yahoo.vespa.config.server.tenant.OperatorCertificateSerializer; import com.yahoo.vespa.config.server.tenant.TenantRepository; import com.yahoo.vespa.config.server.tenant.TenantSecretStoreSerializer; +import com.yahoo.vespa.config.server.tenant.TenantVaultSerializer; import com.yahoo.vespa.config.server.zookeeper.ZKApplication; import com.yahoo.vespa.config.server.zookeeper.ZKApplicationPackage; import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.curator.transaction.CuratorOperations; import com.yahoo.vespa.curator.transaction.CuratorTransaction; import org.apache.zookeeper.data.Stat; + import java.security.cert.X509Certificate; import java.time.Duration; import java.time.Instant; @@ -55,6 +58,7 @@ import static com.yahoo.vespa.config.server.session.SessionData.QUOTA_PATH; import static com.yahoo.vespa.config.server.session.SessionData.SESSION_DATA_PATH; import static com.yahoo.vespa.config.server.session.SessionData.TENANT_SECRET_STORES_PATH; +import static com.yahoo.vespa.config.server.session.SessionData.TENANT_VAULTS_PATH; import static com.yahoo.vespa.config.server.session.SessionData.VERSION_PATH; import static com.yahoo.vespa.config.server.zookeeper.ZKApplication.USER_DEFCONFIGS_ZK_SUBPATH; import static com.yahoo.vespa.curator.Curator.CompletionWaiter; @@ -203,6 +207,10 @@ private Path quotaPath() 
{ return sessionPath.append(QUOTA_PATH); } + private Path tenantVaultPath() { + return sessionPath.append(TENANT_VAULTS_PATH); + } + private Path tenantSecretStorePath() { return sessionPath.append(TENANT_SECRET_STORES_PATH); } @@ -315,6 +323,20 @@ public Optional readQuota() { .map(slime -> Quota.fromSlime(slime.get())); } + public void writeTenantVaults(List tenantVaults) { + if (! tenantVaults.isEmpty()) { + var bytes = uncheck(() -> SlimeUtils.toJsonBytes(TenantVaultSerializer.toSlime(tenantVaults))); + curator.set(tenantVaultPath(), bytes); + } + } + + public List readTenantVaults() { + return curator.getData(tenantVaultPath()) + .map(SlimeUtils::jsonToSlime) + .map(slime -> TenantVaultSerializer.listFromSlime(slime.get())) + .orElse(List.of()); + } + public void writeTenantSecretStores(List tenantSecretStores) { if (!tenantSecretStores.isEmpty()) { var bytes = uncheck(() -> SlimeUtils.toJsonBytes(TenantSecretStoreSerializer.toSlime(tenantSecretStores))); diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/TenantVaultSerializer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/TenantVaultSerializer.java index c7432440157a..c60b0bf05907 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/TenantVaultSerializer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/TenantVaultSerializer.java @@ -71,8 +71,8 @@ private static List secretsFromSlime(Inspector inspector) { private static TenantVault.Secret secretFromSlime(Inspector inspector) { return new TenantVault.Secret( - inspector.field("name").asString(), - inspector.field("id").asString()); + inspector.field("id").asString(), inspector.field("name").asString() + ); } public static List listFromSlime(Inspector inspector) { diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java index 3289cc71357f..5541f8256fe4 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java @@ -74,6 +74,7 @@ public void testModelContextTest() { Optional.empty(), Optional.empty(), List.of(), + List.of(), new SecretStoreProvider().get(), List.of(), Optional.empty(), diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java index 3bbe13837fc0..1dca5459ae80 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java @@ -5,6 +5,7 @@ import com.yahoo.config.model.api.ContainerEndpoint; import com.yahoo.config.model.api.EndpointCertificateMetadata; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.CloudAccount; import com.yahoo.config.provision.TenantName; @@ -18,6 +19,7 @@ import com.yahoo.vespa.config.server.tenant.ContainerEndpointSerializer; import com.yahoo.vespa.config.server.tenant.EndpointCertificateMetadataSerializer; import com.yahoo.vespa.config.server.tenant.TenantSecretStoreSerializer; +import com.yahoo.vespa.config.server.tenant.TenantVaultSerializer; import org.junit.Test; import java.io.IOException; @@ -170,6 +172,35 @@ 
public void testOperatorCertificates() throws IOException { assertEquals(certificate, prepareParams.operatorCertificates().get(0)); } + @Test + public void testTenantVaults() throws IOException { + List tenantVaults = List.of(new TenantVault( + "id", "name", "extId", + List.of(new TenantVault.Secret("sId", "sName")))); + + Slime tenantVaultSlime = TenantVaultSerializer.toSlime(tenantVaults); + String tenantVaultParam = new String(SlimeUtils.toJsonBytes(tenantVaultSlime), StandardCharsets.UTF_8); + + var prepareParams = createParams(request + "&" + PrepareParams.TENANT_VAULTS_PARAM_NAME + "=" + + URLEncoder.encode(tenantVaultParam, StandardCharsets.UTF_8), + TenantName.from("foo")); + + assertEquals(1, prepareParams.tenantVaults().size()); + TenantVault tenantVault = prepareParams.tenantVaults().get(0); + assertEquals("id", tenantVault.id()); + assertEquals("name", tenantVault.name()); + assertEquals("extId", tenantVault.externalId()); + assertEquals(1, tenantVault.secrets().size()); + assertEquals("sId", tenantVault.secrets().get(0).id()); + assertEquals("sName", tenantVault.secrets().get(0).name()); + + // Verify using json object + var root = SlimeUtils.jsonToSlime(json); + new Injector().inject(tenantVaultSlime.get(), new ObjectInserter(root.get(), PrepareParams.TENANT_VAULTS_PARAM_NAME)); + PrepareParams prepareParamsJson = PrepareParams.fromJson(SlimeUtils.toJsonBytes(root), TenantName.from("foo"), Duration.ofSeconds(60)); + assertPrepareParamsEqual(prepareParams, prepareParamsJson); + } + @Test public void testSecretStores() throws IOException { List secretStores = List.of(new TenantSecretStore("name", "awsId", "role", "extId")); @@ -213,6 +244,7 @@ private void assertPrepareParamsEqual(PrepareParams urlParams, PrepareParams jso assertEquals(urlParams.dockerImageRepository(), jsonParams.dockerImageRepository()); assertEquals(urlParams.athenzDomain(), jsonParams.athenzDomain()); assertEquals(urlParams.quota(), jsonParams.quota()); + assertEquals(urlParams.tenantVaults(), jsonParams.tenantVaults()); assertEquals(urlParams.tenantSecretStores(), jsonParams.tenantSecretStores()); } diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java index d6631cc42b6f..0eba74a3c019 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java @@ -6,6 +6,7 @@ import com.yahoo.config.FileReference; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.TenantName; import com.yahoo.path.Path; @@ -146,6 +147,20 @@ public void require_quota_written_and_parsed() { assertEquals(quota, zkc.readQuota()); } + @Test + public void tenant_vaults_are_written_and_parsed() { + var vaults = List.of( + new TenantVault("id1", "name1", "extId1", + List.of(new TenantVault.Secret("sId1", "sName1"))), + new TenantVault("id2", "name2", "extId2", + List.of(new TenantVault.Secret("sId2", "sName2")))); + + var zkc = createSessionZKClient(4); + zkc.writeTenantVaults(vaults); + List actual = zkc.readTenantVaults(); + assertEquals(vaults, actual); + } + @Test public void require_tenant_secret_stores_written_and_parsed() { var secretStores = List.of( @@ -171,6 +186,7 
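
Aside, not part of the patch: the write/read pair and the tests above boil down to one round trip through Slime JSON. A minimal sketch of that round trip, reusing only calls that appear in this patch; package names are assumed from their use elsewhere in the patch, and the checked IOException from toJsonBytes is propagated here instead of being wrapped with uncheck() as in SessionZooKeeperClient.

import com.yahoo.config.model.api.TenantVault;
import com.yahoo.slime.Slime;
import com.yahoo.slime.SlimeUtils;
import com.yahoo.vespa.config.server.tenant.TenantVaultSerializer;

import java.io.IOException;
import java.util.List;

class TenantVaultRoundTrip {

    // Serialize as writeTenantVaults() does, then parse back as readTenantVaults() does.
    static List<TenantVault> roundTrip(List<TenantVault> vaults) throws IOException {
        Slime slime = TenantVaultSerializer.toSlime(vaults);   // structure stored for the session
        byte[] json = SlimeUtils.toJsonBytes(slime);            // bytes written to ZooKeeper
        return TenantVaultSerializer.listFromSlime(SlimeUtils.jsonToSlime(json).get());
    }
}

The test tenant_vaults_are_written_and_parsed above is in essence assertEquals(vaults, roundTrip(vaults)).
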
@@ public void require_that_session_data_is_written_to_zk() { Optional.empty(), List.of(), List.of(), + List.of(), Optional.empty(), List.of(), ActivationTriggers.empty())); @@ -178,7 +194,7 @@ public void require_that_session_data_is_written_to_zk() { assertTrue(curator.exists(path)); String data = Utf8.toString(curator.getData(path).get()); assertTrue(data.contains("{\"applicationId\":\"default:default:default\",\"applicationPackageReference\":\"foo\",\"version\":\"8.195.1\",\"createTime\":")); - assertTrue(data.contains(",\"tenantSecretStores\":[],\"operatorCertificates\":[],\"dataplaneTokens\":[]," + + assertTrue(data.contains(",\"tenantVaults\":[],\"tenantSecretStores\":[],\"operatorCertificates\":[],\"dataplaneTokens\":[]," + "\"activationTriggers\":{\"nodeRestarts\":[],\"reindexings\":[]}")); } From 0843dc12efab396dd2cac44280c626971bd909d8 Mon Sep 17 00:00:00 2001 From: gjoranv Date: Wed, 13 Nov 2024 11:48:24 +0100 Subject: [PATCH 016/126] Propagate tenant vault name->id mapping as config - Create a new config asm-tenant-secret for mapping vaultName to vaultId and externalId - Move system and tenant config values to the new config --- .../config/model/deploy/TestProperties.java | 8 ++ .../model/container/xml/CloudAsmSecrets.java | 30 +++++-- .../model/container/xml/CloudSecrets.java | 2 +- .../container/xml/ContainerModelBuilder.java | 3 +- .../model/container/xml/SecretsTest.java | 88 ++++++++++++++----- .../configdefinitions/asm-secret.def | 3 +- .../configdefinitions/asm-tenant-secret.def | 10 +++ .../secret/aws/AsmTenantSecretReader.java | 26 ++++-- .../secret/aws/AsmTenantSecretReaderTest.java | 19 +++- 9 files changed, 153 insertions(+), 36 deletions(-) create mode 100644 container-disc/src/main/resources/configdefinitions/asm-tenant-secret.def diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java index 8f219b214830..aa4b3b89fa54 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java @@ -7,6 +7,7 @@ import com.yahoo.config.model.api.ModelContext; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; import com.yahoo.config.provision.CloudAccount; @@ -54,6 +55,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea private double feedConcurrency = 0.5; private double feedNiceness = 0.0; private int maxActivationInhibitedOutOfSyncGroups = 0; + private List tenantVaults = List.of(); private List tenantSecretStores = List.of(); private boolean allowDisableMtls = true; private List operatorCertificates = List.of(); @@ -114,6 +116,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea @Override public double feedConcurrency() { return feedConcurrency; } @Override public double feedNiceness() { return feedNiceness; } @Override public int maxActivationInhibitedOutOfSyncGroups() { return maxActivationInhibitedOutOfSyncGroups; } + @Override public List tenantVaults() { return tenantVaults; } @Override public List tenantSecretStores() { return tenantSecretStores; } @Override public boolean allowDisableMtls() { return allowDisableMtls; } @Override public List operatorCertificates() { return operatorCertificates; } @@ 
-278,6 +281,11 @@ public TestProperties maxActivationInhibitedOutOfSyncGroups(int nGroups) { return this; } + public TestProperties setTenantVaults(List tenantVaults) { + this.tenantVaults = List.copyOf(tenantVaults); + return this; + } + public TestProperties setTenantSecretStores(List secretStores) { this.tenantSecretStores = List.copyOf(secretStores); return this; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java index 6819548bea18..1618ba05e3b7 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java @@ -2,6 +2,8 @@ package com.yahoo.vespa.model.container.xml; import ai.vespa.secret.config.aws.AsmSecretConfig; +import ai.vespa.secret.config.aws.AsmTenantSecretConfig; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.AthenzDomain; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.TenantName; @@ -10,35 +12,53 @@ import com.yahoo.vespa.model.container.component.SimpleComponent; import java.net.URI; +import java.util.List; /** * @author lesters */ -public class CloudAsmSecrets extends SimpleComponent implements AsmSecretConfig.Producer { +public class CloudAsmSecrets extends SimpleComponent implements + AsmSecretConfig.Producer, + AsmTenantSecretConfig.Producer { - private static final String CLASS = "ai.vespa.secret.aws.AsmTenantSecretReader"; + static final String CLASS = "ai.vespa.secret.aws.AsmTenantSecretReader"; private static final String BUNDLE = "jdisc-cloud-aws"; private final URI ztsUri; private final AthenzDomain athenzDomain; private final SystemName system; private final TenantName tenant; + private final List tenantVaults; public CloudAsmSecrets(URI ztsUri, AthenzDomain athenzDomain, - SystemName system, TenantName tenant) { + SystemName system, TenantName tenant, + List tenantVaults) { super(new ComponentModel(BundleInstantiationSpecification.fromStrings(CLASS, CLASS, BUNDLE))); this.ztsUri = ztsUri; this.athenzDomain = athenzDomain; this.system = system; this.tenant = tenant; + this.tenantVaults = tenantVaults; } @Override public void getConfig(AsmSecretConfig.Builder builder) { builder.ztsUri(ztsUri.toString()) - .athenzDomain(athenzDomain.value()) - .system(system.value()) + .athenzDomain(athenzDomain.value()); + } + + @Override + public void getConfig(AsmTenantSecretConfig.Builder builder) { + builder.system(system.value()) .tenant(tenant.value()); + + tenantVaults.forEach(vault -> { + builder.vaults(vaultBuilder -> { + vaultBuilder.id(vault.id()) + .name(vault.name()) + .externalId(vault.externalId()); + }); + }); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudSecrets.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudSecrets.java index 50f499766795..b3332c475838 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudSecrets.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudSecrets.java @@ -14,7 +14,7 @@ */ public class CloudSecrets extends SimpleComponent implements SecretsConfig.Producer { - private static final String CLASS = "ai.vespa.secret.aws.SecretsImpl"; + static final String CLASS = "ai.vespa.secret.aws.SecretsImpl"; private static final String BUNDLE = "jdisc-cloud-aws"; private final List secrets = new ArrayList<>(); diff 
--git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 72ac906b8e00..d88a131d9a40 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -318,7 +318,8 @@ private void addSecrets(ApplicationContainerCluster cluster, Element spec, Deplo cluster.addComponent(new CloudAsmSecrets(deployState.getProperties().ztsUrl(), deployState.getProperties().tenantSecretDomain(), deployState.zone().system(), - deployState.getProperties().applicationId().tenant())); + deployState.getProperties().applicationId().tenant(), + deployState.getProperties().tenantVaults())); } } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/SecretsTest.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/SecretsTest.java index c6d68be069e5..8e51813d3cee 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/SecretsTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/SecretsTest.java @@ -2,7 +2,9 @@ package com.yahoo.vespa.model.container.xml; import ai.vespa.secret.config.SecretsConfig; +import ai.vespa.secret.config.aws.AsmTenantSecretConfig; import com.yahoo.component.ComponentId; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.model.builder.xml.test.DomBuilderTest; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.model.deploy.TestProperties; @@ -15,6 +17,8 @@ import org.junit.jupiter.api.Test; import org.w3c.dom.Element; +import java.util.List; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; @@ -23,45 +27,87 @@ */ public class SecretsTest extends ContainerModelBuilderTestBase { - private static final String IMPL_ID = "ai.vespa.secret.aws.SecretsImpl"; + private static final String SECRETS_IMPL_ID = CloudSecrets.CLASS; + @Test void testCloudSecretsNeedHosted() { - Element clusterElem = DomBuilderTest.parse( - "", - " ", - " ", - " ", - ""); - createModel(root, clusterElem); + createModel(root, containerXml()); ApplicationContainerCluster container = getContainerCluster("container"); - Component component = container.getComponentsMap().get(ComponentId.fromString(IMPL_ID)); + Component component = container.getComponentsMap().get(ComponentId.fromString(SECRETS_IMPL_ID)); assertNull(component); } @Test void testSecretsCanBeSetUp() { - Element clusterElem = DomBuilderTest.parse( - "", - " ", - " ", - " ", - ""); DeployState state = new DeployState.Builder() .properties(new TestProperties().setHostedVespa(true)) .zone(new Zone(SystemName.Public, Environment.prod, RegionName.defaultName())) .build(); - createModel(root, state, null, clusterElem); + createModel(root, state, null, containerXml()); ApplicationContainerCluster container = getContainerCluster("container"); - assertComponentConfigured(container, IMPL_ID); - CloudSecrets secrets = (CloudSecrets) container.getComponentsMap().get(ComponentId.fromString(IMPL_ID)); + assertComponentConfigured(container, SECRETS_IMPL_ID); + var secretsConfig = getSecretsConfig(container); + + assertEquals(1, secretsConfig.secret().size()); + assertEquals("openai-apikey", secretsConfig.secret("openAiApiKey").name()); + } + + @Test + void tenant_vaults_are_propagated_in_config() { + var tenantVaults = 
List.of( + new TenantVault("id1", "name1", "externalId1", List.of()), + new TenantVault("id2", "name2", "externalId2", List.of())); + + var deployState = new DeployState.Builder() + .properties(new TestProperties() + .setHostedVespa(true) + .setTenantVaults(tenantVaults)) + .zone(new Zone(SystemName.Public, Environment.prod, RegionName.defaultName())) + .build(); + + createModel(root, deployState, null, containerXml()); + ApplicationContainerCluster container = getContainerCluster("container"); + + var config = getAsmTenantSecretConfig(container); + assertEquals(SystemName.Public.value(), config.system()); + assertEquals("default", config.tenant()); + + var vaults = config.vaults(); + assertEquals(2, vaults.size()); + + assertEquals("id1", vaults.get(0).id()); + assertEquals("name1", vaults.get(0).name()); + assertEquals("externalId1", vaults.get(0).externalId()); + + assertEquals("id2", vaults.get(1).id()); + assertEquals("name2", vaults.get(1).name()); + assertEquals("externalId2", vaults.get(1).externalId()); + } + + private static AsmTenantSecretConfig getAsmTenantSecretConfig(ApplicationContainerCluster container) { + var secrets = (CloudAsmSecrets) container.getComponentsMap().get(ComponentId.fromString(CloudAsmSecrets.CLASS)); + + AsmTenantSecretConfig.Builder configBuilder = new AsmTenantSecretConfig.Builder(); + secrets.getConfig(configBuilder); + return configBuilder.build(); + } + + private static SecretsConfig getSecretsConfig(ApplicationContainerCluster container) { + var secrets = (CloudSecrets) container.getComponentsMap().get(ComponentId.fromString(SECRETS_IMPL_ID)); SecretsConfig.Builder configBuilder = new SecretsConfig.Builder(); secrets.getConfig(configBuilder); - SecretsConfig secretsConfig = configBuilder.build(); + return configBuilder.build(); + } - assertEquals(1, secretsConfig.secret().size()); - assertEquals("openai-apikey", secretsConfig.secret("openAiApiKey").name()); + private static Element containerXml() { + return DomBuilderTest.parse( + "", + " ", + " ", + " ", + ""); } } diff --git a/container-disc/src/main/resources/configdefinitions/asm-secret.def b/container-disc/src/main/resources/configdefinitions/asm-secret.def index 268060131aea..d9a2ce3743d8 100644 --- a/container-disc/src/main/resources/configdefinitions/asm-secret.def +++ b/container-disc/src/main/resources/configdefinitions/asm-secret.def @@ -5,7 +5,6 @@ package=ai.vespa.secret.config.aws ztsUri string athenzDomain string default="" -# TODO: move to a separaet config (and remove defaults). 
Only used by AsmTenantSecretReader -# Used to create the athenz role name when retrieving secrets on behalf of a tenant +# TODO: unused, remove after model version xxx.yyy has rolled out of hosted Vespa tenant string default="" system string default="" diff --git a/container-disc/src/main/resources/configdefinitions/asm-tenant-secret.def b/container-disc/src/main/resources/configdefinitions/asm-tenant-secret.def new file mode 100644 index 000000000000..61f9119594ba --- /dev/null +++ b/container-disc/src/main/resources/configdefinitions/asm-tenant-secret.def @@ -0,0 +1,10 @@ +package=ai.vespa.secret.config.aws + +# Used to create the athenz role name when retrieving secrets on behalf of a tenant +system string +tenant string + +# Mapping used to create the AWS role name to assume for retrieving secrets +vaults[].id string +vaults[].name string +vaults[].externalId string diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java index ac6d8caa1878..9fc560ebd5bd 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java @@ -2,13 +2,17 @@ package ai.vespa.secret.aws; import ai.vespa.secret.config.aws.AsmSecretConfig; +import ai.vespa.secret.config.aws.AsmTenantSecretConfig; import ai.vespa.secret.model.Key; +import ai.vespa.secret.model.VaultId; import ai.vespa.secret.model.VaultName; import com.yahoo.component.annotation.Inject; import com.yahoo.vespa.athenz.identity.ServiceIdentityProvider; import software.amazon.awssdk.services.secretsmanager.SecretsManagerClient; +import java.util.Map; import java.util.function.Function; +import java.util.stream.Collectors; /** * Secret reader for tenant nodes. @@ -19,20 +23,32 @@ public final class AsmTenantSecretReader extends AsmSecretReader { private final String system; private final String tenant; + private final Map vaultIds; @Inject - public AsmTenantSecretReader(AsmSecretConfig config, ServiceIdentityProvider identities) { - super(config, identities); - this.system = config.system(); - this.tenant = config.tenant(); + public AsmTenantSecretReader(AsmSecretConfig secretConfig, + AsmTenantSecretConfig tenantConfig, + ServiceIdentityProvider identities) { + super(secretConfig, identities); + this.system = tenantConfig.system(); + this.tenant = tenantConfig.tenant(); + this.vaultIds = createVaultIdMap(tenantConfig); } // For testing AsmTenantSecretReader(Function clientAndCredentialsSupplier, - String system, String tenant) { + String system, String tenant, Map vaultIds) { super(clientAndCredentialsSupplier); this.system = system; this.tenant = tenant; + this.vaultIds = vaultIds; + } + + static Map createVaultIdMap(AsmTenantSecretConfig config) { + // Note: we can rightfully assume that the vaults are unique by name for a tenant. 
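        // (Editorial aside, not in the patch.) Given the uniqueness assumption above, an equivalent and
        // slightly shorter collector is:
        //     config.vaults().stream()
        //           .collect(Collectors.toMap(vault -> VaultName.of(vault.name()),
        //                                     vault -> VaultId.of(vault.id())));
        // Collectors.toMap(keyMapper, valueMapper) throws IllegalStateException on duplicate keys, so a
        // violation of the uniqueness assumption would surface instead of silently keeping one entry.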
+ return config.vaults().stream() + .map(vault -> Map.entry(VaultName.of(vault.name()), VaultId.of(vault.id()))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } @Override diff --git a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java index ecea026a03fd..1aab91d4dc0f 100644 --- a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java +++ b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java @@ -2,15 +2,18 @@ import ai.vespa.secret.aws.testutil.AsmSecretReaderTester; import ai.vespa.secret.aws.testutil.AsmSecretTesterBase.SecretVersion; +import ai.vespa.secret.config.aws.AsmTenantSecretConfig; import ai.vespa.secret.model.Key; import ai.vespa.secret.model.Secret; import ai.vespa.secret.model.SecretName; import ai.vespa.secret.model.SecretVersionId; import ai.vespa.secret.model.SecretVersionState; +import ai.vespa.secret.model.VaultId; import ai.vespa.secret.model.VaultName; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import java.util.Map; import java.util.Set; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -40,7 +43,7 @@ void reset() { } AsmTenantSecretReader secretReader() { - return new AsmTenantSecretReader(tester::newClient, system, tenant); + return new AsmTenantSecretReader(tester::newClient, system, tenant, Map.of()); } @Test @@ -159,6 +162,20 @@ void it_returns_empty_list_of_versions_for_unknown_secret() { } } + @Test + void it_creates_map_from_vaultName_to_vaultId_from_config() { + var config = new AsmTenantSecretConfig.Builder() + .system(system) + .tenant(tenant) + .vaults(builder -> builder.name("vault1").id("id1").externalId("ext1")) + .vaults(builder -> builder.name("vault2").id("id2").externalId("ext2")); + + Map idMap = AsmTenantSecretReader.createVaultIdMap(config.build()); + assertEquals(2, idMap.size()); + assertEquals(VaultId.of("id1"), idMap.get(VaultName.of("vault1"))); + assertEquals(VaultId.of("id2"), idMap.get(VaultName.of("vault2"))); + } + private void assertSame(SecretVersion version, Secret secret) { assertEquals(version.value(), secret.secretAsString()); assertEquals(version.version(), secret.version().value()); From 1a199100bf6a729b487a610d127ccdfa0a052fd7 Mon Sep 17 00:00:00 2001 From: Theodor Kvalsvik Lauritzen Date: Thu, 14 Nov 2024 11:33:47 +0100 Subject: [PATCH 017/126] fix: All YQL tests now passes --- .../src/main/ccc/yqlplus/YQLPlus.ccc | 6 ++--- .../java/ai/vespa/schemals/YQLParserTest.java | 22 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc b/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc index 4448c372c87a..8b7555013b95 100644 --- a/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc +++ b/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc @@ -466,7 +466,7 @@ argument(boolean in_select): expression(boolean select): ( null_operator - | annotate_expression + | (SCAN annotate_expression => annotate_expression) | logical_OR_expression ) ; @@ -515,7 +515,7 @@ equality_expression: in_not_in_target: ( // TODO: Add expression stack peek - ( select_statement ) + SCAN 2 => ( select_statement ) | literal_list ) ; @@ -609,7 +609,7 @@ primary_expression: ( expression(in_select) ) | constant_expression | ( 
- (SCAN 4 => call_expression(in_select)) // WARNING: The scan number could be very large. This will catch all Myfield.MyChild. However nested structs will not parse + (SCAN namespaced_name => call_expression(in_select) ) | fieldref ) ) diff --git a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java index a97bcea5d415..15338f4eb5f9 100644 --- a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java +++ b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java @@ -80,7 +80,7 @@ Stream generateGoodTests() { "select * from music where text contains phrase(\"st\", \"louis\", \"blues\")", "select * from music where persons contains sameElement(first_name contains 'Joe', last_name contains 'Smith', year_of_birth < 1940)", "select * from music where identities contains sameElement(key contains 'father', value.first_name contains 'Joe', value.last_name contains 'Smith', value.year_of_birth < 1940)", - // "select * from music where gradparentStruct.parentStruct.childField contains 'madonna'", + "select * from music where gradparentStruct.parentStruct.childField contains 'madonna'", "select * from music where fieldName contains equiv(\"A\",\"B\")", "select * from music where myUrlField contains uri(\"vespa.ai/foo\")", "select * from music where myStringAttribute contains ({prefixLength:1, maxEditDistance:2}fuzzy(\"parantesis\"))", @@ -90,21 +90,21 @@ Stream generateGoodTests() { "select * from sources * where vendor contains \"brick and mortar\" AND price < 50 AND userQuery()", "select * from music where rank(a contains \"A\", b contains \"B\", c contains \"C\")", "select * from music where rank(nearestNeighbor(field, queryVector), a contains \"A\", b contains \"B\", c contains \"C\")", - // "select * from music where integer_field in (10, 20, 30)", - // "select * from music where string_field in ('germany', 'france', 'norway')", - // "select * from music where integer_field in (@integer_values)", - // "select * from music where string_field in (@string_values)", - // "select * from music where dotProduct(description, {\"a\":1, \"b\":2})", - // "select * from music where weightedSet(description, {\"a\":1, \"b\":2})", - // "select * from music where wand(description, [[11,1], [37,2]])", - // "select * from music where ({scoreThreshold: 0.13, targetHits: 7}wand(description, {\"a\":1, \"b\":2}))", + "select * from music where integer_field in (10, 20, 30)", + "select * from music where string_field in ('germany', 'france', 'norway')", + "select * from music where integer_field in (@integer_values)", + "select * from music where string_field in (@string_values)", + "select * from music where dotProduct(description, {\"a\":1, \"b\":2})", + "select * from music where weightedSet(description, {\"a\":1, \"b\":2})", + "select * from music where wand(description, [[11,1], [37,2]])", + "select * from music where ({scoreThreshold: 0.13, targetHits: 7}wand(description, {\"a\":1, \"b\":2}))", "select * from music where weakAnd(a contains \"A\", b contains \"B\")", "select * from music where ({targetHits: 7}weakAnd(a contains \"A\", b contains \"B\"))", "select * from music where geoLocation(myfieldname, 63.5, 10.5, \"200 km\")", "select * from music where ({targetHits: 10}nearestNeighbor(doc_vector, query_vector))&input.query(query_vector)=[3,5,7]", "select * from 
sources * where bar contains \"a\" and nonEmpty(bar contains \"bar\" and foo contains @foo)", - // "select * from music where predicate(predicate_field,{\"gender\":\"Female\"},{\"age\":20L})", - // "select * from music where predicate(predicate_field,0,{\"age\":20L})", + "select * from music where predicate(predicate_field,{\"gender\":\"Female\"},{\"age\":20L})", + "select * from music where predicate(predicate_field,0,{\"age\":20L})", "select * from music where title contains \"madonna\" order by price asc, releasedate desc", "select * from music where title contains \"madonna\" order by {function: \"uca\", locale: \"en_US\", strength: \"IDENTICAL\"}other desc, {function: \"lowercase\"}something", "select * from music where title contains \"madonna\" limit 31 offset 29", From 135ad220ee440a7fd359d0fe7b2713aad00660d3 Mon Sep 17 00:00:00 2001 From: Theodor Kvalsvik Lauritzen Date: Thu, 14 Nov 2024 12:20:09 +0100 Subject: [PATCH 018/126] teat: Add test for new tokens in grouping language --- .../src/main/ccc/grouping/GroupingParser.ccc | 6 +++++ .../ai/vespa/schemals/ParserTokensTest.java | 22 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/integration/schema-language-server/language-server/src/main/ccc/grouping/GroupingParser.ccc b/integration/schema-language-server/language-server/src/main/ccc/grouping/GroupingParser.ccc index f8f18598eca9..a84f2392585b 100644 --- a/integration/schema-language-server/language-server/src/main/ccc/grouping/GroupingParser.ccc +++ b/integration/schema-language-server/language-server/src/main/ccc/grouping/GroupingParser.ccc @@ -40,6 +40,12 @@ INJECT GroupingParser: } +INJECT GroupingParserLexer: +{ + public static EnumSet getRegularTokens() { + return EnumSet.copyOf(regularTokens); + } +} TOKEN : diff --git a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/ParserTokensTest.java b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/ParserTokensTest.java index 39ac8ba9928c..ce524259d26d 100644 --- a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/ParserTokensTest.java +++ b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/ParserTokensTest.java @@ -4,20 +4,28 @@ import java.lang.reflect.Field; import java.util.ArrayList; +import java.util.Arrays; import java.util.EnumSet; import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.stream.Stream; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestFactory; +import org.junit.jupiter.api.DynamicTest; import ai.vespa.schemals.parser.SchemaParserLexer; import ai.vespa.schemals.parser.indexinglanguage.IndexingParserLexer; import ai.vespa.schemals.parser.rankingexpression.RankingExpressionParserLexer; +import ai.vespa.schemals.parser.grouping.GroupingParserLexer; +import com.vladsch.flexmark.parser.Parser; import com.yahoo.schema.parser.SchemaParserConstants; import com.yahoo.vespa.indexinglanguage.parser.IndexingParserConstants; import com.yahoo.searchlib.rankingexpression.parser.RankingExpressionParserConstants; +import com.yahoo.search.grouping.request.parser.GroupingParserConstants; + /** * Tests that the set of tokens declared in JavaCC parsers are also present in CongoCC parsers. 
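
For readers unfamiliar with the comparison these token tests perform, a stand-alone sketch of the idea follows. Names here are generic rather than the exact helpers in this test class (which additionally filters a list of known special tokens): a JavaCC-generated *Constants type declares one public static final int per token, so reflecting over its int fields yields the JavaCC token names, which can then be checked against the token names exposed by the CongoCC (or ANTLR) lexer.

import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

final class TokenSetCheck {

    // Returns the JavaCC token names that the other lexer does not declare.
    static List<String> missingTokens(Class<?> javaccConstants, Set<String> otherLexerTokenNames) {
        List<String> missing = new ArrayList<>();
        for (Field field : javaccConstants.getDeclaredFields()) {
            boolean looksLikeTokenKind = field.getType() == int.class
                    && Modifier.isStatic(field.getModifiers())
                    && Modifier.isFinal(field.getModifiers());
            if (looksLikeTokenKind && !otherLexerTokenNames.contains(field.getName())) {
                missing.add(field.getName());
            }
        }
        return missing;
    }
}
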
@@ -102,4 +110,18 @@ public void testRankingExpressionTokenList() { List missing = findMissingTokens(javaCCFields, congoCCTokenStrings); assertEquals(0, missing.size(), "Missing ranking expression tokens in CongoCC: " + String.join(", ", missing)); } + + @Test + public void testVespaGroupingTokenList() { + Field[] javaCCFields = GroupingParserConstants.class.getDeclaredFields(); + + Set congoCCTokenStrings = new HashSet<>(); + + for (var tokenType : GroupingParserLexer.getRegularTokens()) { + congoCCTokenStrings.add(tokenType.toString()); + } + + List missing = findMissingTokens(javaCCFields, congoCCTokenStrings); + assertEquals(0, missing.size(), "Missing ranking expression tokens in CongoCC: " + String.join(", ", missing)); + } } From 6b8c8eb09afdebd7761d48e31dac0f43213b2825 Mon Sep 17 00:00:00 2001 From: Valerij Fredriksen Date: Thu, 14 Nov 2024 12:44:33 +0100 Subject: [PATCH 019/126] Stop verifying PTR records --- .../com/yahoo/vespa/hosted/provision/node/Dns.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Dns.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Dns.java index 2e1ebdb05ac6..86185ba36701 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Dns.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Dns.java @@ -22,9 +22,14 @@ public class Dns { private Dns() {} + // TODO: Remove REVERSE after we have stopped adding those public enum RecordType { FORWARD, PUBLIC_FORWARD, REVERSE } /** Returns the set of DNS record types for a host and its children and the given version (ipv6), host type, etc. */ + public static Set recordTypesFor(IP.Version ipVersion, NodeType hostType, CloudName cloudName, boolean enclave) { + return recordTypesFor(ipVersion, hostType, cloudName, enclave, false); + } + public static Set recordTypesFor(IP.Version ipVersion, NodeType hostType, CloudName cloudName, boolean enclave, boolean allowReverse) { if (cloudName == CloudName.AWS || cloudName == CloudName.GCP) { if (enclave) { @@ -60,13 +65,12 @@ public static Set recordTypesFor(IP.Version ipVersion, NodeType host public static void verify(String hostname, String ipAddress, NodeType nodeType, NameResolver resolver, CloudAccount cloudAccount, Zone zone) { IP.Version version = IP.Version.fromIpAddress(ipAddress); - boolean allowReverse = !hostname.endsWith(".vespa-cloud.net"); - Set recordTypes = recordTypesFor(version, nodeType, zone.cloud().name(), cloudAccount.isEnclave(zone), allowReverse); + Set recordTypes = recordTypesFor(version, nodeType, zone.cloud().name(), cloudAccount.isEnclave(zone)); if (recordTypes.contains(RecordType.FORWARD)) { NameResolver.RecordType recordType = version.is6() ? 
NameResolver.RecordType.AAAA : NameResolver.RecordType.A; Set addresses = resolver.resolve(hostname, recordType); - if (!addresses.equals(java.util.Set.of(ipAddress))) + if (!addresses.equals(Set.of(ipAddress))) throw new IllegalArgumentException("Expected " + hostname + " to resolve to " + ipAddress + ", but got " + addresses); } From a0be0d894e8424ac9a7ed310526753a0f5396dff Mon Sep 17 00:00:00 2001 From: Theodor Kvalsvik Lauritzen Date: Thu, 14 Nov 2024 13:06:29 +0100 Subject: [PATCH 020/126] test: Add test to check for new token in yqlplus antlr parser --- .../src/main/ccc/yqlplus/YQLPlus.ccc | 7 ++++ .../ai/vespa/schemals/ParserTokensTest.java | 40 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc b/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc index 8b7555013b95..9dfee585d334 100644 --- a/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc +++ b/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc @@ -12,6 +12,13 @@ INJECT YQLPlusParser: protected Deque expression_stack = new ArrayDeque<>(); } +INJECT YQLPlusLexer: +{ + public static EnumSet getRegularTokens() { + return EnumSet.copyOf(regularTokens); + } +} + // -------------------------------------------------------------------------------- // // Token declarations. diff --git a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/ParserTokensTest.java b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/ParserTokensTest.java index ce524259d26d..a4d22419e2c3 100644 --- a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/ParserTokensTest.java +++ b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/ParserTokensTest.java @@ -13,18 +13,21 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestFactory; +import org.antlr.v4.runtime.Vocabulary; import org.junit.jupiter.api.DynamicTest; import ai.vespa.schemals.parser.SchemaParserLexer; import ai.vespa.schemals.parser.indexinglanguage.IndexingParserLexer; import ai.vespa.schemals.parser.rankingexpression.RankingExpressionParserLexer; import ai.vespa.schemals.parser.grouping.GroupingParserLexer; +import ai.vespa.schemals.parser.yqlplus.YQLPlusLexer; import com.vladsch.flexmark.parser.Parser; import com.yahoo.schema.parser.SchemaParserConstants; import com.yahoo.vespa.indexinglanguage.parser.IndexingParserConstants; import com.yahoo.searchlib.rankingexpression.parser.RankingExpressionParserConstants; import com.yahoo.search.grouping.request.parser.GroupingParserConstants; +import com.yahoo.search.yql.yqlplusLexer; /** @@ -54,6 +57,11 @@ public class ParserTokensTest { "OCTAL" ); + public static Set antlrSpecialTokens = Set.of( + "COMMENT", + "WS" + ); + private List findMissingTokens(Field[] javaCCFields, Set congoCCTokenStrings) { List missing = new ArrayList<>(); @@ -124,4 +132,36 @@ public void testVespaGroupingTokenList() { List missing = findMissingTokens(javaCCFields, congoCCTokenStrings); assertEquals(0, missing.size(), "Missing ranking expression tokens in CongoCC: " + String.join(", ", missing)); } + + @Test + public void testYQLPlusTokenList() { + Vocabulary vocabulary = yqlplusLexer.VOCABULARY; + + Set antlrTokens = new HashSet<>(); + + for (int i = 0; i < vocabulary.getMaxTokenType(); i++) { + String symbolicName = 
vocabulary.getSymbolicName(i); + if (symbolicName != null) { + antlrTokens.add(symbolicName); + } + } + + Set congoCCTokenStrings = new HashSet<>(); + + for (var tokenType : YQLPlusLexer.getRegularTokens()) { + congoCCTokenStrings.add(tokenType.toString()); + } + + List missing = new ArrayList<>(); + for (var token : antlrTokens) { + if (antlrSpecialTokens.contains(token)) continue; + + if (!congoCCTokenStrings.contains(token)) { + missing.add(token); + } + } + + assertEquals(0, missing.size(), "Missing yqlplus tokens in CongoCC: " + String.join(", ", missing)); + + } } From 528be3e61cacb86c535d805c7524025da596ec64 Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Thu, 14 Nov 2024 14:00:07 +0100 Subject: [PATCH 021/126] Require cloud account field --- .../hosted/provision/persistence/SnapshotSerializer.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializer.java index 56e39374d255..9935542d3241 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializer.java @@ -50,16 +50,13 @@ public static Snapshot fromInspector(Inspector object, CloudAccount systemAccoun Instant at = Instant.ofEpochMilli(inspector.field(AT_FIELD).asLong()); history.put(type, new Snapshot.History.Event(type, at)); }); - // TODO(mpolden): Require field after 2024-12-01 - CloudAccount cloudAccount = SlimeUtils.optionalString(object.field(CLOUD_ACCOUNT_FIELD)) - .map(CloudAccount::from) - .orElse(systemAccount); Optional encryptionKey = Optional.empty(); if (object.field(SEALED_SHARED_KEY_FIELD).valid()) { SealedSharedKey sharedKey = SealedSharedKey.fromTokenString(object.field(SEALED_SHARED_KEY_FIELD).asString()); SecretVersionId sealingKeyVersion = SecretVersionId.of(object.field(SEALING_KEY_VERSION).asString()); encryptionKey = Optional.of(new SnapshotKey(sharedKey, sealingKeyVersion)); } + CloudAccount cloudAccount = CloudAccount.from(object.field(CLOUD_ACCOUNT_FIELD).asString()); return new Snapshot(SnapshotId.of(object.field(ID_FIELD).asString()), HostName.of(object.field(HOSTNAME_FIELD).asString()), stateFromSlime(object.field(STATE_FIELD).asString()), From e8e5575d50c0e3a52be62c31c7845cf31f73aaeb Mon Sep 17 00:00:00 2001 From: Arnstein Ressem Date: Thu, 14 Nov 2024 14:02:09 +0100 Subject: [PATCH 022/126] Dont try to retag the version if the ref matches the current tag. 
--- dist/release-vespa-rpm.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dist/release-vespa-rpm.sh b/dist/release-vespa-rpm.sh index ffa4c8e3e2e7..87ff7f4c3403 100755 --- a/dist/release-vespa-rpm.sh +++ b/dist/release-vespa-rpm.sh @@ -17,10 +17,11 @@ readonly CURRENT_BRANCH=$(git branch | grep "^\*" | cut -d' ' -f2) git checkout master git pull --rebase -# Create a proper release tag - -git tag -a "$RELEASE_TAG" -m "Release version $VERSION" $GITREF -git push origin "$RELEASE_TAG" +# Create a proper release tag if not there +if [[ $(git show-ref --tags "$RELEASE_TAG" | awk '{print $1}') != "$GITREF" ]]; then + git tag -a "$RELEASE_TAG" -m "Release version $VERSION" $GITREF + git push origin "$RELEASE_TAG" +fi git reset --hard HEAD git checkout $CURRENT_BRANCH From d039fae4c5fc8091638ee8db69d454affe539731 Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Thu, 14 Nov 2024 14:05:27 +0100 Subject: [PATCH 023/126] Make snapshot key non-optional --- .../vespa/hosted/provision/backup/Snapshot.java | 4 ++-- .../hosted/provision/backup/Snapshots.java | 7 ++----- .../persistence/SnapshotSerializer.java | 17 ++++++----------- .../hosted/provision/restapi/NodesResponse.java | 2 +- .../hosted/provision/backup/SnapshotTest.java | 12 ++++++++++-- .../hosted/provision/backup/SnapshotsTest.java | 10 ++++------ .../persistence/SnapshotSerializerTest.java | 5 ++--- 7 files changed, 27 insertions(+), 30 deletions(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshot.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshot.java index 0e1720e6bf33..0decd1130484 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshot.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshot.java @@ -21,7 +21,7 @@ * @author mpolden */ public record Snapshot(SnapshotId id, HostName hostname, State state, History history, ClusterId cluster, - int clusterIndex, CloudAccount cloudAccount, Optional key) { + int clusterIndex, CloudAccount cloudAccount, SnapshotKey key) { public Snapshot { Objects.requireNonNull(id); @@ -126,7 +126,7 @@ public static SnapshotId generateId() { public static Snapshot create(SnapshotId id, HostName hostname, CloudAccount cloudAccount, Instant at, ClusterId cluster, int clusterIndex, SnapshotKey encryptionKey) { return new Snapshot(id, hostname, State.creating, History.of(State.creating, at), cluster, clusterIndex, - cloudAccount, Optional.of(encryptionKey)); + cloudAccount, encryptionKey); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshots.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshots.java index 9ecc386bb98d..6175d0daf2c7 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshots.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshots.java @@ -182,11 +182,8 @@ private SecretSharedKey generateEncryptionKey(KeyPair keyPair, SnapshotId id) { /** Reseal the encryption key for snapshot using given public key */ private SealedSharedKey resealKeyOf(Snapshot snapshot, PublicKey receiverPublicKey) { - if (snapshot.key().isEmpty()) { - throw new IllegalArgumentException("Snapshot " + snapshot.id() + " has no encryption key"); - } - VersionedKeyPair sealingKeyPair = sealingKeyPair(snapshot.key().get().sealingKeyVersion()); - SecretSharedKey unsealedKey = 
SharedKeyGenerator.fromSealedKey(snapshot.key().get().sharedKey(), + VersionedKeyPair sealingKeyPair = sealingKeyPair(snapshot.key().sealingKeyVersion()); + SecretSharedKey unsealedKey = SharedKeyGenerator.fromSealedKey(snapshot.key().sharedKey(), sealingKeyPair.keyPair().getPrivate()); return SharedKeyGenerator.reseal(unsealedKey, receiverPublicKey, KeyId.ofString(snapshot.id().toString())) .sealedSharedKey(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializer.java index 9935542d3241..940301244aeb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializer.java @@ -20,7 +20,6 @@ import java.time.Instant; import java.util.List; -import java.util.Optional; /** * @author mpolden @@ -50,13 +49,11 @@ public static Snapshot fromInspector(Inspector object, CloudAccount systemAccoun Instant at = Instant.ofEpochMilli(inspector.field(AT_FIELD).asLong()); history.put(type, new Snapshot.History.Event(type, at)); }); - Optional encryptionKey = Optional.empty(); - if (object.field(SEALED_SHARED_KEY_FIELD).valid()) { - SealedSharedKey sharedKey = SealedSharedKey.fromTokenString(object.field(SEALED_SHARED_KEY_FIELD).asString()); - SecretVersionId sealingKeyVersion = SecretVersionId.of(object.field(SEALING_KEY_VERSION).asString()); - encryptionKey = Optional.of(new SnapshotKey(sharedKey, sealingKeyVersion)); - } CloudAccount cloudAccount = CloudAccount.from(object.field(CLOUD_ACCOUNT_FIELD).asString()); + SealedSharedKey sharedKey = SealedSharedKey.fromTokenString(object.field(SEALED_SHARED_KEY_FIELD).asString()); + SecretVersionId sealingKeyVersion = SecretVersionId.of(object.field(SEALING_KEY_VERSION).asString()); + SnapshotKey encryptionKey = new SnapshotKey(sharedKey, sealingKeyVersion); + return new Snapshot(SnapshotId.of(object.field(ID_FIELD).asString()), HostName.of(object.field(HOSTNAME_FIELD).asString()), stateFromSlime(object.field(STATE_FIELD).asString()), @@ -110,10 +107,8 @@ public static void toSlime(Snapshot snapshot, Cursor object) { eventObject.setLong(AT_FIELD, event.at().toEpochMilli()); }); object.setString(CLOUD_ACCOUNT_FIELD, snapshot.cloudAccount().value()); - snapshot.key().ifPresent(k -> { - object.setString(SEALED_SHARED_KEY_FIELD, k.sharedKey().toTokenString()); - object.setString(SEALING_KEY_VERSION, k.sealingKeyVersion().value()); - }); + object.setString(SEALED_SHARED_KEY_FIELD, snapshot.key().sharedKey().toTokenString()); + object.setString(SEALING_KEY_VERSION, snapshot.key().sealingKeyVersion().value()); } public static String asString(Snapshot.State state) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java index 7a6507f9847f..03044319d54f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java @@ -248,7 +248,7 @@ static void toSlime(WireguardKeyWithTimestamp keyWithTimestamp, Cursor object) { private void toSlime(Snapshot snapshot, Cursor object) { object.setString("id", snapshot.id().toString()); object.setString("state", SnapshotSerializer.asString(snapshot.state())); - 
object.setBool("encrypted", snapshot.key().isPresent()); + object.setBool("encrypted", true); } private Optional currentContainerImage(Node node) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotTest.java index a3a1d48fdaad..7cf2a7aedac5 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotTest.java @@ -1,14 +1,19 @@ package com.yahoo.vespa.hosted.provision.backup; +import ai.vespa.secret.model.SecretVersionId; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.CloudAccount; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.HostName; +import com.yahoo.security.KeyId; +import com.yahoo.security.KeyUtils; +import com.yahoo.security.SecretSharedKey; +import com.yahoo.security.SharedKeyGenerator; import com.yahoo.vespa.hosted.provision.node.ClusterId; import org.junit.jupiter.api.Test; +import java.security.PublicKey; import java.time.Instant; -import java.util.Optional; import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.fail; @@ -47,9 +52,12 @@ private static void assertDisallowed(Snapshot.State from, Snapshot.State to) { } private static Snapshot snapshot(Snapshot.State state) { + PublicKey publicKey = KeyUtils.generateX25519KeyPair().getPublic(); + SecretSharedKey sharedKey = SharedKeyGenerator.generateForReceiverPublicKey(publicKey, + KeyId.ofString("mykey")); return new Snapshot(Snapshot.generateId(), HostName.of("h1.example.com"), state, Snapshot.History.of(state, Instant.ofEpochMilli(123)), new ClusterId(ApplicationId.defaultId(), ClusterSpec.Id.from("c1")), - 0, CloudAccount.empty, Optional.empty()); + 0, CloudAccount.empty, new SnapshotKey(sharedKey.sealedSharedKey(), SecretVersionId.of("v1"))); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotsTest.java index cc3928550829..0f8b54247b2b 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotsTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotsTest.java @@ -22,7 +22,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; /** * @author mpolden @@ -46,12 +45,11 @@ void snapshot() { Snapshots snapshots = tester.nodeRepository().snapshots(); String node0 = nodes.get(0).hostname(); Snapshot snapshot0 = snapshots.create(node0, tester.clock().instant()); - assertTrue(snapshot0.key().isPresent()); // Request snapshot key PublicKey receiverPublicKey = KeyUtils.generateX25519KeyPair().getPublic(); SealedSharedKey resealedKey = snapshots.keyOf(snapshot0.id(), node0, receiverPublicKey); - assertNotEquals(snapshot0.key().get().sharedKey(), resealedKey); + assertNotEquals(snapshot0.key().sharedKey(), resealedKey); // Sealing key can be rotated independently of existing snapshots KeyPair keyPair = KeyUtils.generateX25519KeyPair(); @@ -59,14 +57,14 @@ void snapshot() { KeyUtils.toBase64EncodedX25519PrivateKey((XECPrivateKey) keyPair.getPrivate()) .getBytes(), SecretVersionId.of("2"))); - assertEquals(SecretVersionId.of("1"), 
snapshots.require(snapshot0.id(), node0).key().get().sealingKeyVersion()); - assertNotEquals(snapshot0.key().get().sharedKey(), snapshots.keyOf(snapshot0.id(), node0, receiverPublicKey), + assertEquals(SecretVersionId.of("1"), snapshots.require(snapshot0.id(), node0).key().sealingKeyVersion()); + assertNotEquals(snapshot0.key().sharedKey(), snapshots.keyOf(snapshot0.id(), node0, receiverPublicKey), "Can reseal after key rotation"); // Next snapshot uses latest sealing key String node1 = nodes.get(1).hostname(); Snapshot snapshot1 = snapshots.create(node1, tester.clock().instant()); - assertEquals(SecretVersionId.of("2"), snapshot1.key().get().sealingKeyVersion()); + assertEquals(SecretVersionId.of("2"), snapshot1.key().sealingKeyVersion()); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializerTest.java index e4574af18f72..4e858d630c92 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializerTest.java @@ -18,7 +18,6 @@ import java.security.PublicKey; import java.time.Instant; import java.util.List; -import java.util.Optional; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -41,7 +40,7 @@ void serialization() { ClusterSpec.Id.from("c1")), 0, CloudAccount.from("aws:000123456789"), - Optional.empty() + new SnapshotKey(sharedKey.sealedSharedKey(), SecretVersionId.of("v2")) ); Snapshot snapshot1 = new Snapshot(SnapshotId.of("7e45b44a-0f1a-4729-a4f4-20fff5d1e85d"), HostName.of("host1.example.com"), @@ -52,7 +51,7 @@ void serialization() { ClusterSpec.Id.from("c2")), 2, CloudAccount.from("aws:777123456789"), - Optional.of(new SnapshotKey(sharedKey.sealedSharedKey(), SecretVersionId.of("v1"))) + new SnapshotKey(sharedKey.sealedSharedKey(), SecretVersionId.of("v1")) ); assertEquals(snapshot0, SnapshotSerializer.fromSlime(SnapshotSerializer.toSlime(snapshot0), systemAccount)); List snapshots = List.of(snapshot0, snapshot1); From 2137d81cb9e13d562dde709e29fc29015421eb64 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Thu, 14 Nov 2024 13:21:18 +0000 Subject: [PATCH 024/126] set the "isFromQuery" when producing WordItems from semantic rules --- .../src/main/java/com/yahoo/prelude/semantics/engine/Match.java | 2 +- .../yahoo/prelude/semantics/rule/LiteralPhraseProduction.java | 2 +- .../com/yahoo/prelude/semantics/rule/LiteralTermProduction.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java index fbcd8935ebe0..d59f9ad7f6ad 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java @@ -65,7 +65,7 @@ public Item toItem(String label) { /** Returns a new item representing this match */ public Item toItem(String label, String term) { - var newItem = new WordItem(term, label); + var newItem = new WordItem(term, label, true); newItem.setWeight(item.getWeight()); return newItem; } diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java index 
42ad31382b89..7d4072efdd62 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java @@ -48,7 +48,7 @@ public void produce(RuleEvaluation e, int offset) { PhraseItem newPhrase = new PhraseItem(); newPhrase.setIndexName(getLabel()); for (String term : terms) - newPhrase.addItem(new WordItem(term)); + newPhrase.addItem(new WordItem(term, true)); Match matched = e.getNonreferencedMatch(0); insertMatch(e, matched, List.of(newPhrase), offset); diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralTermProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralTermProduction.java index 2e1c16902228..0f5cdcb0d03f 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralTermProduction.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralTermProduction.java @@ -61,7 +61,7 @@ public void setLiteral(String literal) { public String getLiteral() { return literal; } public void produce(RuleEvaluation e, int offset) { - WordItem newItem = new WordItem(literal, getLabel()); + WordItem newItem = new WordItem(literal, getLabel(), true); if (replacing) { Match matched = e.getNonreferencedMatch(0); newItem.setWeight(matched.getItem().getWeight()); From 1cdc9f5b01880fd18c7b04adb3b950eca41db541 Mon Sep 17 00:00:00 2001 From: gjoranv Date: Thu, 14 Nov 2024 14:38:09 +0100 Subject: [PATCH 025/126] Add forgotten writeTenantVaults --- .../com/yahoo/vespa/config/server/session/SessionSerializer.java | 1 + 1 file changed, 1 insertion(+) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionSerializer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionSerializer.java index fc6ea76eae01..8afc678d3ff5 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionSerializer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionSerializer.java @@ -43,6 +43,7 @@ void write(SessionZooKeeperClient zooKeeperClient, ApplicationId applicationId, zooKeeperClient.writeDockerImageRepository(dockerImageRepository); zooKeeperClient.writeAthenzDomain(athenzDomain); zooKeeperClient.writeQuota(quota); + zooKeeperClient.writeTenantVaults(tenantVaults); zooKeeperClient.writeTenantSecretStores(tenantSecretStores); zooKeeperClient.writeOperatorCertificates(operatorCertificates); zooKeeperClient.writeCloudAccount(cloudAccount); From 289feed552e766a36c881c43c15106e8c8403629 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Thu, 14 Nov 2024 14:40:03 +0100 Subject: [PATCH 026/126] Cache disk index bitvectors. 
--- .../proton/metrics/content_proton_metrics.cpp | 3 +- .../proton/metrics/content_proton_metrics.h | 1 + .../vespa/searchcore/proton/server/proton.cpp | 24 ++++-- .../vespa/searchcore/proton/server/proton.h | 1 + .../diskindex/diskindex/diskindex_test.cpp | 2 +- .../posting_list_cache_test.cpp | 41 +++++++++- .../searchlib/common/allocatedbitvector.cpp | 10 +++ .../searchlib/common/allocatedbitvector.h | 2 + .../src/vespa/searchlib/common/bitvector.cpp | 17 ++++- .../src/vespa/searchlib/common/bitvector.h | 4 +- .../searchlib/common/partialbitvector.cpp | 10 +++ .../vespa/searchlib/common/partialbitvector.h | 2 + .../src/vespa/searchlib/common/read_stats.h | 24 ++++++ .../diskindex/bitvectordictionary.cpp | 16 +++- .../searchlib/diskindex/bitvectordictionary.h | 10 ++- .../searchlib/diskindex/disktermblueprint.h | 2 +- .../vespa/searchlib/diskindex/field_index.cpp | 40 +++++++++- .../vespa/searchlib/diskindex/field_index.h | 6 +- .../diskindex/i_posting_list_cache.h | 27 +++++++ .../diskindex/posting_list_cache.cpp | 76 ++++++++++++++++++- .../searchlib/diskindex/posting_list_cache.h | 9 ++- .../test/diskindex/testdiskindex.cpp | 2 +- .../vespalib/util/round_up_to_page_size.cpp | 5 ++ .../vespalib/util/round_up_to_page_size.h | 6 ++ 24 files changed, 310 insertions(+), 30 deletions(-) create mode 100644 searchlib/src/vespa/searchlib/common/read_stats.h diff --git a/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.cpp b/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.cpp index 06a43725136b..2c44db6f3ef8 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.cpp +++ b/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.cpp @@ -29,7 +29,8 @@ ContentProtonMetrics::ProtonExecutorMetrics::~ProtonExecutorMetrics() = default; ContentProtonMetrics::IndexMetrics::CacheMetrics::CacheMetrics(metrics::MetricSet* parent) : metrics::MetricSet("cache", {}, "Metrics for caches", parent), - postinglist(this, "postinglist", "Posting list cache metrics", "postinglist_cache") + postinglist(this, "postinglist", "Posting list cache metrics", "postinglist_cache"), + bitvector(this, "bitvector", "Bitvector cache metrics", "bitvector_cache") { } diff --git a/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.h b/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.h index 75167f1771b7..d3769c7a6f6b 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.h +++ b/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.h @@ -46,6 +46,7 @@ struct ContentProtonMetrics : metrics::MetricSet struct CacheMetrics : public metrics::MetricSet { proton::CacheMetrics postinglist; + proton::CacheMetrics bitvector; explicit CacheMetrics(metrics::MetricSet* parent); ~CacheMetrics() override; diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.cpp b/searchcore/src/vespa/searchcore/proton/server/proton.cpp index fe4cacbbeddf..68f89734e8fc 100644 --- a/searchcore/src/vespa/searchcore/proton/server/proton.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/proton.cpp @@ -165,10 +165,11 @@ void ensureWritableDir(const std::string &dirName) { std::shared_ptr make_posting_list_cache(const ProtonConfig& cfg) { - if (cfg.search.io == ProtonConfig::Search::Io::MMAP || cfg.index.cache.postinglist.maxbytes == 0) { + if (cfg.search.io == ProtonConfig::Search::Io::MMAP || + (cfg.index.cache.postinglist.maxbytes == 0 && cfg.index.cache.bitvector.maxbytes == 0)) { 
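        // Caching is disabled when index reads are mmap-backed, or when both the posting list and the
        // (new) bitvector cache budgets are zero.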
return {}; } - return std::make_shared(cfg.index.cache.postinglist.maxbytes); + return std::make_shared(cfg.index.cache.postinglist.maxbytes, cfg.index.cache.bitvector.maxbytes); } } // namespace @@ -282,7 +283,8 @@ Proton::Proton(FNET_Transport & transport, const config::ConfigUri & configUri, _nodeUpLock(), _nodeUp(), _posting_list_cache(), - _last_posting_list_cache_stats() + _last_posting_list_cache_stats(), + _last_bitvector_cache_stats() { } BootstrapConfig::SP @@ -805,6 +807,13 @@ updateSessionCacheMetrics(ContentProtonMetrics &metrics, proton::matching::Sessi metrics.sessionCache.grouping.update(groupingStats); } +void +update_cache_stats(CacheMetrics& metrics, const vespalib::CacheStats& stats, vespalib::CacheStats& last_stats) +{ + metrics.update_metrics(stats, last_stats); + last_stats = stats; +} + } void @@ -872,9 +881,12 @@ Proton::updateMetrics(const metrics::MetricLockGuard &) } } if (_posting_list_cache) { - auto stats = _posting_list_cache->get_stats(); - _metricsEngine->root().index.cache.postinglist.update_metrics(stats, _last_posting_list_cache_stats); - _last_posting_list_cache_stats = stats; + update_cache_stats(_metricsEngine->root().index.cache.postinglist, + _posting_list_cache->get_stats(), + _last_posting_list_cache_stats); + update_cache_stats(_metricsEngine->root().index.cache.bitvector, + _posting_list_cache->get_bitvector_stats(), + _last_bitvector_cache_stats); } } diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.h b/searchcore/src/vespa/searchcore/proton/server/proton.h index 4b08a83c681a..198037f7bad2 100644 --- a/searchcore/src/vespa/searchcore/proton/server/proton.h +++ b/searchcore/src/vespa/searchcore/proton/server/proton.h @@ -133,6 +133,7 @@ class Proton : public IProtonConfigurerOwner, std::set _nodeUp; // bucketspaces where node is up std::shared_ptr _posting_list_cache; vespalib::CacheStats _last_posting_list_cache_stats; + vespalib::CacheStats _last_bitvector_cache_stats; std::shared_ptr addDocumentDB(const DocTypeName & docTypeName, BucketSpace bucketSpace, const std::string & configid, diff --git a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp index ee8bbae184ed..575152225af7 100644 --- a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp +++ b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp @@ -289,7 +289,7 @@ DiskIndexTest::requireThatWeCanReadBitVector() auto& field_index = _index->get_field_index(1); auto blr = field_index.lookup_bit_vector(r); EXPECT_TRUE(blr.valid()); - BitVector::UP bv = field_index.read_bit_vector(blr); + auto bv = field_index.read_bit_vector(blr); EXPECT_TRUE(bv.get() != nullptr); EXPECT_TRUE(*bv == *exp); } diff --git a/searchlib/src/tests/diskindex/posting_list_cache/posting_list_cache_test.cpp b/searchlib/src/tests/diskindex/posting_list_cache/posting_list_cache_test.cpp index a776b9da70e4..1a1ac8164dab 100644 --- a/searchlib/src/tests/diskindex/posting_list_cache/posting_list_cache_test.cpp +++ b/searchlib/src/tests/diskindex/posting_list_cache/posting_list_cache_test.cpp @@ -1,9 +1,11 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include #include #include #include +using search::BitVector; using search::diskindex::PostingListCache; using search::index::PostingListHandle; @@ -14,6 +16,7 @@ class MockFile : public PostingListCache::IPostingListFileBacking { MockFile(); ~MockFile() override; PostingListHandle read(const PostingListCache::Key& key) const override; + std::shared_ptr read(const PostingListCache::BitVectorKey& key, PostingListCache::Context& ctx) const override; }; MockFile::MockFile() @@ -32,25 +35,39 @@ MockFile::read(const PostingListCache::Key& key) const return handle; } +std::shared_ptr +MockFile::read(const PostingListCache::BitVectorKey& key, PostingListCache::Context& ctx) const +{ + EXPECT_NE(0, key.lookup_result.idx); + ctx.cache_miss = true; + return BitVector::create(100 * key.file_id + key.lookup_result.idx); +} + } class PostingListCacheTest : public ::testing::Test { protected: using Key = PostingListCache::Key; + using BitVectorKey = PostingListCache::BitVectorKey; MockFile _mock_file; PostingListCache _cache; Key _key; + BitVectorKey _bv_key; + PostingListCache::Context _ctx; PostingListCacheTest(); ~PostingListCacheTest() override; - PostingListHandle read() { return _cache.read(_key); } + PostingListHandle read() const { return _cache.read(_key); } + std::shared_ptr read_bv() { return _cache.read(_bv_key, _ctx); } }; PostingListCacheTest::PostingListCacheTest() : ::testing::Test(), _mock_file(), - _cache(256_Ki), - _key() + _cache(256_Ki, 256_Ki), + _key(), + _bv_key(), + _ctx(&_mock_file) { _key.backing_store_file = &_mock_file; } @@ -104,4 +121,22 @@ TEST_F(PostingListCacheTest, file_id_is_part_of_key) EXPECT_EQ(2, stats.elements); } +TEST_F(PostingListCacheTest, repeated_bitvector_lookup_gives_hit) +{ + _bv_key.lookup_result.idx = 1; + _bv_key.file_id = 2; + _ctx.cache_miss = false; + auto bv = read_bv(); + EXPECT_TRUE(_ctx.cache_miss); + _ctx.cache_miss = false; + auto bv2 = read_bv(); + EXPECT_FALSE(_ctx.cache_miss); + EXPECT_EQ(bv, bv2); + auto stats = _cache.get_bitvector_stats(); + EXPECT_EQ(1, stats.misses); + EXPECT_EQ(1, stats.hits); + EXPECT_EQ(1, stats.elements); + EXPECT_EQ(PostingListCache::bitvector_element_size() + bv->get_allocated_bytes(true), stats.memory_used); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp b/searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp index 7be5ce84a012..08d08c6723ef 100644 --- a/searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp @@ -108,4 +108,14 @@ AllocatedBitVector::resize(Index newLength) clear(); } +size_t +AllocatedBitVector::get_allocated_bytes(bool include_self) const noexcept +{ + size_t result = extraByteSize(); + if (include_self) { + result += sizeof(AllocatedBitVector); + } + return result; +} + } // namespace search diff --git a/searchlib/src/vespa/searchlib/common/allocatedbitvector.h b/searchlib/src/vespa/searchlib/common/allocatedbitvector.h index 9884e389dee9..f4dcad7fca36 100644 --- a/searchlib/src/vespa/searchlib/common/allocatedbitvector.h +++ b/searchlib/src/vespa/searchlib/common/allocatedbitvector.h @@ -58,6 +58,8 @@ class AllocatedBitVector : public BitVector */ void resize(Index newLength); + size_t get_allocated_bytes(bool include_self) const noexcept override; + protected: Index _capacityBits; Alloc _alloc; diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp index ea514abaafca..abe67efd4bfa 100644 --- 
a/searchlib/src/vespa/searchlib/common/bitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp @@ -3,10 +3,12 @@ #include "bitvector.h" #include "allocatedbitvector.h" #include "partialbitvector.h" +#include "read_stats.h" #include #include #include #include +#include #include #include #include @@ -365,6 +367,8 @@ class MMappedBitVector : public BitVector MMappedBitVector(Index numberOfElements, FastOS_FileInterface &file, int64_t offset, Index doccount); + size_t get_allocated_bytes(bool include_self) const noexcept override; + private: void read(Index numberOfElements, FastOS_FileInterface &file, int64_t offset, Index doccount); @@ -372,10 +376,12 @@ class MMappedBitVector : public BitVector BitVector::UP BitVector::create(Index numberOfElements, FastOS_FileInterface &file, - int64_t offset, Index doccount) + int64_t offset, Index doccount, ReadStats& read_stats) { UP bv; if (file.IsMemoryMapped()) { + size_t pad_before = offset - vespalib::round_down_to_page_boundary(offset); + read_stats.read_bytes = vespalib::round_up_to_page_size(pad_before + getFileBytes(numberOfElements)); bv = std::make_unique(numberOfElements, file, offset, doccount); } else { size_t padbefore, padafter; @@ -385,7 +391,8 @@ BitVector::create(Index numberOfElements, FastOS_FileInterface &file, AllocatedBitVector::Alloc alloc = Alloc::alloc(padbefore + vectorsize + padafter, MMAP_LIMIT, FileSettings::DIRECTIO_ALIGNMENT); void * alignedBuffer = alloc.get(); - file.ReadBuf(alignedBuffer, alloc.size(), offset - padbefore); + file.ReadBuf(alignedBuffer, padbefore + vectorsize + padafter, offset - padbefore); + read_stats.read_bytes = padbefore + vectorsize + padafter; bv = std::make_unique(numberOfElements, std::move(alloc), padbefore); bv->setTrueBits(doccount); // Check guard bit for getNextTrueBit() @@ -450,6 +457,12 @@ MMappedBitVector::read(Index numberOfElements, FastOS_FileInterface &file, setTrueBits(doccount); } +size_t +MMappedBitVector::get_allocated_bytes(bool include_self) const noexcept +{ + return include_self ? sizeof(MMappedBitVector) : 0; +} + nbostream & operator<<(nbostream &out, const BitVector &bv) { diff --git a/searchlib/src/vespa/searchlib/common/bitvector.h b/searchlib/src/vespa/searchlib/common/bitvector.h index 3d51ea3eb15e..8067cc96d9b9 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.h +++ b/searchlib/src/vespa/searchlib/common/bitvector.h @@ -21,6 +21,7 @@ class FastOS_FileInterface; namespace search { class PartialBitVector; +struct ReadStats; class AllocatedBitVector; class BitVector : protected BitWord @@ -277,7 +278,7 @@ class BitVector : protected BitWord * @param offset Where bitvector image is located in the file. 
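+ * @param read_stats Updated with the number of bytes read from disk (for memory-mapped files: the byte size of the pages spanning the bit vector)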
* @param doccount Number of bits set in bitvector */ - static UP create(Index numberOfElements, FastOS_FileInterface &file, int64_t offset, Index doccount); + static UP create(Index numberOfElements, FastOS_FileInterface &file, int64_t offset, Index doccount, ReadStats& read_stats); static UP create(Index start, Index end); static UP create(const BitVector & org, Index start, Index end); static UP create(Index numberOfElements); @@ -291,6 +292,7 @@ class BitVector : protected BitWord static void parallellOr(vespalib::ThreadBundle & thread_bundle, std::span vectors); static Index numWords(Index bits) noexcept { return wordNum(bits + 1 + (WordLen - 1)); } static Index numBytes(Index bits) noexcept { return numWords(bits) * sizeof(Word); } + virtual size_t get_allocated_bytes(bool include_self) const noexcept = 0; protected: using Alloc = vespalib::alloc::Alloc; VESPA_DLL_LOCAL BitVector(void * buf, Index start, Index end) noexcept; diff --git a/searchlib/src/vespa/searchlib/common/partialbitvector.cpp b/searchlib/src/vespa/searchlib/common/partialbitvector.cpp index 1ec0c10e411b..3f521842ca50 100644 --- a/searchlib/src/vespa/searchlib/common/partialbitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/partialbitvector.cpp @@ -39,4 +39,14 @@ PartialBitVector::PartialBitVector(const BitVector & org, Index start, Index end PartialBitVector::~PartialBitVector() = default; +size_t +PartialBitVector::get_allocated_bytes(bool include_self) const noexcept +{ + size_t result = _alloc.size(); + if (include_self) { + result += sizeof(PartialBitVector); + } + return result; +} + } // namespace search diff --git a/searchlib/src/vespa/searchlib/common/partialbitvector.h b/searchlib/src/vespa/searchlib/common/partialbitvector.h index 4cd4e94bf8a8..76c66f31682c 100644 --- a/searchlib/src/vespa/searchlib/common/partialbitvector.h +++ b/searchlib/src/vespa/searchlib/common/partialbitvector.h @@ -27,6 +27,8 @@ class PartialBitVector : public BitVector ~PartialBitVector() override; + size_t get_allocated_bytes(bool include_self) const noexcept override; + private: vespalib::alloc::Alloc _alloc; }; diff --git a/searchlib/src/vespa/searchlib/common/read_stats.h b/searchlib/src/vespa/searchlib/common/read_stats.h new file mode 100644 index 000000000000..4f33a0ee3893 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/read_stats.h @@ -0,0 +1,24 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { + +/* + * Struct passed to read functions to pick up information about read + * stats. 
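+ *
+ * Sketch of intended use, mirroring the callers added in this patch (the
+ * variable names below are illustrative only):
+ *
+ *   ReadStats stats;
+ *   auto bv = bitvector_dict.read_bitvector(lookup_result, stats);
+ *   disk_io_stats->add_uncached_read_operation(stats.read_bytes);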
+ */ +struct ReadStats +{ + uint64_t read_bytes; // bytes read from disk or bytes in pages containing the data + ReadStats() noexcept + : read_bytes(0) + { } + void clear() noexcept { + read_bytes = 0; + } +}; + +} diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp index 8ea41148be9b..e9dce0f06e5a 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp @@ -3,6 +3,7 @@ #include "bitvectordictionary.h" #include #include +#include #include #include #include @@ -21,7 +22,8 @@ BitVectorDictionary::BitVectorDictionary() _entries(), _vectorSize(0u), _datFile(), - _datHeaderLen(0u) + _datHeaderLen(0u), + _memory_mapped(false) { } BitVectorDictionary::~BitVectorDictionary() = default; @@ -83,6 +85,7 @@ BitVectorDictionary::open(const std::string &pathPrefix, vespalib::FileHeader datHeader(64); _datHeaderLen = datHeader.readFile(*_datFile); assert(_datFile->getSize() >= static_cast(_vectorSize * _entries.size() + _datHeaderLen)); + _memory_mapped = (_datFile->MemoryMapPtr(0) != nullptr); return true; } @@ -98,13 +101,20 @@ BitVectorDictionary::lookup(uint64_t wordNum) { } std::unique_ptr -BitVectorDictionary::read_bitvector(BitVectorDictionaryLookupResult lookup_result) +BitVectorDictionary::read_bitvector(BitVectorDictionaryLookupResult lookup_result, ReadStats& read_stats) { if (!lookup_result.valid()) { return {}; } int64_t offset = ((int64_t) _vectorSize) * lookup_result.idx + _datHeaderLen; - return BitVector::create(_docIdLimit, *_datFile, offset, _entries[lookup_result.idx]._numDocs); + return BitVector::create(_docIdLimit, *_datFile, offset, _entries[lookup_result.idx]._numDocs, read_stats); +} + +std::unique_ptr +BitVectorDictionary::read_bitvector(BitVectorDictionaryLookupResult lookup_result) +{ + ReadStats read_stats; + return read_bitvector(lookup_result, read_stats); } } diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h index b3ce7a183468..76f8c5d039bc 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h @@ -11,6 +11,7 @@ class FastOS_FileInterface; namespace search { class BitVector; } +namespace search { struct ReadStats; } namespace search::diskindex { @@ -29,6 +30,7 @@ class BitVectorDictionary size_t _vectorSize; std::unique_ptr _datFile; uint32_t _datHeaderLen; + bool _memory_mapped; public: using SP = std::shared_ptr; @@ -62,13 +64,17 @@ class BitVectorDictionary * Load and return the associated bit vector if lookup result is valid. * * @param lookup_result the result returned from lookup. + * @param read_stats statistics to be updated when reading bit vector * @return the loaded bit vector or empty if lookup result was invalid. 
**/ + std::unique_ptr read_bitvector(index::BitVectorDictionaryLookupResult lookup_result, + ReadStats &read_stats); std::unique_ptr read_bitvector(index::BitVectorDictionaryLookupResult lookup_result); - uint32_t getDocIdLimit() const { return _docIdLimit; } + uint32_t getDocIdLimit() const noexcept { return _docIdLimit; } - const std::vector & getEntries() const { return _entries; } + const std::vector & getEntries() const noexcept { return _entries; } + bool get_memory_mapped() const noexcept { return _memory_mapped; } }; } diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h index 1b0196914f4f..c794578fa15c 100644 --- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h @@ -21,7 +21,7 @@ class DiskTermBlueprint : public queryeval::SimpleLeafBlueprint bool _useBitVector; bool _fetchPostingsDone; index::PostingListHandle _postingHandle; - BitVector::UP _bitVector; + std::shared_ptr _bitVector; public: /** diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp index dac7b1cf7a5f..37b568221199 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp @@ -4,6 +4,7 @@ #include "fileheader.h" #include "pagedict4randread.h" #include +#include #include #include #include @@ -48,6 +49,8 @@ FieldIndex::FieldIndex() _size_on_disk(0), _cache_disk_io_stats(std::make_shared()), _posting_list_cache(), + _posting_list_cache_enabled(false), + _bitvector_cache_enabled(false), _field_id(0) { } @@ -57,6 +60,8 @@ FieldIndex::FieldIndex(uint32_t field_id, std::shared_ptr pos { _field_id = field_id; _posting_list_cache = std::move(posting_list_cache); + _posting_list_cache_enabled = _posting_list_cache && _posting_list_cache->enabled_for_posting_lists(); + _bitvector_cache_enabled = _posting_list_cache && _posting_list_cache->enabled_for_bitvectors(); } FieldIndex::FieldIndex(FieldIndex&&) = default; @@ -191,7 +196,7 @@ FieldIndex::read_posting_list(const DictionaryLookupResult& lookup_result) const if (file == nullptr || lookup_result.counts._bitLength == 0) { return {}; } - if (file->getMemoryMapped() || !_posting_list_cache) { + if (file->getMemoryMapped() || !_posting_list_cache_enabled) { return read_uncached_posting_list(lookup_result); } IPostingListCache::Key key; @@ -216,13 +221,42 @@ FieldIndex::lookup_bit_vector(const DictionaryLookupResult& lookup_result) const return _bit_vector_dict->lookup(lookup_result.wordNum); } -std::unique_ptr +std::shared_ptr +FieldIndex::read_uncached_bit_vector(BitVectorDictionaryLookupResult lookup_result) const +{ + ReadStats read_stats; + auto result = _bit_vector_dict->read_bitvector(lookup_result, read_stats); + if (read_stats.read_bytes != 0) { + _cache_disk_io_stats->add_uncached_read_operation(read_stats.read_bytes); + } + return result; +} + +std::shared_ptr +FieldIndex::read(const IPostingListCache::BitVectorKey& key, IPostingListCache::Context& ctx) const +{ + ctx.cache_miss = true; + return read_uncached_bit_vector(key.lookup_result); +} + +std::shared_ptr FieldIndex::read_bit_vector(BitVectorDictionaryLookupResult lookup_result) const { if (!_bit_vector_dict) { return {}; } - return _bit_vector_dict->read_bitvector(lookup_result); + if (_bit_vector_dict->get_memory_mapped() || !_bitvector_cache_enabled) { + return read_uncached_bit_vector(lookup_result); + } + 
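// Cached path: key the shared cache on (file_id, dictionary idx); a miss calls back into read(BitVectorKey, Context) above, while a hit only updates the cached-read statistics.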
IPostingListCache::BitVectorKey key; + key.file_id = _file_id; + key.lookup_result = lookup_result; + IPostingListCache::Context ctx(this); + auto result = _posting_list_cache->read(key, ctx); + if (!ctx.cache_miss) { + _cache_disk_io_stats->add_cached_read_operation(result->getFileBytes()); + } + return result; } std::unique_ptr diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.h b/searchlib/src/vespa/searchlib/diskindex/field_index.h index 3fe002ab6775..304591bb1139 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.h +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.h @@ -55,6 +55,8 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { uint64_t _size_on_disk; std::shared_ptr _cache_disk_io_stats; std::shared_ptr _posting_list_cache; + bool _posting_list_cache_enabled; + bool _bitvector_cache_enabled; static std::atomic _file_id_source; uint32_t _field_id; @@ -74,7 +76,9 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { index::PostingListHandle read(const IPostingListCache::Key& key) const override; index::PostingListHandle read_posting_list(const search::index::DictionaryLookupResult& lookup_result) const; index::BitVectorDictionaryLookupResult lookup_bit_vector(const search::index::DictionaryLookupResult& lookup_result) const; - std::unique_ptr read_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const; + std::shared_ptr read_uncached_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const; + std::shared_ptr read(const IPostingListCache::BitVectorKey& key, IPostingListCache::Context& ctx) const override; + std::shared_ptr read_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const; std::unique_ptr create_iterator(const search::index::DictionaryLookupResult& lookup_result, const index::PostingListHandle& handle, const search::fef::TermFieldMatchDataArray& tfmda) const; diff --git a/searchlib/src/vespa/searchlib/diskindex/i_posting_list_cache.h b/searchlib/src/vespa/searchlib/diskindex/i_posting_list_cache.h index b3c982c76f73..d10c28e0b695 100644 --- a/searchlib/src/vespa/searchlib/diskindex/i_posting_list_cache.h +++ b/searchlib/src/vespa/searchlib/diskindex/i_posting_list_cache.h @@ -2,10 +2,13 @@ #pragma once +#include #include #include #include +namespace search { class BitVector; } + namespace search::diskindex { /* @@ -28,6 +31,25 @@ class IPostingListCache { bit_length == rhs.bit_length; } }; + struct BitVectorKey { + uint64_t file_id; + index::BitVectorDictionaryLookupResult lookup_result; + BitVectorKey() noexcept : file_id(0), lookup_result() { } + size_t hash() const noexcept { return std::rotl(file_id, 40) + lookup_result.idx; } + bool operator==(const BitVectorKey& rhs) const noexcept { + return file_id == rhs.file_id && lookup_result.idx == rhs.lookup_result.idx; + } + }; + struct Context { + const IPostingListFileBacking* const backing_store_file; + bool cache_miss; + + Context(const IPostingListFileBacking *backing_store_file_in) noexcept + : backing_store_file(backing_store_file_in), + cache_miss(false) + { + } + }; /* * Interface class for reading posting list on cache miss. 
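 * For bit vectors the cache hands the backing implementation a Context: the
 * backing read sets ctx.cache_miss so the caller can tell a real disk read from
 * a cache hit and account I/O accordingly. Rough sketch of the expected flow,
 * with names taken from the callers added in this patch (illustrative, not part
 * of this interface):
 *
 *   IPostingListCache::BitVectorKey key;   // file_id + dictionary lookup idx
 *   IPostingListCache::Context ctx(this);  // 'this' implements IPostingListFileBacking
 *   auto bv = cache->read(key, ctx);       // a miss invokes read(key, ctx) below
 *   if (!ctx.cache_miss) { ... account a cached read ... }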
*/ @@ -35,10 +57,15 @@ class IPostingListCache { public: virtual ~IPostingListFileBacking() = default; virtual search::index::PostingListHandle read(const Key& key) const = 0; + virtual std::shared_ptr read(const BitVectorKey& key, Context& ctx) const = 0; }; virtual ~IPostingListCache() = default; virtual search::index::PostingListHandle read(const Key& key) const = 0; + virtual std::shared_ptr read(const BitVectorKey& key, Context& ctx) const = 0; virtual vespalib::CacheStats get_stats() const = 0; + virtual vespalib::CacheStats get_bitvector_stats() const = 0; + virtual bool enabled_for_posting_lists() const noexcept = 0; + virtual bool enabled_for_bitvectors() const noexcept = 0; }; } diff --git a/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.cpp b/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.cpp index 9e09e3028d3a..7dc33a27574b 100644 --- a/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.cpp @@ -1,11 +1,14 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "posting_list_cache.h" +#include #include #include #include #include +#include +using search::index::BitVectorDictionaryLookupResult; using search::index::DictionaryLookupResult; using search::index::PostingListHandle; @@ -16,20 +19,28 @@ class PostingListCache::BackingStore public: BackingStore(); ~BackingStore(); - bool read(const IPostingListCache::Key& key, PostingListHandle& value) const; + bool read(const Key& key, PostingListHandle& value) const; + bool read(const BitVectorKey& key, std::shared_ptr& value, Context& ctx) const; }; PostingListCache::BackingStore::BackingStore() = default; PostingListCache::BackingStore::~BackingStore() = default; bool -PostingListCache::BackingStore::read(const IPostingListCache::Key& key, PostingListHandle& value) const +PostingListCache::BackingStore::read(const Key& key, PostingListHandle& value) const { // TODO: Store a smaller copy if posting list is small value = key.backing_store_file->read(key); return true; } +bool +PostingListCache::BackingStore::read(const BitVectorKey& key, std::shared_ptr& value, Context& ctx) const +{ + value = ctx.backing_store_file->read(key, ctx); + return true; +} + struct PostingListHandleSize { size_t operator() (const PostingListHandle & arg) const noexcept { return arg._allocSize; } }; @@ -56,10 +67,37 @@ PostingListCache::Cache::Cache(BackingStore& backing_store, size_t max_bytes) PostingListCache::Cache::~Cache() = default; -PostingListCache::PostingListCache(size_t max_bytes) +struct BitVectorCacheValueSize { + size_t operator() (const std::shared_ptr& bv) const noexcept { return bv->get_allocated_bytes(true); } +}; + +using BitVectorCacheParams = vespalib::CacheParam< + vespalib::LruParam>, + const PostingListCache::BackingStore, + vespalib::zero, + BitVectorCacheValueSize +>; + +class PostingListCache::BitVectorCache : public vespalib::cache { +public: + using Parent = vespalib::cache; + BitVectorCache(BackingStore& backing_store, size_t max_bytes); + ~BitVectorCache(); + static size_t element_size() { return sizeof(value_type); } +}; + +PostingListCache::BitVectorCache::BitVectorCache(BackingStore& backing_store, size_t max_bytes) + : Parent(backing_store, max_bytes) +{ +} + +PostingListCache::BitVectorCache::~BitVectorCache() = default; + +PostingListCache::PostingListCache(size_t max_bytes, size_t bitvector_max_bytes) : IPostingListCache(), _backing_store(std::make_unique()), 
- _cache(std::make_unique(*_backing_store, max_bytes)) + _cache(std::make_unique(*_backing_store, max_bytes)), + _bitvector_cache(std::make_unique(*_backing_store, bitvector_max_bytes)) { } @@ -71,16 +109,46 @@ PostingListCache::read(const Key& key) const return _cache->read(key); } +std::shared_ptr +PostingListCache::read(const BitVectorKey& key, Context& ctx) const +{ + return _bitvector_cache->read(key, ctx); +} + vespalib::CacheStats PostingListCache::get_stats() const { return _cache->get_stats(); } +vespalib::CacheStats +PostingListCache::get_bitvector_stats() const +{ + return _bitvector_cache->get_stats(); +} + +bool +PostingListCache::enabled_for_posting_lists() const noexcept +{ + return _cache->capacityBytes() != 0; +} + +bool +PostingListCache::enabled_for_bitvectors() const noexcept +{ + return _bitvector_cache->capacityBytes() != 0; +} + size_t PostingListCache::element_size() { return PostingListCache::Cache::element_size(); } +size_t +PostingListCache::bitvector_element_size() +{ + return PostingListCache::BitVectorCache::element_size(); +} + } diff --git a/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.h b/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.h index b809b09549fc..ad4d21374cd2 100644 --- a/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.h +++ b/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.h @@ -15,14 +15,21 @@ class PostingListCache : public IPostingListCache { class BackingStore; private: class Cache; + class BitVectorCache; std::unique_ptr _backing_store; std::unique_ptr _cache; + std::unique_ptr _bitvector_cache; public: - PostingListCache(size_t max_bytes); + PostingListCache(size_t max_bytes, size_t bitvector_max_bytes); ~PostingListCache() override; search::index::PostingListHandle read(const Key& key) const override; + std::shared_ptr read(const BitVectorKey& key, Context& ctx) const override; vespalib::CacheStats get_stats() const override; + vespalib::CacheStats get_bitvector_stats() const override; + bool enabled_for_posting_lists() const noexcept override; + bool enabled_for_bitvectors() const noexcept override; static size_t element_size(); + static size_t bitvector_element_size(); }; } diff --git a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp index 4a2df1976943..f1db6da8fe3f 100644 --- a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp +++ b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp @@ -120,7 +120,7 @@ TestDiskIndex::openIndex(const std::string &dir, bool directio, bool readmmap, b } std::shared_ptr posting_list_cache; if (use_posting_list_cache) { - posting_list_cache = std::make_shared(256_Ki); + posting_list_cache = std::make_shared(256_Ki, 256_Ki); } _index = std::make_unique(dir, posting_list_cache); bool ok(_index->setup(tuneFileRead)); diff --git a/vespalib/src/vespa/vespalib/util/round_up_to_page_size.cpp b/vespalib/src/vespa/vespalib/util/round_up_to_page_size.cpp index 80b28d7e0274..86b13a253c17 100644 --- a/vespalib/src/vespa/vespalib/util/round_up_to_page_size.cpp +++ b/vespalib/src/vespa/vespalib/util/round_up_to_page_size.cpp @@ -11,6 +11,11 @@ const size_t page_size = getpagesize(); } +uint64_t round_down_to_page_boundary(uint64_t offset) +{ + return (offset & ~static_cast(page_size - 1)); +} + size_t round_up_to_page_size(size_t size) { return ((size + (page_size - 1)) & ~(page_size - 1)); diff --git a/vespalib/src/vespa/vespalib/util/round_up_to_page_size.h 
b/vespalib/src/vespa/vespalib/util/round_up_to_page_size.h index 4923d96d94e1..a06f7053a4a5 100644 --- a/vespalib/src/vespa/vespalib/util/round_up_to_page_size.h +++ b/vespalib/src/vespa/vespalib/util/round_up_to_page_size.h @@ -3,9 +3,15 @@ #pragma once #include +#include namespace vespalib { +/* + * Return offset rounded down to a page boundary. + */ +uint64_t round_down_to_page_boundary(uint64_t offset); + /* * Return sz rounded up to a multiple of page size. */ From 6550d59a44b504f802cee41c6c4e694d79296c7e Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Thu, 14 Nov 2024 13:34:18 +0000 Subject: [PATCH 027/126] Address review feedback * Only use 5 bits from hash per counter selection. * Add distinct `concept` for sketch hasher. * Add separate `add_and_count` method that combines updating frequency and returning the new Min-Count. * Remove heuristic for _not_ counting sample towards decay. Might be re-added later once we have a realistic data set to test with. --- .../util/relative_frequency_sketch_test.cpp | 14 ++++- .../util/relative_frequency_sketch.cpp | 57 +++++++++++-------- .../vespalib/util/relative_frequency_sketch.h | 35 ++++++++---- 3 files changed, 68 insertions(+), 38 deletions(-) diff --git a/vespalib/src/tests/util/relative_frequency_sketch_test.cpp b/vespalib/src/tests/util/relative_frequency_sketch_test.cpp index 8acb894f5b28..83613356acc3 100644 --- a/vespalib/src/tests/util/relative_frequency_sketch_test.cpp +++ b/vespalib/src/tests/util/relative_frequency_sketch_test.cpp @@ -8,9 +8,9 @@ using namespace ::testing; namespace { -struct Identity { +struct IdentityHash { template - constexpr T operator()(T v) const noexcept { return v; } + constexpr size_t operator()(T v) const noexcept { return v; } }; } @@ -19,7 +19,7 @@ struct RelativeFrequencySketchTest : Test { // Note: although the sketch is inherently _probabilistic_, the below tests are fully // deterministic as long as the underlying hash function remains the same. This is also why // we explicitly do _not_ use std::hash here, but defer entirely to (deterministic) XXH3. - using U32FrequencySketch = RelativeFrequencySketch; + using U32FrequencySketch = RelativeFrequencySketch; }; TEST_F(RelativeFrequencySketchTest, frequency_estimates_are_initially_zero) { @@ -42,6 +42,14 @@ TEST_F(RelativeFrequencySketchTest, frequency_is_counted_up_to_and_saturated_at_ } } +TEST_F(RelativeFrequencySketchTest, add_and_count_returns_min_count_after_add) { + U32FrequencySketch sketch(2); + EXPECT_EQ(sketch.add_and_count(123), 1); + EXPECT_EQ(sketch.add_and_count(123), 2); + EXPECT_EQ(sketch.add_and_count(123), 3); + EXPECT_EQ(sketch.add_and_count(456), 1); +} + TEST_F(RelativeFrequencySketchTest, can_track_frequency_of_multiple_elements) { U32FrequencySketch sketch(3); sketch.add(100); diff --git a/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp index 3ec8458f11aa..89042d703d31 100644 --- a/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp +++ b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp @@ -17,11 +17,11 @@ namespace vespalib { */ RawRelativeFrequencySketch::RawRelativeFrequencySketch(size_t count) : _buf(alloc::Alloc::alloc_aligned(roundUp2inN(std::max(size_t(64U), count * 8)), 512)), - _samples_since_decay(0), + _estimated_sample_count(0), _window_size((_buf.size() / 8) * 10), _block_mask_bits(_buf.size() > 64 ?
Optimized::msbIdx(_buf.size() / 64) : 0) { - assert(_block_mask_bits <= 48); // Will always be the case in practice, but it's an invariant... + assert(_block_mask_bits <= 44); // Will always be the case in practice, but it's an invariant... memset(_buf.get(), 0, _buf.size()); } @@ -40,43 +40,51 @@ RawRelativeFrequencySketch::~RawRelativeFrequencySketch() = default; * * Within the block we always update exactly 1 counter in each logical row. Use 5 distinct * bits from the hash for each of the 4 row updates (4 bits to select a byte out of 16, 1 for - * selecting either the high or low in-byte nibble). To make a nice round number, round up to - * consuming 8 bits per row (the 3 remaining bits are unused). + * selecting either the high or low in-byte nibble). * - * We use the same conditional decay trigger as the Caffeine sketch, in that we only bump - * the observed sample count (and possibly decay the counters) iff we actually increment at - * least one counter (i.e. not all counters are pre-saturated). The rationale for this is not - * stated outright in the code, but it makes sense as a way to gracefully handle repeated - * insertions of a small set of very high frequency elements. If we always counted these as - * distinct samples we would eventually decay the counters until we have forgotten _all_ - * elements that are not similarly frequent. + * Iff the estimated sample count reaches the window size threshold we implicitly divide all + * recorded 4-bit counters in half. */ -void RawRelativeFrequencySketch::add_by_hash(uint64_t hash) noexcept { +template +uint8_t RawRelativeFrequencySketch::add_by_hash_impl(uint64_t hash) noexcept { const uint64_t block = hash & ((1u << _block_mask_bits) - 1); hash >>= _block_mask_bits; assert(block*64 + 64 <= _buf.size()); auto* block_ptr = static_cast(_buf.get()) + (block * 64); - uint16_t old_counter_bits = 0; + uint8_t new_counters[4]; // The compiler will happily and easily unroll this loop. for (uint8_t i = 0; i < 4; ++i) { - uint8_t h = hash >> (i*8); // Note: we only use 5 out of the 8 bits + uint8_t h = hash >> (i*5); uint8_t* vp = block_ptr + (i * 16) + (h & 0xf); // row #i byte select const uint8_t v = *vp; h >>= 4; const uint8_t nib_shift = (h & 1) * 4; // High or low nibble shift factor (4 or 0) const uint8_t nib_mask = 0xf << nib_shift; const uint8_t nib_old = (v & nib_mask) >> nib_shift; - const uint8_t nib_new = nib_old < 15 ? nib_old + 1 : 15; // Saturated add + new_counters[i] = nib_old < 15 ? nib_old + 1 : 15; // Saturated add const uint8_t nib_rem = v & ~nib_mask; // Untouched nibble that should be preserved - old_counter_bits |= nib_old << (i * 4); - *vp = (nib_new << nib_shift) | nib_rem; + *vp = (new_counters[i] << nib_shift) | nib_rem; } - if (old_counter_bits != 0xffff && (++_samples_since_decay >= _window_size)) [[unlikely]] { + if (++_estimated_sample_count >= _window_size) [[unlikely]] { div_all_by_2(); - _samples_since_decay /= 2; + _estimated_sample_count /= 2; + } + if constexpr (ReturnMinCount) { + return std::min(std::min(new_counters[0], new_counters[1]), + std::min(new_counters[2], new_counters[3])); + } else { + return 0; } } +void RawRelativeFrequencySketch::add_by_hash(uint64_t hash) noexcept { + (void)add_by_hash_impl(hash); +} + +uint8_t RawRelativeFrequencySketch::add_and_count_by_hash(uint64_t hash) noexcept { + return add_by_hash_impl(hash); +} + /** * Estimates the count associated with the given hash. 
This uses the exact same counter * addressing as `add_by_hash()`, so refer to that function for a description on the @@ -93,7 +101,7 @@ uint8_t RawRelativeFrequencySketch::count_min_by_hash(uint64_t hash) const noexc const uint8_t* block_ptr = static_cast(_buf.get()) + (block * 64); uint8_t cm[4]; for (uint8_t i = 0; i < 4; ++i) { - uint8_t h = hash >> (i*8); + uint8_t h = hash >> (i*5); const uint8_t* vp = block_ptr + (i * 16) + (h & 0xf); // row #i byte select h >>= 4; const uint8_t nib_shift = (h & 1) * 4; // 4 or 0 @@ -103,7 +111,7 @@ uint8_t RawRelativeFrequencySketch::count_min_by_hash(uint64_t hash) const noexc return std::min(std::min(cm[0], cm[1]), std::min(cm[2], cm[3])); } -std::weak_ordering +std::strong_ordering RawRelativeFrequencySketch::estimate_relative_frequency_by_hash(uint64_t lhs_hash, uint64_t rhs_hash) const noexcept { return count_min_by_hash(lhs_hash) <=> count_min_by_hash(rhs_hash); } @@ -148,11 +156,12 @@ void RawRelativeFrequencySketch::div_all_by_2() noexcept { for (uint64_t i = 0; i < n_blocks; ++i) { for (uint32_t j = 0; j < 8; ++j) { uint64_t chunk; + static_assert(sizeof(chunk)*8 == 64); // Compiler will optimize away memcpys (avoids aliasing). - memcpy(&chunk, block_ptr + (sizeof(uint64_t) * j), sizeof(uint64_t)); + memcpy(&chunk, block_ptr + (8 * j), 8); chunk >>= 1; - chunk &= 0x7777777777777777ULL; // nibble ~MSB mask - memcpy(block_ptr + (sizeof(uint64_t) * j), &chunk, sizeof(uint64_t)); + chunk &= 0x7777'7777'7777'7777ULL; // nibble ~MSB mask + memcpy(block_ptr + (8 * j), &chunk, 8); } block_ptr += 64; } diff --git a/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h index db008768792a..6b0fe4787138 100644 --- a/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h +++ b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h @@ -55,7 +55,7 @@ namespace vespalib { */ class RawRelativeFrequencySketch { alloc::Alloc _buf; - size_t _samples_since_decay; + size_t _estimated_sample_count; size_t _window_size; uint32_t _block_mask_bits; public: @@ -63,14 +63,28 @@ class RawRelativeFrequencySketch { ~RawRelativeFrequencySketch(); void add_by_hash(uint64_t hash) noexcept; - [[nodiscard]] std::weak_ordering estimate_relative_frequency_by_hash(uint64_t lhs_hash, uint64_t rhs_hash) const noexcept; + [[nodiscard]] uint8_t add_and_count_by_hash(uint64_t hash) noexcept; + // Note: since this compares _hashes_ rather than elements this has strong ordering semantics. + [[nodiscard]] std::strong_ordering estimate_relative_frequency_by_hash(uint64_t lhs_hash, uint64_t rhs_hash) const noexcept; // Gets the raw underlying counter value saturated in [0, 15] for a given hash. [[nodiscard]] uint8_t count_min_by_hash(uint64_t hash) const noexcept; + [[nodiscard]] size_t window_size() const noexcept { return _window_size; } +private: void div_all_by_2() noexcept; - [[nodiscard]] size_t window_size() const noexcept { return _window_size; } + template + uint8_t add_by_hash_impl(uint64_t hash) noexcept; +}; + +template +concept SketchHasher = requires(H h, T t) { + // Hashers should never throw. + { h(t) } noexcept; + // We need a 64-bit hash output (not using uint64_t since STL is standardized + // on returning size_t from hash functions). + { h(t) } -> std::same_as; }; /** @@ -82,14 +96,13 @@ class RawRelativeFrequencySketch { * * See `RawRelativeFrequencySketch` for algorithm details. 
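 *
 * Minimal usage sketch (the element type and sizing below are illustrative;
 * see the member declarations for the full API):
 *
 *   RelativeFrequencySketch<uint32_t> sketch(1000); // sized for a ~1000 entry cache
 *   sketch.add(42);                                 // bump estimated frequency of 42
 *   uint8_t f = sketch.add_and_count(42);           // add again and get the new estimate
 *                                                   // (2 here, barring collisions), saturated at 15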
*/ -template , bool HasGoodEntropyHash = false> -requires requires(Hash h, T t) { noexcept(noexcept(h(t))); } +template Hash = std::hash, bool HasGoodEntropyHash = false> class RelativeFrequencySketch { RawRelativeFrequencySketch _impl; - Hash _hash; + [[no_unique_address]] Hash _hash; public: // Initializes a sketch used for estimating frequencies for an underlying cache - // (or similar datastructure) that can hold a maximum of `count` entries. + // (or similar data structure) that can hold a maximum of `count` entries. explicit RelativeFrequencySketch(size_t count, Hash hash = Hash{}) : _impl(count), _hash(hash) @@ -109,6 +122,10 @@ class RelativeFrequencySketch { void add(const T& elem) noexcept { _impl.add_by_hash(hash_elem(elem)); } + // Same as `add` but returns Count-Min estimate from _after_ `elem` has been added. + [[nodiscard]] uint8_t add_and_count(const T& elem) noexcept { + return _impl.add_and_count_by_hash(hash_elem(elem)); + } // Returns a frequency estimate for the given element, saturated at 15. Since this is // a probabilistic sketch, the frequency may be overestimated. Note that automatic counter // decaying will over time reduce the reported frequency of elements that are no longer @@ -121,10 +138,6 @@ class RelativeFrequencySketch { const uint64_t rhs_hash = hash_elem(rhs); return _impl.estimate_relative_frequency_by_hash(lhs_hash, rhs_hash); } - // Manually trigger counter decay; divides all count estimates by 2 - void div_all_by_2() { - _impl.div_all_by_2(); - } // Sample count required before all counters are automatically divided by 2. // Note that invoking `add(v)` for an element `v` whose counters are _all_ fully // saturated prior to the invocation will _not_ count towards the sample count. From 772dfba6ee4623df0b8471d9f17c82b9a7195ebb Mon Sep 17 00:00:00 2001 From: Arnstein Ressem Date: Thu, 14 Nov 2024 14:42:36 +0100 Subject: [PATCH 028/126] Fix the release tag detection --- dist/release-vespa-rpm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dist/release-vespa-rpm.sh b/dist/release-vespa-rpm.sh index 87ff7f4c3403..7880d6c44cef 100755 --- a/dist/release-vespa-rpm.sh +++ b/dist/release-vespa-rpm.sh @@ -18,7 +18,7 @@ git checkout master git pull --rebase # Create a proper release tag if not there -if [[ $(git show-ref --tags "$RELEASE_TAG" | awk '{print $1}') != "$GITREF" ]]; then +if [[ $(git rev-list -n 1 "$RELEASE_TAG") != "$GITREF" ]]; then git tag -a "$RELEASE_TAG" -m "Release version $VERSION" $GITREF git push origin "$RELEASE_TAG" fi From 2931d237caba8a85edbc3dd4aff382f025eddda1 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Thu, 14 Nov 2024 15:43:48 +0100 Subject: [PATCH 029/126] Only read bitvector when it will be used. 
--- .../searchlib/diskindex/disktermblueprint.cpp | 33 +++++++++++++++---- .../searchlib/diskindex/disktermblueprint.h | 3 ++ 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp index 08578fce9f08..f6ccd0894107 100644 --- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include LOG_SETUP(".diskindex.disktermblueprint"); @@ -51,7 +52,9 @@ DiskTermBlueprint::DiskTermBlueprint(const FieldSpec & field, _useBitVector(useBitVector), _fetchPostingsDone(false), _postingHandle(), - _bitVector() + _bitVector(), + _mutex(), + _late_bitvector() { setEstimate(HitEstimate(_lookupRes.counts._numDocs, _lookupRes.counts._numDocs == 0)); @@ -62,8 +65,10 @@ DiskTermBlueprint::fetchPostings(const queryeval::ExecuteInfo &execInfo) { (void) execInfo; if (!_fetchPostingsDone) { - _bitVector = _field_index.read_bit_vector(_bitvector_lookup_result); - if (!_useBitVector || !_bitVector) { + if (_useBitVector && _bitvector_lookup_result.valid()) { + _bitVector = _field_index.read_bit_vector(_bitvector_lookup_result); + } + if (!_bitVector) { _postingHandle = _field_index.read_posting_list(_lookupRes); } } @@ -77,13 +82,27 @@ DiskTermBlueprint::calculate_flow_stats(uint32_t docid_limit) const return {rel_est, disk_index_cost(rel_est), disk_index_strict_cost(rel_est)}; } +const BitVector * +DiskTermBlueprint::get_bitvector() const +{ + if (_bitVector) { + return _bitVector.get(); + } + std::lock_guard guard(_mutex); + if (!_late_bitvector) { + _late_bitvector = _field_index.read_bit_vector(_bitvector_lookup_result); + assert(_late_bitvector); + } + return _late_bitvector.get(); +} + SearchIterator::UP DiskTermBlueprint::createLeafSearch(const TermFieldMatchDataArray & tfmda) const { - if (_bitVector && (_useBitVector || tfmda[0]->isNotNeeded())) { + if (_bitvector_lookup_result.valid() && (_useBitVector || tfmda[0]->isNotNeeded())) { LOG(debug, "Return BitVectorIterator: %s, wordNum(%" PRIu64 "), docCount(%" PRIu64 ")", getName(_field_index.get_field_id()).c_str(), _lookupRes.wordNum, _lookupRes.counts._numDocs); - return BitVectorIterator::create(_bitVector.get(), *tfmda[0], strict()); + return BitVectorIterator::create(get_bitvector(), *tfmda[0], strict()); } auto search(_field_index.create_iterator(_lookupRes, _postingHandle, tfmda)); if (_useBitVector) { @@ -101,8 +120,8 @@ DiskTermBlueprint::createFilterSearch(FilterConstraint) const { auto wrapper = std::make_unique(getState().numFields()); auto & tfmda = wrapper->tfmda(); - if (_bitVector) { - wrapper->wrap(BitVectorIterator::create(_bitVector.get(), *tfmda[0], strict())); + if (_bitvector_lookup_result.valid()) { + wrapper->wrap(BitVectorIterator::create(get_bitvector(), *tfmda[0], strict())); } else { wrapper->wrap(_field_index.create_iterator(_lookupRes, _postingHandle, tfmda)); } diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h index c794578fa15c..1eb20f72f86f 100644 --- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h @@ -22,7 +22,10 @@ class DiskTermBlueprint : public queryeval::SimpleLeafBlueprint bool _fetchPostingsDone; index::PostingListHandle _postingHandle; std::shared_ptr _bitVector; + mutable std::mutex _mutex; + 
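// Bit vector read lazily by get_bitvector(), guarded by _mutex, when it was not already read in fetchPostings().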
mutable std::shared_ptr _late_bitvector; + const BitVector* get_bitvector() const; public: /** * Create a new blueprint. From 6ce365bb9a313d12035038c069332a3d1f9f5718 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Thu, 14 Nov 2024 16:01:39 +0100 Subject: [PATCH 030/126] Handle long hash --- .../java/com/yahoo/schema/SchemaTestCase.java | 30 +++- docprocs/src/test/cfg2/documentmanager.cfg | 55 ++++++++ docprocs/src/test/cfg2/ilscripts.cfg | 7 + .../indexing/IndexingProcessorTestCase.java | 130 +++++++++--------- .../indexing/IndexingProcessorTester.java | 72 ++++++++++ .../expressions/HashExpression.java | 2 +- .../expressions/LowerCaseExpression.java | 2 +- 7 files changed, 227 insertions(+), 71 deletions(-) create mode 100644 docprocs/src/test/cfg2/documentmanager.cfg create mode 100644 docprocs/src/test/cfg2/ilscripts.cfg create mode 100644 docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTester.java diff --git a/config-model/src/test/java/com/yahoo/schema/SchemaTestCase.java b/config-model/src/test/java/com/yahoo/schema/SchemaTestCase.java index d77722bdd31b..aad915cead9f 100644 --- a/config-model/src/test/java/com/yahoo/schema/SchemaTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/SchemaTestCase.java @@ -3,12 +3,16 @@ import com.yahoo.config.model.deploy.DeployState; import com.yahoo.document.Document; +import com.yahoo.document.DocumentTypeManager; +import com.yahoo.document.config.DocumentmanagerConfig; import com.yahoo.schema.derived.DerivedConfiguration; import com.yahoo.schema.derived.SchemaInfo; import com.yahoo.schema.document.Stemming; import com.yahoo.schema.parser.ParseException; import com.yahoo.schema.processing.ImportedFieldsResolver; import com.yahoo.schema.processing.OnnxModelTypeResolver; +import com.yahoo.vespa.configdefinition.IlscriptsConfig; +import com.yahoo.vespa.configmodel.producers.DocumentManager; import com.yahoo.vespa.documentmodel.DocumentSummary; import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression; import com.yahoo.vespa.indexinglanguage.expressions.Expression; @@ -18,6 +22,8 @@ import com.yahoo.vespa.model.test.utils.DeployLoggerStub; import org.junit.jupiter.api.Test; +import java.util.List; + import static com.yahoo.config.model.test.TestUtil.joinLines; import static org.junit.jupiter.api.Assertions.*; @@ -486,19 +492,31 @@ void testInheritingMultipleRankProfilesWithOverlappingConstructsIsDisallowed2() void testDeriving() throws Exception { String schema = """ - schema test { - field my_hash type long { - indexing: input my_string | hash | attribute + schema page { + + field domain_hash type long { + indexing: input domain | hash | attribute } - document test { - field my_string type string { + + document page { + + field domain type string { + indexing: index | summary + match: word + rank: filter } } }"""; ApplicationBuilder builder = new ApplicationBuilder(new DeployLoggerStub()); builder.addSchema(schema); var application = builder.build(false); // validate=false to test config deriving without validation - new DerivedConfiguration(application.schemas().get("test"), application.rankProfileRegistry()); + var derived = new DerivedConfiguration(application.schemas().get("page"), application.rankProfileRegistry()); + var ilConfig = new IlscriptsConfig.Builder(); + derived.getIndexingScript().getConfig(ilConfig); + + var documentModel = new DocumentModelBuilder(); + var documentManager = documentModel.build(List.of(application.schemas().get("page"))); + var documentConfig = new 
DocumentManager().produce(documentManager, new DocumentmanagerConfig.Builder()); } private void assertInheritedFromParent(Schema schema, RankProfileRegistry rankProfileRegistry) { diff --git a/docprocs/src/test/cfg2/documentmanager.cfg b/docprocs/src/test/cfg2/documentmanager.cfg new file mode 100644 index 000000000000..e649ac39a6da --- /dev/null +++ b/docprocs/src/test/cfg2/documentmanager.cfg @@ -0,0 +1,55 @@ +ignoreundefinedfields false +usev8geopositions false +usev8geopositions false +doctype[0].name "document" +doctype[0].idx 10000 +doctype[0].contentstruct 10001 +doctype[0].primitivetype[0].idx 10002 +doctype[0].primitivetype[0].name "bool" +doctype[0].primitivetype[1].idx 10003 +doctype[0].primitivetype[1].name "byte" +doctype[0].primitivetype[2].idx 10004 +doctype[0].primitivetype[2].name "double" +doctype[0].primitivetype[3].idx 10005 +doctype[0].primitivetype[3].name "float" +doctype[0].primitivetype[4].idx 10006 +doctype[0].primitivetype[4].name "float16" +doctype[0].primitivetype[5].idx 10007 +doctype[0].primitivetype[5].name "int" +doctype[0].primitivetype[6].idx 10008 +doctype[0].primitivetype[6].name "long" +doctype[0].primitivetype[7].idx 10010 +doctype[0].primitivetype[7].name "predicate" +doctype[0].primitivetype[8].idx 10011 +doctype[0].primitivetype[8].name "raw" +doctype[0].primitivetype[9].idx 10012 +doctype[0].primitivetype[9].name "string" +doctype[0].primitivetype[10].idx 10014 +doctype[0].primitivetype[10].name "uri" +doctype[0].wsettype[0].idx 10013 +doctype[0].wsettype[0].elementtype 10012 +doctype[0].wsettype[0].createifnonexistent true +doctype[0].wsettype[0].removeifzero true +doctype[0].structtype[0].idx 10001 +doctype[0].structtype[0].name "document.header" +doctype[0].structtype[1].idx 10009 +doctype[0].structtype[1].name "position" +doctype[0].structtype[1].field[0].name "x" +doctype[0].structtype[1].field[0].internalid 914677694 +doctype[0].structtype[1].field[0].type 10007 +doctype[0].structtype[1].field[1].name "y" +doctype[0].structtype[1].field[1].internalid 900009410 +doctype[0].structtype[1].field[1].type 10007 +doctype[1].name "page" +doctype[1].idx 10015 +doctype[1].inherits[0].idx 10000 +doctype[1].contentstruct 10016 +doctype[1].fieldsets{[document]}.fields[0] "domain" +doctype[1].structtype[0].idx 10016 +doctype[1].structtype[0].name "page.header" +doctype[1].structtype[0].field[0].name "domain" +doctype[1].structtype[0].field[0].internalid 1169984294 +doctype[1].structtype[0].field[0].type 10012 +doctype[1].structtype[0].field[1].name "domain_hash" +doctype[1].structtype[0].field[1].internalid 305760502 +doctype[1].structtype[0].field[1].type 10008 diff --git a/docprocs/src/test/cfg2/ilscripts.cfg b/docprocs/src/test/cfg2/ilscripts.cfg new file mode 100644 index 000000000000..a2bea7d1ffdc --- /dev/null +++ b/docprocs/src/test/cfg2/ilscripts.cfg @@ -0,0 +1,7 @@ +maxtermoccurrences 10000 +maxtokenlength 1000 +fieldmatchmaxlength 1000000 +ilscript[0].doctype "page" +ilscript[0].docfield[0] "domain" +ilscript[0].content[0] "clear_state | guard { input domain | hash | attribute domain_hash; }" +ilscript[0].content[1] "clear_state | guard { input domain | exact | index domain | summary domain; }" diff --git a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java index df7c1a442d4a..d5b4f2009c1c 100644 --- a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java +++ 
b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java @@ -33,30 +33,28 @@ */ public class IndexingProcessorTestCase { - private static final String CONFIG_ID = "dir:src/test/cfg"; - - private final IndexingProcessor indexer = newProcessor(CONFIG_ID); - @Test public void requireThatIndexerForwardsDocumentsOfUnknownType() { + var tester = new IndexingProcessorTester(); Document input = new Document(new DocumentType("unknown"), "id:ns:unknown::"); - DocumentOperation output = process(new DocumentPut(input)); + DocumentOperation output = tester.process(new DocumentPut(input)); assertTrue(output instanceof DocumentPut); assertSame(input, ((DocumentPut)output).getDocument()); } @Test public void testPut() { + IndexingProcessorTester tester = new IndexingProcessorTester("src/test/cfg"); // 'combined' gets the value of both // 'combinedWithFallback' falls back to an empty string if an input is missing { // Both artist and title are set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); input.getDocument().setFieldValue(inputType.getField("artist"), new StringFieldValue("artist1")); input.getDocument().setFieldValue(inputType.getField("title"), new StringFieldValue("title1")); - Document output = ((DocumentPut)process(input)).getDocument(); + Document output = ((DocumentPut)tester.process(input)).getDocument(); assertEquals("artist1", output.getFieldValue("artist").getWrappedValue()); assertEquals("title1", output.getFieldValue("title").getWrappedValue()); assertNull(output.getFieldValue("song")); @@ -65,11 +63,11 @@ public void testPut() { } { // Just artist is set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); input.getDocument().setFieldValue(inputType.getField("artist"), new StringFieldValue("artist1")); - Document output = ((DocumentPut)process(input)).getDocument(); + Document output = ((DocumentPut)tester.process(input)).getDocument(); assertEquals("artist1", output.getFieldValue("artist").getWrappedValue()); assertNull(output.getFieldValue("title")); assertNull(output.getFieldValue("song")); @@ -78,11 +76,11 @@ public void testPut() { } { // Just title is set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); input.getDocument().setFieldValue(inputType.getField("title"), new StringFieldValue("title1")); - Document output = ((DocumentPut)process(input)).getDocument(); + Document output = ((DocumentPut)tester.process(input)).getDocument(); assertEquals("title1", output.getFieldValue("title").getWrappedValue()); assertNull(output.getFieldValue("artist")); assertNull(output.getFieldValue("song")); @@ -91,11 +89,11 @@ public void testPut() { } { // Neither title nor artist is set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); input.getDocument().setFieldValue(inputType.getField("song"), new StringFieldValue("song1")); - Document output = ((DocumentPut)process(input)).getDocument(); + Document output = 
((DocumentPut)tester.process(input)).getDocument(); assertNull(output.getFieldValue("artist")); assertNull(output.getFieldValue("title")); assertEquals("song1", output.getFieldValue("song").getWrappedValue()); @@ -104,10 +102,10 @@ public void testPut() { } { // None is set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); - Document output = ((DocumentPut)process(input)).getDocument(); + Document output = ((DocumentPut)tester.process(input)).getDocument(); assertNull(output.getFieldValue("artist")); assertNull(output.getFieldValue("title")); assertNull(output.getFieldValue("song")); @@ -116,99 +114,105 @@ public void testPut() { } } + @Test + public void testPutCfg2() { + // Config of the following schema, derived Nov 2024, by SchemaTestCase.testDeriving in the config-model + // + // schema page { + // + // field domain_hash type long { + // indexing: input domain | hash | attribute + // } + // + // document page { + // + // field domain type string { + // indexing: index | summary + // match: word + // rank: filter + // } + // } + // } + IndexingProcessorTester tester = new IndexingProcessorTester("src/test/cfg2"); + + { // Both artist and title are set + DocumentType inputType = tester.getDocumentType("page"); + DocumentPut input = new DocumentPut(inputType, "id:ns:page::"); + input.getDocument().setFieldValue(inputType.getField("domain"), new StringFieldValue("domain1")); + + Document output = ((DocumentPut)tester.process(input)).getDocument(); + assertEquals("domain1", output.getFieldValue("domain").getWrappedValue()); + assertEquals(1386505442371493468L, output.getFieldValue("domain_hash").getWrappedValue()); + } + } + @Test public void testUpdate() { + IndexingProcessorTester tester = new IndexingProcessorTester("src/test/cfg"); // 'combined' gets the value of artist and title // 'combinedWithFallback' falls back to an empty string if an input is missing { // Both artist and title are set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentUpdate input = new DocumentUpdate(inputType, "id:ns:music::"); input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("artist"), new StringFieldValue("artist1"))); input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("title"), new StringFieldValue("title1"))); - DocumentUpdate output = (DocumentUpdate)process(input); + DocumentUpdate output = (DocumentUpdate)tester.process(input); assertEquals(4, output.fieldUpdates().size()); - assertAssignment("artist", "artist1", output); - assertAssignment("title", "title1", output); - assertAssignment("combined", "artist1 title1", output); - assertAssignment("combinedWithFallback", "artist1 title1", output); + tester.assertAssignment("artist", "artist1", output); + tester.assertAssignment("title", "title1", output); + tester.assertAssignment("combined", "artist1 title1", output); + tester.assertAssignment("combinedWithFallback", "artist1 title1", output); } { // Just artist is set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentUpdate input = new DocumentUpdate(inputType, "id:ns:music::"); input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("artist"), new StringFieldValue("artist1"))); - DocumentUpdate output = 
(DocumentUpdate)process(input); + DocumentUpdate output = (DocumentUpdate)tester.process(input); assertEquals(2, output.fieldUpdates().size()); - assertAssignment("artist", "artist1", output); - assertAssignment("combinedWithFallback", "artist1 ", output); + tester.assertAssignment("artist", "artist1", output); + tester.assertAssignment("combinedWithFallback", "artist1 ", output); } { // Just title is set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentUpdate input = new DocumentUpdate(inputType, "id:ns:music::"); input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("title"), new StringFieldValue("title1"))); - DocumentUpdate output = (DocumentUpdate)process(input); + DocumentUpdate output = (DocumentUpdate)tester.process(input); assertEquals(2, output.fieldUpdates().size()); - assertAssignment("title", "title1", output); - assertAssignment("combinedWithFallback", " title1", output); + tester.assertAssignment("title", "title1", output); + tester.assertAssignment("combinedWithFallback", " title1", output); } { // Neither title nor artist is set: Should not update embeddings even though it has fallbacks for all - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentUpdate input = new DocumentUpdate(inputType, "id:ns:music::"); input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("song"), new StringFieldValue("song1"))); - DocumentUpdate output = (DocumentUpdate)process(input); + DocumentUpdate output = (DocumentUpdate)tester.process(input); assertEquals(1, output.fieldUpdates().size()); - assertAssignment("song", "song1", output); + tester.assertAssignment("song", "song1", output); } { // None is set: Should not update anything - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentUpdate input = new DocumentUpdate(inputType, "id:ns:music::"); - DocumentUpdate output = (DocumentUpdate)process(input); + DocumentUpdate output = (DocumentUpdate)tester.process(input); assertNull(output); } } @Test public void requireThatIndexerForwardsUpdatesOfUnknownType() { + var tester = new IndexingProcessorTester(); DocumentUpdate input = new DocumentUpdate(new DocumentType("unknown"), "id:ns:music::"); - DocumentOperation output = process(input); + DocumentOperation output = tester.process(input); assertSame(input, output); } - private void assertAssignment(String fieldName, String value, DocumentUpdate output) { - FieldUpdate update = output.getFieldUpdate(fieldName); - assertNotNull("Update of '" + fieldName + "' exists", update); - assertEquals(fieldName, update.getField().getName()); - assertEquals(1, update.getValueUpdates().size()); - ValueUpdate combinedAssignment = update.getValueUpdate(0); - assertTrue(combinedAssignment instanceof AssignValueUpdate); - assertEquals(new StringFieldValue(value), combinedAssignment.getValue()); - } - - private DocumentOperation process(DocumentOperation input) { - Processing proc = new Processing(); - proc.getDocumentOperations().add(input); - indexer.process(proc); - - List operations = proc.getDocumentOperations(); - if (operations.isEmpty()) return null; - assertEquals(1, operations.size()); - return operations.get(0); - } - - @SuppressWarnings("deprecation") - private static IndexingProcessor newProcessor(String configId) { - return new 
IndexingProcessor(new DocumentTypeManager(ConfigGetter.getConfig(DocumentmanagerConfig.class, configId)), - ConfigGetter.getConfig(IlscriptsConfig.class, configId), - new SimpleLinguistics(), - new ComponentRegistry<>()); - } } diff --git a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTester.java b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTester.java new file mode 100644 index 000000000000..817817252fd3 --- /dev/null +++ b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTester.java @@ -0,0 +1,72 @@ +package com.yahoo.docprocs.indexing; + +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.config.subscription.ConfigGetter; +import com.yahoo.docproc.Processing; +import com.yahoo.document.DocumentOperation; +import com.yahoo.document.DocumentType; +import com.yahoo.document.DocumentTypeManager; +import com.yahoo.document.DocumentUpdate; +import com.yahoo.document.config.DocumentmanagerConfig; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.document.update.AssignValueUpdate; +import com.yahoo.document.update.FieldUpdate; +import com.yahoo.document.update.ValueUpdate; +import com.yahoo.language.simple.SimpleLinguistics; +import com.yahoo.vespa.configdefinition.IlscriptsConfig; + +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** + * @author bratseth + */ +public class IndexingProcessorTester { + + private final IndexingProcessor indexer; + + public IndexingProcessorTester() { + indexer = newProcessor("raw:"); + } + + public IndexingProcessorTester(String configDir) { + indexer = newProcessor("dir:" + configDir); + } + + public DocumentType getDocumentType(String name) { + return indexer.getDocumentTypeManager().getDocumentType(name); + } + + public void assertAssignment(String fieldName, String value, DocumentUpdate output) { + FieldUpdate update = output.getFieldUpdate(fieldName); + assertNotNull("Update of '" + fieldName + "' exists", update); + assertEquals(fieldName, update.getField().getName()); + assertEquals(1, update.getValueUpdates().size()); + ValueUpdate combinedAssignment = update.getValueUpdate(0); + assertTrue(combinedAssignment instanceof AssignValueUpdate); + assertEquals(new StringFieldValue(value), combinedAssignment.getValue()); + } + + public DocumentOperation process(DocumentOperation input) { + Processing proc = new Processing(); + proc.getDocumentOperations().add(input); + indexer.process(proc); + + List operations = proc.getDocumentOperations(); + if (operations.isEmpty()) return null; + assertEquals(1, operations.size()); + return operations.get(0); + } + + @SuppressWarnings("deprecation") + private static IndexingProcessor newProcessor(String configId) { + return new IndexingProcessor(new DocumentTypeManager(ConfigGetter.getConfig(DocumentmanagerConfig.class, configId)), + ConfigGetter.getConfig(IlscriptsConfig.class, configId), + new SimpleLinguistics(), + new ComponentRegistry<>()); + } + +} diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java index f38e03a3aed6..fdebde6c4945 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java 
@@ -55,7 +55,7 @@ protected void doVerify(VerificationContext context) { @Override protected void doExecute(ExecutionContext context) { StringFieldValue input = (StringFieldValue) context.getCurrentValue(); - if (DataType.INT.equals(targetType) || requireOutputType().equals(DataType.INT)) + if (DataType.INT.equals(targetType) || ( ! DataType.LONG.equals(targetType) && requireOutputType().equals(DataType.INT))) context.setCurrentValue(new IntegerFieldValue(hashToInt(input.getString()))); else if (DataType.LONG.equals(targetType) || requireOutputType().equals(DataType.LONG)) context.setCurrentValue(new LongFieldValue(hashToLong(input.getString()))); diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/LowerCaseExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/LowerCaseExpression.java index a7d77c5342d4..66a9ac57038e 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/LowerCaseExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/LowerCaseExpression.java @@ -17,7 +17,7 @@ public LowerCaseExpression() { @Override public DataType setInputType(DataType inputType, VerificationContext context) { - return super.setInputType(inputType, context); + return super.setInputType(inputType, DataType.STRING, context); } @Override From 7cd2ec7363ac5db3e36da8d3c64b5bf886db7ffa Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Thu, 14 Nov 2024 15:02:09 +0000 Subject: [PATCH 031/126] Deinline frequency sketch counter division This by definition happens very rarely relative to how often the caller is invoked, so don't inline it. --- vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h index 6b0fe4787138..1df6c8cee1a9 100644 --- a/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h +++ b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h @@ -72,7 +72,7 @@ class RawRelativeFrequencySketch { [[nodiscard]] size_t window_size() const noexcept { return _window_size; } private: - void div_all_by_2() noexcept; + void div_all_by_2() noexcept __attribute__((noinline)); template uint8_t add_by_hash_impl(uint64_t hash) noexcept; From 5d1a954c670be4a0c3c5c7d5259327a4b1bc8e75 Mon Sep 17 00:00:00 2001 From: Valerij Fredriksen Date: Thu, 14 Nov 2024 16:12:38 +0100 Subject: [PATCH 032/126] Allow prepare/activate of loadbalancers to fail during bootstrap deployment --- .../config/provision/ActivationContext.java | 6 +++++- .../config/server/ApplicationRepository.java | 15 +++++++++---- .../config/server/deploy/Deployment.java | 4 ++-- .../provision/provisioning/Activator.java | 21 ++++++++++++++----- .../provisioning/InfraDeployerImpl.java | 2 +- .../NodeRepositoryProvisioner.java | 4 ++-- .../provision/provisioning/Preparer.java | 8 ++++++- .../provision/testutils/MockDeployer.java | 2 +- .../testutils/MockNodeRepository.java | 2 +- .../provision/RealDataScenarioTest.java | 4 ++-- .../maintenance/FailedExpirerTest.java | 2 +- .../provision/maintenance/NodeFailTester.java | 4 ++-- .../provisioning/ProvisioningTester.java | 2 +- 13 files changed, 52 insertions(+), 24 deletions(-) diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/ActivationContext.java 
b/config-provisioning/src/main/java/com/yahoo/config/provision/ActivationContext.java index 5e28f61d41fc..74c7a133f475 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/ActivationContext.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/ActivationContext.java @@ -9,12 +9,16 @@ public class ActivationContext { private final long generation; + private final boolean isBootstrap; - public ActivationContext(long generation) { + public ActivationContext(long generation, boolean isBootstrap) { this.generation = generation; + this.isBootstrap = isBootstrap; } /** Returns the application config generation we are activating */ public long generation() { return generation; } + /** Returns true if this deployment is done to bootstrap the config server */ + public boolean isBootstrap() { return isBootstrap; } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java index 95ccd9593843..457981109a53 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java @@ -34,7 +34,6 @@ import com.yahoo.config.provision.Zone; import com.yahoo.config.provision.exception.ActivationConflictException; import com.yahoo.container.jdisc.HttpResponse; -import com.yahoo.container.jdisc.SecretStoreProvider; import com.yahoo.container.jdisc.secretstore.SecretStore; import com.yahoo.docproc.jdisc.metric.NullMetric; import com.yahoo.io.IOUtils; @@ -113,7 +112,15 @@ import java.time.Clock; import java.time.Duration; import java.time.Instant; -import java.util.*; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.OptionalLong; +import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.UnaryOperator; import java.util.logging.Level; @@ -873,7 +880,7 @@ public TesterSuspendedException(String message) { // ---------------- Session operations ---------------------------------------------------------------- - public Activation activate(Session session, ApplicationId applicationId, Tenant tenant, boolean force) { + public Activation activate(Session session, ApplicationId applicationId, Tenant tenant, boolean isBootstrap, boolean force) { NestedTransaction transaction = new NestedTransaction(); Optional applicationTransaction = hostProvisioner.map(provisioner -> provisioner.lock(applicationId)) .map(lock -> new ApplicationTransaction(lock, transaction)); @@ -885,7 +892,7 @@ public Activation activate(Session session, ApplicationId applicationId, Tenant transaction.add(deactivateCurrentActivateNew(activeSession, session, force)); if (applicationTransaction.isPresent()) { hostProvisioner.get().activate(session.getAllocatedHosts().getHosts(), - new ActivationContext(session.getSessionId()), + new ActivationContext(session.getSessionId(), isBootstrap), applicationTransaction.get()); applicationTransaction.get().nested().commit(); } else { diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java index 6a5aabd0410f..bd3a081d2077 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java +++ 
b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java @@ -129,7 +129,7 @@ public long activate() { TimeoutBudget timeoutBudget = params.get().getTimeoutBudget(); timeoutBudget.assertNotTimedOut(() -> "Timeout exceeded when trying to activate '" + applicationId + "'"); - Activation activation = applicationRepository.activate(session, applicationId, tenant, params.get().force()); + Activation activation = applicationRepository.activate(session, applicationId, tenant, params.get().isBootstrap(), params.get().force()); waitForActivation(applicationId, timeoutBudget, activation); restartServicesIfNeeded(applicationId); storeReindexing(applicationId); @@ -270,7 +270,7 @@ private static void waitForResourcesOrTimeout(PrepareParams params, Session sess if (!params.waitForResourcesInPrepare() || provisioner.isEmpty()) return; Set preparedHosts = session.getAllocatedHosts().getHosts(); - ActivationContext context = new ActivationContext(session.getSessionId()); + ActivationContext context = new ActivationContext(session.getSessionId(), params.isBootstrap()); AtomicReference lastException = new AtomicReference<>(); while (true) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java index c1cf70b3da63..1f7f207a4d61 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.provisioning; +import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.ClusterMembership; @@ -17,6 +18,7 @@ import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Allocation; +import com.yahoo.yolean.Exceptions; import java.time.Instant; import java.util.ArrayList; @@ -26,6 +28,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.logging.Logger; import java.util.stream.Collectors; /** @@ -35,6 +38,8 @@ */ class Activator { + private static final Logger log = Logger.getLogger(Activator.class.getName()); + private final NodeRepository nodeRepository; private final Optional loadBalancerProvisioner; @@ -44,9 +49,9 @@ public Activator(NodeRepository nodeRepository, Optional hosts, long generation, ApplicationTransaction transaction) { - NodeList newActive = activateNodes(hosts, generation, transaction); - activateLoadBalancers(hosts, newActive, transaction); + public void activate(Collection hosts, ActivationContext context, ApplicationTransaction transaction) { + NodeList newActive = activateNodes(hosts, context.generation(), transaction); + activateLoadBalancers(hosts, newActive, transaction, context.isBootstrap()); } /** @@ -149,8 +154,14 @@ private void unreserveParentsOf(NodeList nodes) { } /** Activate load balancers */ - private void activateLoadBalancers(Collection hosts, NodeList newActive, ApplicationTransaction transaction) { - loadBalancerProvisioner.ifPresent(provisioner -> provisioner.activate(allClustersOf(hosts), newActive, transaction)); + private void 
activateLoadBalancers(Collection hosts, NodeList newActive, ApplicationTransaction transaction, boolean isBootstrap) { + try { + loadBalancerProvisioner.ifPresent(provisioner -> provisioner.activate(allClustersOf(hosts), newActive, transaction)); + } catch (RuntimeException e) { + if (isBootstrap) + log.warning("Failed to activate load balancers for " + transaction.application() + ": " + Exceptions.toMessageString(e) + " (Ignoring because bootstrap deployment)"); + throw e; + } } private static Set allClustersOf(Collection hosts) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java index 39c14be4d2b5..7099cbac4bc9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java @@ -107,7 +107,7 @@ public long activate() { removeApplication(application.getApplicationId()); } else { NestedTransaction nestedTransaction = new NestedTransaction(); - provisioner.activate(hostSpecs, new ActivationContext(0), new ApplicationTransaction(lock, nestedTransaction)); + provisioner.activate(hostSpecs, new ActivationContext(0, !application.getCapacity().canFail()), new ApplicationTransaction(lock, nestedTransaction)); nestedTransaction.commit(); duperModel.infraApplicationActivated( diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index 8c056ad1a939..d44e7e11799b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -4,6 +4,7 @@ import com.yahoo.component.annotation.Inject; import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationMutex; import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.CapacityPolicies; @@ -14,7 +15,6 @@ import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; -import com.yahoo.config.provision.ApplicationMutex; import com.yahoo.config.provision.ProvisionLogger; import com.yahoo.config.provision.Provisioner; import com.yahoo.config.provision.Zone; @@ -140,7 +140,7 @@ private NodeResources getNodeResources(ClusterSpec cluster, NodeResources nodeRe @Override public void activate(Collection hosts, ActivationContext context, ApplicationTransaction transaction) { validate(hosts); - activator.activate(hosts, context.generation(), transaction); + activator.activate(hosts, context, transaction); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java index 0445e40e0698..1a7d116005fb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java @@ -70,7 +70,13 @@ public List prepare(ApplicationId 
application, ClusterSpec cluster, NodeSp log.log(Level.FINE, () -> "Preparing " + cluster.type().name() + " " + cluster.id() + " with requested resources " + requested.resources().orElse(NodeResources.unspecified())); - loadBalancerProvisioner.ifPresent(provisioner -> provisioner.prepare(application, cluster, requested)); + try { + loadBalancerProvisioner.ifPresent(provisioner -> provisioner.prepare(application, cluster, requested)); + } catch (RuntimeException e) { + if (!requested.canFail()) + log.warning("Failed to prepare load balancers for " + application + " " + cluster + ": " + Exceptions.toMessageString(e) + " (Ignoring because bootstrap deployment)"); + throw e; + } // Try preparing in memory without global unallocated lock. Most of the time there should be no changes, // and we can return nodes previously allocated. diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java index 1e3c74af2475..7880b1c063f8 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java @@ -183,7 +183,7 @@ public long activate() { activations++; try (var lock = provisioner.lock(application.id)) { try (NestedTransaction t = new NestedTransaction()) { - provisioner.activate(preparedHosts, new ActivationContext(activations), new ApplicationTransaction(lock, t)); + provisioner.activate(preparedHosts, new ActivationContext(activations, false), new ApplicationTransaction(lock, t)); t.commit(); lastActivationTimes.put(application.id, clock.instant()); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java index d8d6b5ad0a2d..da8062162460 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java @@ -268,7 +268,7 @@ private void populate() { private void activate(List hosts, ApplicationId application, NodeRepositoryProvisioner provisioner) { try (var lock = provisioner.lock(application)) { NestedTransaction transaction = new NestedTransaction(); - provisioner.activate(hosts, new ActivationContext(0), new ApplicationTransaction(lock, transaction)); + provisioner.activate(hosts, new ActivationContext(0, false), new ApplicationTransaction(lock, transaction)); transaction.commit(); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/RealDataScenarioTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/RealDataScenarioTest.java index 50b30ef6b3a7..5c49fcbf43d9 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/RealDataScenarioTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/RealDataScenarioTest.java @@ -5,6 +5,7 @@ import com.yahoo.config.model.builder.xml.XmlHelper; import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationMutex; import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.Cloud; @@ -16,7 +17,6 @@ import com.yahoo.config.provision.Flavor; import 
com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; -import com.yahoo.config.provision.ApplicationMutex; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.Zone; @@ -119,7 +119,7 @@ private void deploy(ProvisioningTester tester, ApplicationId app, ClusterSpec[] .flatMap(Collection::stream) .toList(); NestedTransaction transaction = new NestedTransaction(); - tester.provisioner().activate(hostSpecs, new ActivationContext(0), new ApplicationTransaction(new ApplicationMutex(app, () -> {}), transaction)); + tester.provisioner().activate(hostSpecs, new ActivationContext(0, false), new ApplicationTransaction(new ApplicationMutex(app, () -> {}), transaction)); transaction.commit(); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java index abe789bc968b..a33de70384cd 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java @@ -362,7 +362,7 @@ public FailureScenario allocate(ApplicationId applicationId, ClusterSpec cluster (level, message) -> System.out.println(level + ": " + message) ); try (var lock = provisioner.lock(applicationId)) { NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator)); - provisioner.activate(Set.copyOf(preparedNodes), new ActivationContext(0), new ApplicationTransaction(lock, transaction)); + provisioner.activate(Set.copyOf(preparedNodes), new ActivationContext(0, !capacity.canFail()), new ApplicationTransaction(lock, transaction)); transaction.commit(); } return this; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java index 999b398f8077..726558381126 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java @@ -26,11 +26,11 @@ import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; import com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; +import com.yahoo.vespa.hosted.provision.testutils.InMemoryProvisionLogger; import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; import com.yahoo.vespa.hosted.provision.testutils.ServiceMonitorStub; import com.yahoo.vespa.service.duper.InfraApplication; import com.yahoo.vespa.service.duper.TenantHostApplication; -import com.yahoo.vespa.hosted.provision.testutils.InMemoryProvisionLogger; import java.time.Clock; import java.time.Duration; @@ -274,7 +274,7 @@ public void activate(ApplicationId applicationId, ClusterSpec cluster, Capacity List hosts = provisioner.prepare(applicationId, cluster, capacity, new InMemoryProvisionLogger()); try (var lock = provisioner.lock(applicationId)) { NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator)); - provisioner.activate(hosts, new ActivationContext(0), new ApplicationTransaction(lock, transaction)); + provisioner.activate(hosts, new ActivationContext(0, !capacity.canFail()), new 
ApplicationTransaction(lock, transaction)); transaction.commit(); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index 4239e21e01a9..00dcd30dfc0c 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -247,7 +247,7 @@ public Collection activate(ApplicationId application, Collection Date: Thu, 14 Nov 2024 16:26:06 +0100 Subject: [PATCH 033/126] feat: Add support for continuation in YQL Queries in LSP --- .../schemals/schemadocument/YQLDocument.java | 86 +++++++++++++++++-- .../java/ai/vespa/schemals/YQLParserTest.java | 75 +++++++++++++++- 2 files changed, 152 insertions(+), 9 deletions(-) diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java index d4a23ec004bb..e6dffbbe2287 100644 --- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java +++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java @@ -114,6 +114,57 @@ private static YQLPartParseResult parseYQLPart(CharSequence content, ClientLogge return new YQLPartParseResult(List.of(), Optional.of(retNode), charsRead); } + private static int findContinuationLength(String inputString) { + + // BUG: This never check if the curly bracket are in a string or something else + + char[] charArr = inputString.toCharArray(); + int continuationStart = -1; + for (int i = 0; i < charArr.length; i++) { + if (!Character.isWhitespace(charArr[i])) { + if (charArr[i] != '{') { + return 0; + } + + continuationStart = i; + break; + + } + } + if (continuationStart == -1) return 0; + + + int level = 0; + int continuationEnd = charArr.length; + for (int i = continuationStart; i < charArr.length; i++) { + if (charArr[i] == '{') level++; + if (charArr[i] == '}') level--; + + if (level == 0) { + continuationEnd = i + 1; + break; + }; + } + + return continuationEnd; + } + + private static ParseResult parseContinuation(String inputString, Position offset) { + + YQLPlusParser parser = new YQLPlusParser(inputString); + + try { + parser.map_expression(); + } catch (ParseException exception) { + // Ignored, marked as dirty node + } + + var node = parser.rootNode(); + YQLNode retNode = new YQLNode(node, offset); + + return new ParseResult(List.of(), Optional.of(retNode)); + } + private static YQLPartParseResult parseYQLQuery(ParseContext context, String queryString, Position offset) { YQLNode ret = new YQLNode(new Range(offset, offset)); @@ -139,14 +190,35 @@ private static YQLPartParseResult parseYQLQuery(ParseContext context, String que Position groupOffset = CSTUtils.addPositions(groupOffsetWithoutPipe, new Position(0, 1)); // Add pipe char ret.addChild(new YQLNode(new Range(groupOffsetWithoutPipe, groupOffset), "|")); - - YQLPartParseResult groupingResult = VespaGroupingParser.parseVespaGrouping(groupingString, context.logger(), groupOffset); - if (groupingResult.CST.isPresent()) { - ret.addChild(groupingResult.CST.get()); + charsRead++; + + // Look for continuation + int continuationLength = 
findContinuationLength(groupingString); + if (continuationLength != 0) { + String continuationString = groupingString.substring(0, continuationLength); + ParseResult continuationResults = parseContinuation(continuationString, groupOffset); + + diagnostics.addAll(continuationResults.diagnostics()); + if (continuationResults.CST().isPresent()) { + ret.addChild(continuationResults.CST().get()); + } + + charsRead += continuationLength; + groupingString = groupingString.substring(continuationLength); + Position continuationPosition = StringUtils.getStringPosition(continuationString); + groupOffset = CSTUtils.addPositions(groupOffset, continuationPosition); + } + + if (groupingString.length() > 0 && groupingString.strip().length() > 0) { + YQLPartParseResult groupingResult = VespaGroupingParser.parseVespaGrouping(groupingString, context.logger(), groupOffset); + if (groupingResult.CST.isPresent()) { + ret.addChild(groupingResult.CST.get()); + } + + diagnostics.addAll(groupingResult.diagnostics()); + charsRead += groupingResult.charsRead(); // Add one for the pipe symbol } - diagnostics.addAll(groupingResult.diagnostics()); - charsRead += 1 + groupingResult.charsRead(); // Add one for the pipe symbol } } @@ -190,7 +262,7 @@ public static ParseResult parseContent(ParseContext context) { charsRead = newOffset; } - YQLUtils.printTree(context.logger(), ret); + // YQLUtils.printTree(context.logger(), ret); return new ParseResult(diagnostics, Optional.of(ret)); } diff --git a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java index 15338f4eb5f9..d5808b23dd5a 100644 --- a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java +++ b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java @@ -44,6 +44,68 @@ void checkQueryParses(int expectedErrors, String input) throws Exception { @TestFactory Stream generateGoodTests() { + String[] groupingQueries = new String[] { + + // From docs: /en/grouping.html + "all( group(customer) each(output(sum(price))) )", + "all(group(customer) max(2) precision(12) order(-count()) each(output(sum(price))))", + "all(group(customer) each(max(3) each(output(summary()))))", + "all(group(a) max(5) each(output(count())))", + "all(group(a) max(5) each(output(count()) max(7) each(output(summary()))))", + "all(all(group(a) max(3) each(output(count()) max(5) each(output(summary())))) all(group(b) max(3) each(output(count()) max(5) each(output(summary())))))", + "all(group(a) max(5) each(output(count()) max(7) each(output(summary()))))", + "all(group(a) each(output(count()) each(output(summary()))))", + "all(group(customer) each(group(time.date(date)) each(output(sum(price)))))", + "all(group(customer) each(max(1) output(sum(price)) each(output(summary()))) each(group(time.date(date)) each(max(10) output(sum(price)) each(output(summary())))))", + "all(group(price) each(each(output(summary()))))", + "all(group(price/1000) each(each(output(summary()))))", + "all(group(fixedwidth(price,1000)) each(each(output(summary()))))", + "all(group(predefined(price, bucket(0,1000), bucket(1000,2000), bucket(2000,5000), bucket(5000,inf))) each(each(output(summary()))))", + "all(group(predefined(price, bucket[0,1000>, bucket[1000,2000>, bucket[2000,5000>, bucket[5000,inf>)) each(each(output(summary()))))", + "all(group(predefined(customer, 
bucket(-inf,\"Jones\"), bucket(\"Jones\", inf))) each(each(output(summary()))))", + "all(group(predefined(customer, bucket<-inf,\"Jones\">, bucket[\"Jones\"], bucket<\"Jones\", inf>)) each(each(output(summary()))))", + "all(group(predefined(tax, bucket[0.0,0.2>, bucket[0.2,0.5>, bucket[0.5,inf>)) each(each(output(summary()))))", + // "{ 'continuations':['BGAAABEBCA'] }all(output(count()))", + // "{ 'continuations':['BGAAABEBCA', 'BGAAABEBEBC'] }all(output(count()))", + "all(group(mod(div(date,mul(60,60)),24)) each(output(sum(price))))", + "all(group(customer) each(output(sum(mul(price,sub(1,tax))))))", + "all( group(a) each(output(count())) )", + "all( all(group(a) each(output(count()))) all(group(b) each(output(count()))) )", + "all( max(1000) all(group(a) each(output(count()))) )", + "all( group(a % 5) each(output(count())) )", + "all( group(a + b * c) each(output(count())) )", + "all( group(a % 5) order(sum(b)) each(output(count())) )", + "all( group(a + b * c) order(max(d)) each(output(count())) )", + "all( group(a) order(avg(relevance()) * count()) each(output(count())) )", + "all(group(a) order(max(attr) * count()) each(output(count())) )", + "all( group(a) each(max(1) each(output(summary()))) )", + "all( group(a) each(max(1) output(count(), sum(b)) each(output(summary()))) )", + "all(group(a) each(max(1) output(count(), sum(b), xor(md5(cat(a, b, c), 64))) each(output(summary()))))", + "all( group(a) max(5) each(max(69) output(count()) each(output(summary()))) )", + "all( group(a) max(5) each(output(count()) all(group(b) max(5) each(max(69) output(count()) each(output(summary()))))) )", + "all( group(a) max(5) each(output(count()) all(group(b) max(5) each(output(count()) all(group(c) max(5) each(max(69) output(count()) each(output(summary()))))) )))", + "all( group(a) max(5) each(output(count()) all(group(b) max(5) each(output(count()) all(max(1) each(output(summary()))) all(group(c) max(5) each(max(69) output(count()) each(output(summary()))))) )))", + "all( group(a) max(5) each(output(count()) all(max(1) each(output(summary()))) all(group(b) max(5) each(output(count()) all(max(1) each(output(summary()))) all(group(c) max(5) each(max(69) output(count()) each(output(summary()))))) )))", + "all( group(a) max(5) each(output(count()) all(max(1) each(output(summary(complexsummary)))) all(group(b) max(5) each(output(count()) all(max(1) each(output(summary(simplesummary)))) all(group(c) max(5) each(max(69) output(count()) each(output(summary(fastsummary)))))) )))", + "all( group(a) max(5) each(output(count()) all(max(1) each(output(summary()))) all(group(b) each(output(count()) all(max(1) each(output(summary()))) all(group(c) each(output(count()) all(max(1) each(output(summary())))))))) )))", + "all( group(time.year(a)) each(output(count())) )", + "all( group(time.year(a)) each(output(count()) all(group(time.monthofyear(a)) each(output(count())))) )", + "all( group(time.year(a)) each(output(count()) all(group(time.monthofyear(a)) each(output(count()) all(group(time.dayofmonth(a)) each(output(count()) all(group(time.hourofday(a)) each(output(count())))))))) )", + "all( group(predefined((now() - a) / (60 * 60 * 24), bucket(0,1), bucket(1,2), bucket(3,7), bucket(8,31))) each(output(count()) all(max(2) each(output(summary()))) all(group((now() - a) / (60 * 60 * 24)) each(output(count()) all(max(2) each(output(summary())))))) )", + "all( group(a) output(count()) )", + "all( group(strlen(name)) output(count()) )", + "all( group(a) output(count()) each(output(sum(b))) )", + "all( group(a) max(3) 
output(count()) each(output(sum(b))) )", + "all( group(a) max(10) output(count()) each(group(b) output(count())) )", + "all(group(1) each(output(avg(rating))))", + "all( group(predefined(rating, bucket[-inf, 0>, bucket[0, inf>)) each(output(count())) )", + "all( group(predefined(rating, bucket[-inf, 0>, bucket[0, inf>)) order(max(rating)) max(1) each( max(100) each(output(summary(name_only)))) )", + }; + + for (int i = 0; i < groupingQueries.length; i++) { + groupingQueries[i] = "select * from sources * where true | " + groupingQueries[i]; + } + String[] queries = new String[] { "select * from music", "select * from sources * where range(title, 0.0, 500.0)", // /container-search/src/test/java/com/yahoo/select/SelectTestCase.java @@ -111,11 +173,19 @@ Stream generateGoodTests() { "select * from music where title contains \"madonna\" timeout 70", "select * from music where userInput(@userinput)", "select * from music where text contains ({distance: 5}near(\"a\", \"b\")) and text contains ({distance:2}near(\"c\", \"d\"))", + "select * from music where ({bounds:\"rightOpen\"}range(year, 2000, 2018))", + "select * from music where text contains ({distance: 5}near(\"a\", \"b\"))", + "select * from music where myUrlField.hostname contains uri(\"vespa.ai\")", + "select * from music where myUrlField.hostname contains ({startAnchor: true}uri(\"vespa.ai\"))", + "select * from music where title contains ({weight:200}\"heads\")", + "select * from sources * where ({stem: false}(foo contains \"a\" and bar contains \"b\")) or foo contains {stem: false}\"c\"", + "select * from sources * where foo contains @animal and foo contains phrase(@animal, @syntaxExample, @animal)", + "select * from sources * where sddocname contains 'purchase' | all(group(customer) each(output(sum(price))))", }; + Stream queryStream = Stream.concat(Arrays.stream(queries), Arrays.stream(groupingQueries)); - return Arrays.stream(queries) - .map(query -> DynamicTest.dynamicTest(query, () -> checkQueryParses(0, query))); + return queryStream.map(query -> DynamicTest.dynamicTest(query, () -> checkQueryParses(0, query))); } private record TestWithError(int expectedErrors, String query) {} @@ -124,6 +194,7 @@ private record TestWithError(int expectedErrors, String query) {} Stream InvalidQuery() throws Exception { var queries = new TestWithError[] { new TestWithError(1, "seletc *"), + // new TestWithError(1, "select * from sources * where true | all(group(a) order(attr * count()) each(output(count())) )"), }; return Arrays.stream(queries) From 0038068e149007b2fbfd71dfdd37cad05c511d5c Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Thu, 14 Nov 2024 16:48:02 +0100 Subject: [PATCH 034/126] Stop forcing memory map of bitvector files if bitvector cache is enabled. 
--- .../src/vespa/searchlib/diskindex/field_index.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp index 37b568221199..8ead0de77d1c 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp @@ -145,10 +145,12 @@ FieldIndex::open(const std::string& field_dir, const TuneFileSearch& tune_file_s } bDict = std::make_shared(); - // Always memory map bitvectors for now - auto force_mmap = tune_file_search._read; - force_mmap.setWantMemoryMap(); - if (!bDict->open(field_dir, force_mmap, BitVectorKeyScope::PERFIELD_WORDS)) { + // memory map bitvectors unless bitvector cache is enabled + auto maybe_force_mmap = tune_file_search._read; + if (!_bitvector_cache_enabled) { + maybe_force_mmap.setWantMemoryMap(); + } + if (!bDict->open(field_dir, maybe_force_mmap, BitVectorKeyScope::PERFIELD_WORDS)) { LOG(warning, "Could not open bit vector dictionary in '%s'", field_dir.c_str()); return false; } @@ -242,7 +244,7 @@ FieldIndex::read(const IPostingListCache::BitVectorKey& key, IPostingListCache:: std::shared_ptr FieldIndex::read_bit_vector(BitVectorDictionaryLookupResult lookup_result) const { - if (!_bit_vector_dict) { + if (!_bit_vector_dict || !lookup_result.valid()) { return {}; } if (_bit_vector_dict->get_memory_mapped() || !_bitvector_cache_enabled) { From 6a64484e28fd30db08ae7111992496fcd807154b Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Thu, 14 Nov 2024 17:37:52 +0100 Subject: [PATCH 035/126] Add copyright --- .../com/yahoo/docprocs/indexing/IndexingProcessorTester.java | 1 + 1 file changed, 1 insertion(+) diff --git a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTester.java b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTester.java index 817817252fd3..9a24861ebe06 100644 --- a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTester.java +++ b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTester.java @@ -1,3 +1,4 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.docprocs.indexing; import com.yahoo.component.provider.ComponentRegistry; From 6ffd0bb464c91a47330da07ef93e60abb3a6a851 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Thu, 14 Nov 2024 21:44:54 +0100 Subject: [PATCH 036/126] Trim down posting list cache key, use context to pass backing store file. 
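In short: the cache key now carries only the fields that identify the posting list (file id, bit offset, bit length), while per-call state, namely the backing file to consult on a miss and a cache_miss flag the caller can inspect afterwards, travels in a Context object passed next to the key. A rough self-contained sketch of that shape, with invented names rather than the real IPostingListCache/PostingListCache classes:

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <unordered_map>

    // Only the identifying fields live in the key.
    struct Key {
        uint64_t file_id = 0;
        uint64_t bit_offset = 0;
        uint64_t bit_length = 0;
        bool operator==(const Key& rhs) const noexcept {
            return file_id == rhs.file_id && bit_offset == rhs.bit_offset && bit_length == rhs.bit_length;
        }
    };

    struct KeyHash {
        size_t operator()(const Key& k) const noexcept {
            return (k.file_id << 40) ^ (k.file_id >> 24) ^ k.bit_offset;
        }
    };

    struct BackingFile {
        std::string read(const Key& k) const {
            return "posting list @" + std::to_string(k.bit_offset);
        }
    };

    // Per-call state lives here instead of in the cache key.
    struct Context {
        const BackingFile* backing_store_file;
        bool cache_miss = false;
        explicit Context(const BackingFile* f) : backing_store_file(f) {}
    };

    class Cache {
        std::unordered_map<Key, std::string, KeyHash> _entries;
    public:
        std::string read(const Key& key, Context& ctx) {
            auto it = _entries.find(key);
            if (it != _entries.end()) {
                return it->second;                 // hit: ctx.cache_miss stays false
            }
            ctx.cache_miss = true;                 // miss: fetch through the backing file
            auto value = ctx.backing_store_file->read(key);
            _entries.emplace(key, value);
            return value;
        }
    };

    int main() {
        BackingFile file;
        Cache cache;
        Key key{1, 4096, 128};
        Context first(&file);
        cache.read(key, first);
        Context second(&file);
        cache.read(key, second);
        std::printf("first lookup miss: %d, second lookup miss: %d\n",
                    (int)first.cache_miss, (int)second.cache_miss);
        return 0;
    }

Keeping the mutable backing-store pointer out of the key also keeps hashing and equality purely about the identifying fields, which the updated posting_list_cache_test below exercises with repeated lookups that check the cache_miss flag.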
--- .../posting_list_cache_test.cpp | 21 ++++++++++++------- .../vespa/searchlib/diskindex/field_index.cpp | 11 +++++----- .../vespa/searchlib/diskindex/field_index.h | 2 +- .../diskindex/i_posting_list_cache.h | 7 +++---- .../diskindex/posting_list_cache.cpp | 10 ++++----- .../searchlib/diskindex/posting_list_cache.h | 2 +- 6 files changed, 29 insertions(+), 24 deletions(-) diff --git a/searchlib/src/tests/diskindex/posting_list_cache/posting_list_cache_test.cpp b/searchlib/src/tests/diskindex/posting_list_cache/posting_list_cache_test.cpp index 1a1ac8164dab..665310eb9070 100644 --- a/searchlib/src/tests/diskindex/posting_list_cache/posting_list_cache_test.cpp +++ b/searchlib/src/tests/diskindex/posting_list_cache/posting_list_cache_test.cpp @@ -15,7 +15,7 @@ class MockFile : public PostingListCache::IPostingListFileBacking { public: MockFile(); ~MockFile() override; - PostingListHandle read(const PostingListCache::Key& key) const override; + PostingListHandle read(const PostingListCache::Key& key, PostingListCache::Context& ctx) const override; std::shared_ptr read(const PostingListCache::BitVectorKey& key, PostingListCache::Context& ctx) const override; }; @@ -27,9 +27,10 @@ MockFile::MockFile() MockFile::~MockFile() = default; PostingListHandle -MockFile::read(const PostingListCache::Key& key) const +MockFile::read(const PostingListCache::Key& key, PostingListCache::Context& ctx) const { EXPECT_NE(0, key.bit_length); + ctx.cache_miss = true; PostingListHandle handle; handle._allocSize = key.bit_length / 8; return handle; @@ -57,8 +58,14 @@ class PostingListCacheTest : public ::testing::Test PostingListCache::Context _ctx; PostingListCacheTest(); ~PostingListCacheTest() override; - PostingListHandle read() const { return _cache.read(_key); } - std::shared_ptr read_bv() { return _cache.read(_bv_key, _ctx); } + PostingListHandle read() { + _ctx.cache_miss = false; + return _cache.read(_key, _ctx); + } + std::shared_ptr read_bv() { + _ctx.cache_miss = false; + return _cache.read(_bv_key, _ctx); + } }; PostingListCacheTest::PostingListCacheTest() @@ -69,7 +76,6 @@ PostingListCacheTest::PostingListCacheTest() _bv_key(), _ctx(&_mock_file) { - _key.backing_store_file = &_mock_file; } PostingListCacheTest::~PostingListCacheTest() = default; @@ -78,8 +84,11 @@ TEST_F(PostingListCacheTest, repeated_lookups_gives_hit) { _key.bit_length = 24 * 8; auto handle = read(); + EXPECT_TRUE(_ctx.cache_miss); auto handle2 = read(); + EXPECT_FALSE(_ctx.cache_miss); auto handle3 = read(); + EXPECT_FALSE(_ctx.cache_miss); EXPECT_EQ(24, handle._allocSize); auto stats = _cache.get_stats(); EXPECT_EQ(1, stats.misses); @@ -125,10 +134,8 @@ TEST_F(PostingListCacheTest, repeated_bitvector_lookup_gives_hit) { _bv_key.lookup_result.idx = 1; _bv_key.file_id = 2; - _ctx.cache_miss = false; auto bv = read_bv(); EXPECT_TRUE(_ctx.cache_miss); - _ctx.cache_miss = false; auto bv2 = read_bv(); EXPECT_FALSE(_ctx.cache_miss); EXPECT_EQ(bv, bv2); diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp index 37b568221199..f66f86d1ff57 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp @@ -180,12 +180,12 @@ FieldIndex::read_uncached_posting_list(const DictionaryLookupResult& lookup_resu } PostingListHandle -FieldIndex::read(const IPostingListCache::Key& key) const +FieldIndex::read(const IPostingListCache::Key& key, IPostingListCache::Context& ctx) const { + ctx.cache_miss = true; 
DictionaryLookupResult lookup_result; lookup_result.bitOffset = key.bit_offset; lookup_result.counts._bitLength = key.bit_length; - key.backing_store_file = nullptr; // Signal cache miss back to layer above cache return read_uncached_posting_list(lookup_result); } @@ -200,13 +200,12 @@ FieldIndex::read_posting_list(const DictionaryLookupResult& lookup_result) const return read_uncached_posting_list(lookup_result); } IPostingListCache::Key key; - key.backing_store_file = this; key.file_id = _file_id; key.bit_offset = lookup_result.bitOffset; key.bit_length = lookup_result.counts._bitLength; - auto result = _posting_list_cache->read(key); - auto cache_hit = key.backing_store_file == this; - if (cache_hit && result._read_bytes != 0) { + IPostingListCache::Context ctx(this); + auto result = _posting_list_cache->read(key, ctx); + if (!ctx.cache_miss && result._read_bytes != 0) { _cache_disk_io_stats->add_cached_read_operation(result._read_bytes); } return result; diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.h b/searchlib/src/vespa/searchlib/diskindex/field_index.h index 304591bb1139..6a39c4982488 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.h +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.h @@ -73,7 +73,7 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { bool open(const std::string& field_dir, const TuneFileSearch &tune_file_search); void reuse_files(const FieldIndex& rhs); index::PostingListHandle read_uncached_posting_list(const search::index::DictionaryLookupResult& lookup_result) const; - index::PostingListHandle read(const IPostingListCache::Key& key) const override; + index::PostingListHandle read(const IPostingListCache::Key& key, IPostingListCache::Context& ctx) const override; index::PostingListHandle read_posting_list(const search::index::DictionaryLookupResult& lookup_result) const; index::BitVectorDictionaryLookupResult lookup_bit_vector(const search::index::DictionaryLookupResult& lookup_result) const; std::shared_ptr read_uncached_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const; diff --git a/searchlib/src/vespa/searchlib/diskindex/i_posting_list_cache.h b/searchlib/src/vespa/searchlib/diskindex/i_posting_list_cache.h index d10c28e0b695..4f9d29494356 100644 --- a/searchlib/src/vespa/searchlib/diskindex/i_posting_list_cache.h +++ b/searchlib/src/vespa/searchlib/diskindex/i_posting_list_cache.h @@ -18,11 +18,10 @@ class IPostingListCache { public: class IPostingListFileBacking; struct Key { - mutable const IPostingListFileBacking* backing_store_file; // Used by backing store on cache miss uint64_t file_id; uint64_t bit_offset; uint64_t bit_length; - Key() noexcept : backing_store_file(nullptr), file_id(0), bit_offset(0), bit_length(0) { } + Key() noexcept : file_id(0), bit_offset(0), bit_length(0) { } size_t hash() const noexcept { return std::rotl(file_id, 40) + bit_offset; } bool operator==(const Key& rhs) const noexcept { // Don't check backing_store_file, it is just passed in key for convenience @@ -56,11 +55,11 @@ class IPostingListCache { class IPostingListFileBacking { public: virtual ~IPostingListFileBacking() = default; - virtual search::index::PostingListHandle read(const Key& key) const = 0; + virtual search::index::PostingListHandle read(const Key& key, Context& ctx) const = 0; virtual std::shared_ptr read(const BitVectorKey& key, Context& ctx) const = 0; }; virtual ~IPostingListCache() = default; - virtual search::index::PostingListHandle read(const Key& key) const = 0; + 
virtual search::index::PostingListHandle read(const Key& key, Context& ctx) const = 0; virtual std::shared_ptr read(const BitVectorKey& key, Context& ctx) const = 0; virtual vespalib::CacheStats get_stats() const = 0; virtual vespalib::CacheStats get_bitvector_stats() const = 0; diff --git a/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.cpp b/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.cpp index 7dc33a27574b..f81b2427a5d6 100644 --- a/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.cpp @@ -19,7 +19,7 @@ class PostingListCache::BackingStore public: BackingStore(); ~BackingStore(); - bool read(const Key& key, PostingListHandle& value) const; + bool read(const Key& key, PostingListHandle& value, Context& ctx) const; bool read(const BitVectorKey& key, std::shared_ptr& value, Context& ctx) const; }; @@ -27,10 +27,10 @@ PostingListCache::BackingStore::BackingStore() = default; PostingListCache::BackingStore::~BackingStore() = default; bool -PostingListCache::BackingStore::read(const Key& key, PostingListHandle& value) const +PostingListCache::BackingStore::read(const Key& key, PostingListHandle& value, Context& ctx) const { // TODO: Store a smaller copy if posting list is small - value = key.backing_store_file->read(key); + value = ctx.backing_store_file->read(key, ctx); return true; } @@ -104,9 +104,9 @@ PostingListCache::PostingListCache(size_t max_bytes, size_t bitvector_max_bytes) PostingListCache::~PostingListCache() = default; PostingListHandle -PostingListCache::read(const Key& key) const +PostingListCache::read(const Key& key, Context& ctx) const { - return _cache->read(key); + return _cache->read(key, ctx); } std::shared_ptr diff --git a/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.h b/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.h index ad4d21374cd2..6599ae281226 100644 --- a/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.h +++ b/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.h @@ -22,7 +22,7 @@ class PostingListCache : public IPostingListCache { public: PostingListCache(size_t max_bytes, size_t bitvector_max_bytes); ~PostingListCache() override; - search::index::PostingListHandle read(const Key& key) const override; + search::index::PostingListHandle read(const Key& key, Context& ctx) const override; std::shared_ptr read(const BitVectorKey& key, Context& ctx) const override; vespalib::CacheStats get_stats() const override; vespalib::CacheStats get_bitvector_stats() const override; From 0eff3b55c4d9af4078f58200eab801733fe99a56 Mon Sep 17 00:00:00 2001 From: Eirik Nygaard Date: Fri, 15 Nov 2024 09:29:04 +0100 Subject: [PATCH 037/126] SSO enforcement on email domain level --- flags/src/main/java/com/yahoo/vespa/flags/Flags.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index bb96361ae41e..3e02c61bd986 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -491,6 +491,13 @@ public class Flags { + "e.g. 
when running tests to avoid writing a large, sparse, mostly unused file", "Takes effect on restart of Docker container"); + public static final UnboundBooleanFlag ENFORCE_EMAIL_DOMAIN_SSO = defineFeatureFlag( + "enforce-email-domain-sso", false, + List.of("eirik"), "2024-11-07", "2025-02-07", + "Enforce SSO login for an email domain", + "Takes effect immediately", + CONSOLE_USER_EMAIL); + /** WARNING: public for testing: All flags should be defined in {@link Flags}. */ public static UnboundBooleanFlag defineFeatureFlag(String flagId, boolean defaultValue, List owners, String createdAt, String expiresAt, String description, From cc850e9a034d31d6b314cb4dad3fcdd45670afcf Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Wed, 13 Nov 2024 13:08:12 +0100 Subject: [PATCH 038/126] Use switch expression --- .../java/com/yahoo/security/KeyAlgorithm.java | 11 ++++ .../java/com/yahoo/security/KeyUtils.java | 63 +++++++++---------- 2 files changed, 39 insertions(+), 35 deletions(-) diff --git a/security-utils/src/main/java/com/yahoo/security/KeyAlgorithm.java b/security-utils/src/main/java/com/yahoo/security/KeyAlgorithm.java index 0cfc988249e9..e301b47861ce 100644 --- a/security-utils/src/main/java/com/yahoo/security/KeyAlgorithm.java +++ b/security-utils/src/main/java/com/yahoo/security/KeyAlgorithm.java @@ -9,6 +9,7 @@ * @author bjorncs */ public enum KeyAlgorithm { + RSA("RSA", null), EC("EC", new ECGenParameterSpec("prime256v1")); // TODO Make curve configurable @@ -25,4 +26,14 @@ String getAlgorithmName() { } Optional getSpec() { return Optional.ofNullable(spec); } + + public static KeyAlgorithm from(String name) { + for (var algorithm : values()) { + if (name.equals(algorithm.getAlgorithmName())) { + return algorithm; + } + } + throw new IllegalArgumentException("Unknown key algorithm '" + name + "'"); + } + } diff --git a/security-utils/src/main/java/com/yahoo/security/KeyUtils.java b/security-utils/src/main/java/com/yahoo/security/KeyUtils.java index 0cccd05121dc..3cad203bd3a4 100644 --- a/security-utils/src/main/java/com/yahoo/security/KeyUtils.java +++ b/security-utils/src/main/java/com/yahoo/security/KeyUtils.java @@ -78,23 +78,24 @@ public static KeyPair generateKeypair(KeyAlgorithm algorithm) { } public static PublicKey extractPublicKey(PrivateKey privateKey) { - String algorithm = privateKey.getAlgorithm(); + KeyAlgorithm keyAlgorithm = KeyAlgorithm.from(privateKey.getAlgorithm()); try { - if (algorithm.equals(RSA.getAlgorithmName())) { - KeyFactory keyFactory = createKeyFactory(RSA); - RSAPrivateCrtKey rsaPrivateCrtKey = (RSAPrivateCrtKey) privateKey; - RSAPublicKeySpec keySpec = new RSAPublicKeySpec(rsaPrivateCrtKey.getModulus(), rsaPrivateCrtKey.getPublicExponent()); - return keyFactory.generatePublic(keySpec); - } else if (algorithm.equals(EC.getAlgorithmName())) { - KeyFactory keyFactory = createKeyFactory(EC); - BCECPrivateKey ecPrivateKey = (BCECPrivateKey) privateKey; - ECParameterSpec ecParameterSpec = ecPrivateKey.getParameters(); - ECPoint ecPoint = new FixedPointCombMultiplier().multiply(ecParameterSpec.getG(), ecPrivateKey.getD()); - ECPublicKeySpec keySpec = new ECPublicKeySpec(ecPoint, ecParameterSpec); - return keyFactory.generatePublic(keySpec); - } else { - throw new IllegalArgumentException("Unexpected key algorithm: " + algorithm); - } + return switch (keyAlgorithm) { + case RSA -> { + KeyFactory keyFactory = createKeyFactory(RSA); + RSAPrivateCrtKey rsaPrivateCrtKey = (RSAPrivateCrtKey) privateKey; + RSAPublicKeySpec keySpec = new 
RSAPublicKeySpec(rsaPrivateCrtKey.getModulus(), rsaPrivateCrtKey.getPublicExponent()); + yield keyFactory.generatePublic(keySpec); + } + case EC -> { + KeyFactory keyFactory = createKeyFactory(EC); + BCECPrivateKey ecPrivateKey = (BCECPrivateKey) privateKey; + ECParameterSpec ecParameterSpec = ecPrivateKey.getParameters(); + ECPoint ecPoint = new FixedPointCombMultiplier().multiply(ecParameterSpec.getG(), ecPrivateKey.getD()); + ECPublicKeySpec keySpec = new ECPublicKeySpec(ecPoint, ecParameterSpec); + yield keyFactory.generatePublic(keySpec); + } + }; } catch (GeneralSecurityException e) { throw new RuntimeException(e); } @@ -168,14 +169,10 @@ public static String toPem(PrivateKey privateKey) { } public static String toPem(PrivateKey privateKey, KeyFormat format) { - switch (format) { - case PKCS1: - return toPkcs1Pem(privateKey); - case PKCS8: - return toPkcs8Pem(privateKey); - default: - throw new IllegalArgumentException("Unknown format: " + format); - } + return switch (format) { + case PKCS1 -> toPkcs1Pem(privateKey); + case PKCS8 -> toPkcs8Pem(privateKey); + }; } public static String toPem(PublicKey publicKey) { @@ -190,15 +187,11 @@ public static String toPem(PublicKey publicKey) { private static String toPkcs1Pem(PrivateKey privateKey) { try (StringWriter stringWriter = new StringWriter(); JcaPEMWriter pemWriter = new JcaPEMWriter(stringWriter)) { - String algorithm = privateKey.getAlgorithm(); - String type; - if (algorithm.equals(RSA.getAlgorithmName())) { - type = "RSA PRIVATE KEY"; - } else if (algorithm.equals(EC.getAlgorithmName())) { - type = "EC PRIVATE KEY"; - } else { - throw new IllegalArgumentException("Unexpected key algorithm: " + algorithm); - } + KeyAlgorithm keyAlgorithm = KeyAlgorithm.from(privateKey.getAlgorithm()); + String type = switch (keyAlgorithm) { + case RSA -> "RSA PRIVATE KEY"; + case EC -> "EC PRIVATE KEY"; + }; pemWriter.writeObject(new PemObject(type, getPkcs1Bytes(privateKey))); pemWriter.flush(); return stringWriter.toString(); @@ -227,9 +220,9 @@ private static byte[] getPkcs1Bytes(PrivateKey privateKey) throws IOException{ private static KeyFactory createKeyFactory(AlgorithmIdentifier algorithm) throws NoSuchAlgorithmException { if (X9ObjectIdentifiers.id_ecPublicKey.equals(algorithm.getAlgorithm())) { - return createKeyFactory(KeyAlgorithm.EC); + return createKeyFactory(EC); } else if (PKCSObjectIdentifiers.rsaEncryption.equals(algorithm.getAlgorithm())) { - return createKeyFactory(KeyAlgorithm.RSA); + return createKeyFactory(RSA); } else { throw new IllegalArgumentException("Unknown key algorithm: " + algorithm); } From 861ca5bbf36bf74aea5d19786633bf7c5050b8c7 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Fri, 15 Nov 2024 10:00:19 +0000 Subject: [PATCH 039/126] Use proper bit width for hash mask shift --- .../src/vespa/vespalib/util/relative_frequency_sketch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp index 89042d703d31..fe826b24c746 100644 --- a/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp +++ b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp @@ -47,7 +47,7 @@ RawRelativeFrequencySketch::~RawRelativeFrequencySketch() = default; */ template uint8_t RawRelativeFrequencySketch::add_by_hash_impl(uint64_t hash) noexcept { - const uint64_t block = hash & ((1u << _block_mask_bits) - 1); + const uint64_t block = hash & ((1ULL << _block_mask_bits) - 1); 
hash >>= _block_mask_bits; assert(block*64 + 64 <= _buf.size()); auto* block_ptr = static_cast(_buf.get()) + (block * 64); @@ -96,7 +96,7 @@ uint8_t RawRelativeFrequencySketch::add_and_count_by_hash(uint64_t hash) noexcep * counter decaying). */ uint8_t RawRelativeFrequencySketch::count_min_by_hash(uint64_t hash) const noexcept { - const uint64_t block = hash & ((1u << _block_mask_bits) - 1); + const uint64_t block = hash & ((1ULL << _block_mask_bits) - 1); hash >>= _block_mask_bits; const uint8_t* block_ptr = static_cast(_buf.get()) + (block * 64); uint8_t cm[4]; From 4b6688f33193e574cd619ce0bfca4406e055a164 Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Fri, 15 Nov 2024 11:20:49 +0100 Subject: [PATCH 040/126] Support PEM-encoding of X25519 keys --- .../java/com/yahoo/security/KeyAlgorithm.java | 7 ++++- .../java/com/yahoo/security/KeyUtils.java | 28 ++++++++++-------- .../java/com/yahoo/security/KeyUtilsTest.java | 29 ++++++++++++------- 3 files changed, 41 insertions(+), 23 deletions(-) diff --git a/security-utils/src/main/java/com/yahoo/security/KeyAlgorithm.java b/security-utils/src/main/java/com/yahoo/security/KeyAlgorithm.java index e301b47861ce..5ed207eac614 100644 --- a/security-utils/src/main/java/com/yahoo/security/KeyAlgorithm.java +++ b/security-utils/src/main/java/com/yahoo/security/KeyAlgorithm.java @@ -11,7 +11,8 @@ public enum KeyAlgorithm { RSA("RSA", null), - EC("EC", new ECGenParameterSpec("prime256v1")); // TODO Make curve configurable + EC("EC", new ECGenParameterSpec("prime256v1")), + XDH("XDH", new ECGenParameterSpec("X25519")); final String algorithmName; private final AlgorithmParameterSpec spec; @@ -31,6 +32,10 @@ public static KeyAlgorithm from(String name) { for (var algorithm : values()) { if (name.equals(algorithm.getAlgorithmName())) { return algorithm; + } else if (algorithm == XDH && name.equals("X25519")) { + // "XDH" is the name used by the JDK for elliptic curve keys using Curve25519, while BouncyCastle uses + // "X25519" + return algorithm; } } throw new IllegalArgumentException("Unknown key algorithm '" + name + "'"); diff --git a/security-utils/src/main/java/com/yahoo/security/KeyUtils.java b/security-utils/src/main/java/com/yahoo/security/KeyUtils.java index 3cad203bd3a4..d22e0269b4c0 100644 --- a/security-utils/src/main/java/com/yahoo/security/KeyUtils.java +++ b/security-utils/src/main/java/com/yahoo/security/KeyUtils.java @@ -3,6 +3,7 @@ import org.bouncycastle.asn1.ASN1Encodable; import org.bouncycastle.asn1.ASN1Primitive; +import org.bouncycastle.asn1.edec.EdECObjectIdentifiers; import org.bouncycastle.asn1.pkcs.PKCSObjectIdentifiers; import org.bouncycastle.asn1.pkcs.PrivateKeyInfo; import org.bouncycastle.asn1.x509.AlgorithmIdentifier; @@ -50,6 +51,7 @@ import static com.yahoo.security.KeyAlgorithm.EC; import static com.yahoo.security.KeyAlgorithm.RSA; +import static com.yahoo.security.KeyAlgorithm.XDH; /** * @author bjorncs @@ -95,6 +97,12 @@ public static PublicKey extractPublicKey(PrivateKey privateKey) { ECPublicKeySpec keySpec = new ECPublicKeySpec(ecPoint, ecParameterSpec); yield keyFactory.generatePublic(keySpec); } + case XDH -> { + byte[] privScalar = toRawX25519PrivateKeyBytes((XECPrivateKey) privateKey); + byte[] pubPoint = new byte[X25519.POINT_SIZE]; + X25519.generatePublicKey(privScalar, 0, pubPoint, 0); // scalarMultBase => public key point + yield fromRawX25519PublicKey(pubPoint); + } }; } catch (GeneralSecurityException e) { throw new RuntimeException(e); @@ -128,7 +136,7 @@ public static PrivateKey 
fromPemEncodedPrivateKey(String pem) { unknownObjects.add(pemObject); } } - throw new IllegalArgumentException("Expected a private key, but found " + unknownObjects.toString()); + throw new IllegalArgumentException("Expected a private key, but found " + unknownObjects); } catch (IOException e) { throw new UncheckedIOException(e); } catch (GeneralSecurityException e) { @@ -191,6 +199,7 @@ private static String toPkcs1Pem(PrivateKey privateKey) { String type = switch (keyAlgorithm) { case RSA -> "RSA PRIVATE KEY"; case EC -> "EC PRIVATE KEY"; + case XDH -> throw new IllegalArgumentException("Cannot use PKCS#1 for X25519 key"); }; pemWriter.writeObject(new PemObject(type, getPkcs1Bytes(privateKey))); pemWriter.flush(); @@ -223,6 +232,8 @@ private static KeyFactory createKeyFactory(AlgorithmIdentifier algorithm) throws return createKeyFactory(EC); } else if (PKCSObjectIdentifiers.rsaEncryption.equals(algorithm.getAlgorithm())) { return createKeyFactory(RSA); + } else if (EdECObjectIdentifiers.id_X25519.equals(algorithm.getAlgorithm())) { + return createKeyFactory(XDH); } else { throw new IllegalArgumentException("Unknown key algorithm: " + algorithm); } @@ -331,21 +342,14 @@ public static String toBase58EncodedX25519PrivateKey(XECPrivateKey privateKey) { return Base58.codec().encode(toRawX25519PrivateKeyBytes(privateKey)); } - // TODO unify with generateKeypair()? + // TODO: In-line and remove public static KeyPair generateX25519KeyPair() { - try { - return KeyPairGenerator.getInstance("X25519").generateKeyPair(); - } catch (NoSuchAlgorithmException e) { - throw new RuntimeException(e); - } + return generateKeypair(XDH); } - // TODO unify with extractPublicKey() + // TODO: In-line and remove public static XECPublicKey extractX25519PublicKey(XECPrivateKey privateKey) { - byte[] privScalar = toRawX25519PrivateKeyBytes(privateKey); - byte[] pubPoint = new byte[X25519.POINT_SIZE]; - X25519.generatePublicKey(privScalar, 0, pubPoint, 0); // scalarMultBase => public key point - return fromRawX25519PublicKey(pubPoint); + return (XECPublicKey) extractPublicKey(privateKey); } /** diff --git a/security-utils/src/test/java/com/yahoo/security/KeyUtilsTest.java b/security-utils/src/test/java/com/yahoo/security/KeyUtilsTest.java index aa1e9861a67f..8bd150d08e75 100644 --- a/security-utils/src/test/java/com/yahoo/security/KeyUtilsTest.java +++ b/security-utils/src/test/java/com/yahoo/security/KeyUtilsTest.java @@ -13,6 +13,7 @@ import static com.yahoo.security.ArrayUtils.unhex; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -55,26 +56,34 @@ void can_serialize_and_deserialize_ec_privatekey_using_pkcs8_pem_format() { testPrivateKeySerialization(KeyAlgorithm.EC, KeyFormat.PKCS8, "PRIVATE KEY"); } + @Test + void can_serialize_and_deserialize_x25519_private_key_using_pkcs8_pem_format() { + testPrivateKeySerialization(KeyAlgorithm.XDH, KeyFormat.PKCS8, "PRIVATE KEY"); + } + @Test void can_serialize_and_deserialize_rsa_publickey_using_pem_format() { - KeyPair keyPair = KeyUtils.generateKeypair(KeyAlgorithm.RSA); - String pem = KeyUtils.toPem(keyPair.getPublic()); - assertTrue(pem.contains("BEGIN PUBLIC KEY")); - assertTrue(pem.contains("END PUBLIC KEY")); - PublicKey deserializedKey = KeyUtils.fromPemEncodedPublicKey(pem); - assertEquals(keyPair.getPublic(), 
deserializedKey); - assertEquals(KeyAlgorithm.RSA.getAlgorithmName(), deserializedKey.getAlgorithm()); + testPublicKeySerialization(KeyAlgorithm.RSA); } @Test void can_serialize_and_deserialize_ec_publickey_using_pem_format() { - KeyPair keyPair = KeyUtils.generateKeypair(KeyAlgorithm.EC); + testPublicKeySerialization(KeyAlgorithm.EC); + } + + @Test + void can_serialize_and_deserialize_x25519_publickey_using_pem_format() { + testPublicKeySerialization(KeyAlgorithm.XDH); + } + + private static void testPublicKeySerialization(KeyAlgorithm keyAlgorithm) { + KeyPair keyPair = KeyUtils.generateKeypair(keyAlgorithm); String pem = KeyUtils.toPem(keyPair.getPublic()); assertTrue(pem.contains("BEGIN PUBLIC KEY")); assertTrue(pem.contains("END PUBLIC KEY")); PublicKey deserializedKey = KeyUtils.fromPemEncodedPublicKey(pem); assertEquals(keyPair.getPublic(), deserializedKey); - assertEquals(KeyAlgorithm.EC.getAlgorithmName(), deserializedKey.getAlgorithm()); + assertSame(keyAlgorithm, KeyAlgorithm.from(deserializedKey.getAlgorithm())); } private static void testPrivateKeySerialization(KeyAlgorithm keyAlgorithm, KeyFormat keyFormat, String pemLabel) { @@ -84,7 +93,7 @@ private static void testPrivateKeySerialization(KeyAlgorithm keyAlgorithm, KeyFo assertTrue(pem.contains("END " + pemLabel)); PrivateKey deserializedKey = KeyUtils.fromPemEncodedPrivateKey(pem); assertEquals(keyPair.getPrivate(), deserializedKey); - assertEquals(keyAlgorithm.getAlgorithmName(), deserializedKey.getAlgorithm()); + assertSame(keyAlgorithm, KeyAlgorithm.from(deserializedKey.getAlgorithm())); } private static XECPrivateKey xecPrivFromHex(String hex) { From f7c74e5a50b397299c13d9004c8db317f4091ef1 Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Fri, 15 Nov 2024 11:27:51 +0100 Subject: [PATCH 041/126] Expect PEM-encoded sealing key --- .../vespa/hosted/provision/backup/Snapshots.java | 12 ++++++++---- .../vespa/hosted/provision/backup/SnapshotsTest.java | 5 ++--- .../provision/provisioning/ProvisioningTester.java | 5 ++--- .../hosted/provision/restapi/NodesV2ApiTest.java | 5 ++--- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshots.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshots.java index 6175d0daf2c7..7d6266375c64 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshots.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshots.java @@ -10,6 +10,7 @@ import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.SnapshotId; +import com.yahoo.security.KeyAlgorithm; import com.yahoo.security.KeyId; import com.yahoo.security.KeyUtils; import com.yahoo.security.SealedSharedKey; @@ -28,9 +29,8 @@ import com.yahoo.vespa.hosted.provision.provisioning.SnapshotStore; import java.security.KeyPair; +import java.security.PrivateKey; import java.security.PublicKey; -import java.security.interfaces.XECPrivateKey; -import java.security.interfaces.XECPublicKey; import java.time.Instant; import java.util.ArrayList; import java.util.List; @@ -196,8 +196,12 @@ private VersionedKeyPair sealingKeyPair(SecretVersionId version) { } Key key = Key.fromString(sealingPrivateKeySecretName.get()); Secret sealingPrivateKey = version == null ? 
secretStore.getSecret(key) : secretStore.getSecret(key, version); - XECPrivateKey privateKey = KeyUtils.fromBase64EncodedX25519PrivateKey(sealingPrivateKey.secretValue().value()); - XECPublicKey publicKey = KeyUtils.extractX25519PublicKey(privateKey); + PrivateKey privateKey = KeyUtils.fromPemEncodedPrivateKey(sealingPrivateKey.secretValue().value()); + PublicKey publicKey = KeyUtils.extractPublicKey(privateKey); + if (KeyAlgorithm.from(privateKey.getAlgorithm()) != KeyAlgorithm.XDH) { + throw new IllegalArgumentException("Expected sealing key to use algorithm " + KeyAlgorithm.XDH + + ", but got " + privateKey.getAlgorithm()); + } return new VersionedKeyPair(new KeyPair(publicKey, privateKey), sealingPrivateKey.version()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotsTest.java index 0f8b54247b2b..b4f909acb25f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotsTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotsTest.java @@ -9,6 +9,7 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; +import com.yahoo.security.KeyFormat; import com.yahoo.security.KeyUtils; import com.yahoo.security.SealedSharedKey; import com.yahoo.vespa.hosted.provision.Node; @@ -17,7 +18,6 @@ import java.security.KeyPair; import java.security.PublicKey; -import java.security.interfaces.XECPrivateKey; import java.util.List; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -54,8 +54,7 @@ void snapshot() { // Sealing key can be rotated independently of existing snapshots KeyPair keyPair = KeyUtils.generateX25519KeyPair(); tester.secretStore().add(new Secret(Key.fromString("snapshot/sealingPrivateKey"), - KeyUtils.toBase64EncodedX25519PrivateKey((XECPrivateKey) keyPair.getPrivate()) - .getBytes(), + KeyUtils.toPem(keyPair.getPrivate(), KeyFormat.PKCS8).getBytes(), SecretVersionId.of("2"))); assertEquals(SecretVersionId.of("1"), snapshots.require(snapshot0.id(), node0).key().sealingKeyVersion()); assertNotEquals(snapshot0.key().sharedKey(), snapshots.keyOf(snapshot0.id(), node0, receiverPublicKey), diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index 00dcd30dfc0c..9bdfd2850150 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -33,6 +33,7 @@ import com.yahoo.config.provision.Zone; import com.yahoo.config.provisioning.FlavorsConfig; import com.yahoo.jdisc.test.MockMetric; +import com.yahoo.security.KeyFormat; import com.yahoo.security.KeyUtils; import com.yahoo.test.ManualClock; import com.yahoo.transaction.NestedTransaction; @@ -69,7 +70,6 @@ import com.yahoo.vespa.service.duper.TenantHostApplication; import java.security.KeyPair; -import java.security.interfaces.XECPrivateKey; import java.time.temporal.TemporalAmount; import java.util.ArrayList; import java.util.Collection; @@ -772,8 +772,7 @@ private SecretStoreMock defaultSecretStore() { SecretStoreMock secretStore = new SecretStoreMock(); KeyPair keyPair = KeyUtils.generateX25519KeyPair(); secretStore.add(new 
Secret(Key.fromString("snapshot/sealingPrivateKey"), - KeyUtils.toBase64EncodedX25519PrivateKey((XECPrivateKey) keyPair.getPrivate()) - .getBytes(), + KeyUtils.toPem(keyPair.getPrivate(), KeyFormat.PKCS8).getBytes(), SecretVersionId.of("1"))); return secretStore; } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java index 04e03dd60d45..c33e45661f9e 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java @@ -11,6 +11,7 @@ import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.TenantName; +import com.yahoo.security.KeyFormat; import com.yahoo.security.KeyUtils; import com.yahoo.slime.SlimeUtils; import com.yahoo.text.Utf8; @@ -28,7 +29,6 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.security.KeyPair; -import java.security.interfaces.XECPrivateKey; import java.security.interfaces.XECPublicKey; import java.time.Duration; import java.util.Arrays; @@ -876,8 +876,7 @@ public void test_snapshots() throws IOException { .getComponent(SecretStoreMock.class.getName()); KeyPair keyPair = KeyUtils.generateX25519KeyPair(); secretStore.add(new Secret(Key.fromString("snapshot/sealingPrivateKey"), - KeyUtils.toBase64EncodedX25519PrivateKey((XECPrivateKey) keyPair.getPrivate()) - .getBytes(), + KeyUtils.toPem(keyPair.getPrivate(), KeyFormat.PKCS8).getBytes(), SecretVersionId.of("1"))); // Trigger creation of snapshots From 75783d3fb82e88220b90b5d9c898563793f22bcc Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 15 Nov 2024 14:24:42 +0100 Subject: [PATCH 042/126] Move DiskIoMetrics out of IndexMetricsEntry. --- .../searchcore/proton/metrics/CMakeLists.txt | 1 + .../proton/metrics/disk_io_metrics.cpp | 44 +++++++++++++++++++ .../proton/metrics/disk_io_metrics.h | 34 ++++++++++++++ .../proton/metrics/index_metrics_entry.cpp | 32 -------------- .../proton/metrics/index_metrics_entry.h | 21 +-------- 5 files changed, 80 insertions(+), 52 deletions(-) create mode 100644 searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.cpp create mode 100644 searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.h diff --git a/searchcore/src/vespa/searchcore/proton/metrics/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/metrics/CMakeLists.txt index dae64398293f..6ea9a976cc4d 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/CMakeLists.txt +++ b/searchcore/src/vespa/searchcore/proton/metrics/CMakeLists.txt @@ -5,6 +5,7 @@ vespa_add_library(searchcore_proton_metrics STATIC attribute_metrics_entry.cpp cache_metrics.cpp content_proton_metrics.cpp + disk_io_metrics.cpp documentdb_job_trackers.cpp documentdb_tagged_metrics.cpp document_db_commit_metrics.cpp diff --git a/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.cpp b/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.cpp new file mode 100644 index 000000000000..4cd5e4d3c35e --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.cpp @@ -0,0 +1,44 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "disk_io_metrics.h" +#include + +using search::CacheDiskIoStats; +using search::DiskIoStats; + +namespace proton { + +namespace { + +void update_helper(metrics::LongValueMetric &metric, const DiskIoStats &stats) { + metric.addTotalValueBatch(stats.read_bytes_total(), stats.read_operations(), + stats.read_bytes_min(), stats.read_bytes_max()); +} + +} + +DiskIoMetrics::SearchMetrics::SearchMetrics(metrics::MetricSet* parent) + : MetricSet("search", {}, "The search io for a given component", parent), + _read_bytes("read_bytes", {}, "Bytes read in posting list files as part of search", this), + _cached_read_bytes("cached_read_bytes", {}, "Bytes read from posting list files cache as part of search", this) +{ +} + +DiskIoMetrics::SearchMetrics::~SearchMetrics() = default; + +void +DiskIoMetrics::SearchMetrics::update(const CacheDiskIoStats& cache_disk_io_stats) +{ + update_helper(_read_bytes, cache_disk_io_stats.read()); + update_helper(_cached_read_bytes, cache_disk_io_stats.cached_read()); +} + +DiskIoMetrics::DiskIoMetrics(metrics::MetricSet* parent) + : MetricSet("io", {}, "The disk usage for a given component", parent), + _search(this) +{ +} + +DiskIoMetrics::~DiskIoMetrics() = default; + +} diff --git a/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.h b/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.h new file mode 100644 index 000000000000..b03969aee75e --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.h @@ -0,0 +1,34 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { class CacheDiskIoStats; } + +namespace proton { + +/* + * Class containing disk io metrics, e.g. per index field or + * aggregated at document type level. 
+ */ +class DiskIoMetrics : public metrics::MetricSet { + class SearchMetrics : public metrics::MetricSet { + metrics::LongValueMetric _read_bytes; + metrics::LongValueMetric _cached_read_bytes; + public: + explicit SearchMetrics(metrics::MetricSet* parent); + ~SearchMetrics() override; + void update(const search::CacheDiskIoStats& cache_disk_io_stats); + }; + + SearchMetrics _search; + +public: + explicit DiskIoMetrics(metrics::MetricSet* parent); + ~DiskIoMetrics() override; + void update(const search::CacheDiskIoStats& cache_disk_io_stats) { _search.update(cache_disk_io_stats); } +}; + +} diff --git a/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.cpp b/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.cpp index 3c3617cf14ba..d62f22a52710 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.cpp +++ b/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.cpp @@ -3,9 +3,6 @@ #include "index_metrics_entry.h" #include -using search::CacheDiskIoStats; -using search::DiskIoStats; - namespace proton { namespace { @@ -13,37 +10,8 @@ namespace { const std::string entry_name("index"); const std::string entry_description("Metrics for indexes for a given field"); -void update_helper(metrics::LongValueMetric &metric, const DiskIoStats &stats) { - metric.addTotalValueBatch(stats.read_bytes_total(), stats.read_operations(), - stats.read_bytes_min(), stats.read_bytes_max()); -} - -} - -IndexMetricsEntry::DiskIoMetrics::SearchMetrics::SearchMetrics(metrics::MetricSet* parent) - : MetricSet("search", {}, "The search io for a given component", parent), - _read_bytes("read_bytes", {}, "Bytes read in posting list files as part of search", this), - _cached_read_bytes("cached_read_bytes", {}, "Bytes read from posting list files cache as part of search", this) -{ -} - -IndexMetricsEntry::DiskIoMetrics::SearchMetrics::~SearchMetrics() = default; - -void -IndexMetricsEntry::DiskIoMetrics::SearchMetrics::update(const CacheDiskIoStats& cache_disk_io_stats) -{ - update_helper(_read_bytes, cache_disk_io_stats.read()); - update_helper(_cached_read_bytes, cache_disk_io_stats.cached_read()); } -IndexMetricsEntry::DiskIoMetrics::DiskIoMetrics(metrics::MetricSet* parent) - : MetricSet("io", {}, "The disk usage for a given component", parent), - _search(this) -{ -} - -IndexMetricsEntry::DiskIoMetrics::~DiskIoMetrics() = default; - IndexMetricsEntry::IndexMetricsEntry(const std::string& field_name) : FieldMetricsEntry(entry_name, field_name, entry_description), _disk_io(this) diff --git a/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.h b/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.h index c2783d99e0e4..03d9bb7d5506 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.h +++ b/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.h @@ -2,10 +2,9 @@ #pragma once +#include "disk_io_metrics.h" #include "field_metrics_entry.h" -namespace search { class CacheDiskIoStats; } - namespace proton { /* @@ -13,24 +12,6 @@ namespace proton { * disk indexes and memory indexes. 
*/ class IndexMetricsEntry : public FieldMetricsEntry { - class DiskIoMetrics : public metrics::MetricSet { - class SearchMetrics : public metrics::MetricSet { - metrics::LongValueMetric _read_bytes; - metrics::LongValueMetric _cached_read_bytes; - public: - explicit SearchMetrics(metrics::MetricSet* parent); - ~SearchMetrics() override; - void update(const search::CacheDiskIoStats& cache_disk_io_stats); - }; - - SearchMetrics _search; - - public: - explicit DiskIoMetrics(metrics::MetricSet* parent); - ~DiskIoMetrics() override; - void update(const search::CacheDiskIoStats& cache_disk_io_stats) { _search.update(cache_disk_io_stats); } - }; - DiskIoMetrics _disk_io; public: From 46f0ad495e9b1c28aa2d6dcc598bc57ac5d53ec3 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 15 Nov 2024 14:36:36 +0100 Subject: [PATCH 043/126] Add disk io metrics for document type. --- .../searchcore/proton/metrics/documentdb_tagged_metrics.cpp | 3 ++- .../searchcore/proton/metrics/documentdb_tagged_metrics.h | 1 + .../searchcore/proton/server/documentdb_metrics_updater.cpp | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.cpp b/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.cpp index c1e87a834b6c..af03133edcd3 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.cpp +++ b/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.cpp @@ -93,7 +93,8 @@ DocumentDBTaggedMetrics::IndexMetrics::IndexMetrics(MetricSet *parent) : MetricSet("index", {}, "Index metrics (memory and disk) for this document db", parent), diskUsage("disk_usage", {}, "Disk space usage in bytes", this), memoryUsage(this), - docsInMemory("docs_in_memory", {}, "Number of documents in memory index", this) + docsInMemory("docs_in_memory", {}, "Number of documents in memory index", this), + disk_io(this) { } diff --git a/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.h b/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.h index 6191678c2bda..66fed96263fd 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.h +++ b/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.h @@ -97,6 +97,7 @@ struct DocumentDBTaggedMetrics : metrics::MetricSet metrics::LongValueMetric diskUsage; MemoryUsageMetrics memoryUsage; metrics::LongValueMetric docsInMemory; + DiskIoMetrics disk_io; IndexMetrics(metrics::MetricSet *parent); ~IndexMetrics() override; diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp index 73fa84685cb0..02633cffeb36 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp @@ -80,6 +80,7 @@ updateIndexMetrics(DocumentDBTaggedMetrics &metrics, const search::SearchableSta updateMemoryUsageMetrics(indexMetrics.memoryUsage, stats.memoryUsage(), totalStats); indexMetrics.docsInMemory.set(stats.docsInMemory()); auto& field_metrics = metrics.ready.index; + search::CacheDiskIoStats disk_io; for (auto& field : stats.get_field_stats()) { auto entry = field_metrics.get_field_metrics_entry(field.first); if (entry) { @@ -87,7 +88,9 @@ updateIndexMetrics(DocumentDBTaggedMetrics &metrics, const search::SearchableSta entry->size_on_disk.set(field.second.size_on_disk()); 
entry->update_disk_io(field.second.cache_disk_io_stats()); } + disk_io.merge(field.second.cache_disk_io_stats()); } + indexMetrics.disk_io.update(disk_io); } struct TempAttributeMetric From 77bcaca99b06afc70b09f58c3550e0fd5b80211a Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 15 Nov 2024 23:32:39 +0100 Subject: [PATCH 044/126] Use posix_memalign to allocate buffer for directio read. --- vespalib/src/vespa/fastos/linux_file.cpp | 25 +++++++++++----------- vespalib/src/vespa/vespalib/util/alloc.cpp | 3 ++- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/vespalib/src/vespa/fastos/linux_file.cpp b/vespalib/src/vespa/fastos/linux_file.cpp index 0f32aa953a81..185fca7baa4c 100644 --- a/vespalib/src/vespa/fastos/linux_file.cpp +++ b/vespalib/src/vespa/fastos/linux_file.cpp @@ -10,14 +10,15 @@ #ifdef __linux__ #include "file.h" #include "file_rw_ops.h" -#include #include #include #include +#include +#include #include #include +#include #include -#include using fastos::File_RW_Ops; @@ -282,14 +283,6 @@ FastOS_Linux_File::SetSize(int64_t newSize) return rc; } - -namespace { - void * align(void * p, size_t alignment) { - const size_t alignMask(alignment-1); - return reinterpret_cast((reinterpret_cast(p) + alignMask) & ~alignMask); - } -} - void * FastOS_Linux_File::AllocateDirectIOBuffer (size_t byteSize, void *&realPtr) { @@ -297,9 +290,15 @@ FastOS_Linux_File::AllocateDirectIOBuffer (size_t byteSize, void *&realPtr) size_t dummy1, dummy2; size_t memoryAlignment; GetDirectIORestrictions(memoryAlignment, dummy1, dummy2); - - realPtr = malloc(byteSize + memoryAlignment - 1); - return align(realPtr, memoryAlignment); + memoryAlignment = std::max(memoryAlignment, sizeof(void*)); + int result = posix_memalign(&realPtr, memoryAlignment, byteSize); + if (result != 0) { + std::ostringstream os; + os << "posix_memalign(&realPtr, " << memoryAlignment << ", " << byteSize << ") failed with code " << result << + " : " << getErrorString(result); + throw std::runtime_error(os.str()); + } + return realPtr; } size_t diff --git a/vespalib/src/vespa/vespalib/util/alloc.cpp b/vespalib/src/vespa/vespalib/util/alloc.cpp index a2f124c6aa96..fd2f3f669513 100644 --- a/vespalib/src/vespa/vespalib/util/alloc.cpp +++ b/vespalib/src/vespa/vespalib/util/alloc.cpp @@ -315,7 +315,8 @@ AlignedHeapAllocator::alloc(size_t sz) const { void* ptr; int result = posix_memalign(&ptr, _alignment, sz); if (result != 0) { - throw IllegalArgumentException(make_string("posix_memalign(%zu, %zu) failed with code %d", sz, _alignment, result)); + throw IllegalArgumentException(make_string("posix_memalign(&ptr, %zu, %zu) failed with code %d : %s", + _alignment, sz, result, getErrorString(result).c_str())); } return PtrAndSize(ptr, sz); } From 61f04695eba93dbee67912a157dc6a75fb7c969e Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Sat, 16 Nov 2024 16:15:17 +0100 Subject: [PATCH 045/126] Drop tracking of extra "real" pointer when allocating buffer for direct io.
--- .../searchlib/diskindex/zcposoccrandread.cpp | 7 +++--- vespalib/src/vespa/fastos/file.cpp | 9 ++++---- vespalib/src/vespa/fastos/file.h | 23 +++++++------------ vespalib/src/vespa/fastos/linux_file.cpp | 9 ++++---- vespalib/src/vespa/fastos/linux_file.h | 2 +- 5 files changed, 21 insertions(+), 29 deletions(-) diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp index 0537aa320ef2..4694017a7d4b 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp @@ -113,11 +113,10 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result) } size_t mallocLen = padBefore + vectorLen + padAfter + padExtraAfter; - void *mallocStart = nullptr; void *alignedBuffer = nullptr; if (mallocLen > 0) { - alignedBuffer = _file->AllocateDirectIOBuffer(mallocLen, mallocStart); - assert(mallocStart != nullptr); + alignedBuffer = _file->AllocateDirectIOBuffer(mallocLen); + assert(alignedBuffer != nullptr); assert(endOffset + padAfter + padExtraAfter <= _fileSize); _file->ReadBuf(alignedBuffer, padBefore + vectorLen + padAfter, @@ -130,7 +129,7 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result) padExtraAfter); } handle._mem = static_cast(alignedBuffer) + padBefore; - handle._allocMem = std::shared_ptr(mallocStart, free); + handle._allocMem = std::shared_ptr(alignedBuffer, free); handle._allocSize = mallocLen; handle._read_bytes = padBefore + vectorLen + padAfter; } diff --git a/vespalib/src/vespa/fastos/file.cpp b/vespalib/src/vespa/fastos/file.cpp index e3e7b385fa2f..a4d9d1ee5c95 100644 --- a/vespalib/src/vespa/fastos/file.cpp +++ b/vespalib/src/vespa/fastos/file.cpp @@ -165,10 +165,9 @@ FastOS_FileInterface::DirectIOPadding(int64_t offset, void * -FastOS_FileInterface::allocateGenericDirectIOBuffer(size_t byteSize, void *&realPtr) +FastOS_FileInterface::allocateIOBuffer(size_t byteSize) { - realPtr = malloc(byteSize); // Default - use malloc allignment - return realPtr; + return malloc(byteSize); // Default - use malloc allignment } size_t @@ -178,9 +177,9 @@ FastOS_FileInterface::getMaxDirectIOMemAlign() } void * -FastOS_FileInterface::AllocateDirectIOBuffer(size_t byteSize, void *&realPtr) +FastOS_FileInterface::AllocateDirectIOBuffer(size_t byteSize) { - return allocateGenericDirectIOBuffer(byteSize, realPtr); + return allocateIOBuffer(byteSize); } void diff --git a/vespalib/src/vespa/fastos/file.h b/vespalib/src/vespa/fastos/file.h index ec2def0adc87..9ab562e7204b 100644 --- a/vespalib/src/vespa/fastos/file.h +++ b/vespalib/src/vespa/fastos/file.h @@ -393,16 +393,12 @@ class FastOS_FileInterface size_t &padAfter); /** - * Allocate a buffer properly alligned with regards to direct io - * access restrictions. + * Allocate a buffer for normal io. * @param byteSize Number of bytes to be allocated - * @param realPtr Reference where the actual pointer returned - * from malloc will be saved. Use free() with - * this pointer to deallocate the buffer. - * This value is always set. - * @return Alligned pointer value or nullptr if out of memory + * @return pointer value or nullptr if out of memory + * Use free() with this pointer to deallocate the buffer. */ - static void *allocateGenericDirectIOBuffer(size_t byteSize, void *&realPtr); + static void *allocateIOBuffer(size_t byteSize); /** * Get maximum memory alignment for directio buffers. 
@@ -411,16 +407,13 @@ class FastOS_FileInterface /** - * Allocate a buffer properly alligned with regards to direct io + * Allocate a buffer properly aligned with regards to direct io * access restrictions. * @param byteSize Number of bytes to be allocated - * @param realPtr Reference where the actual pointer returned - * from malloc will be saved. Use free() with - * this pointer to deallocate the buffer. - * This value is always set. - * @return Alligned pointer value or nullptr if out of memory + * @return Aligned pointer value or nullptr if out of memory. + * Use free() with this pointer to deallocate the buffer. */ - virtual void *AllocateDirectIOBuffer(size_t byteSize, void *&realPtr); + virtual void *AllocateDirectIOBuffer(size_t byteSize); /** * Enable mapping of complete file contents into the address space of the diff --git a/vespalib/src/vespa/fastos/linux_file.cpp b/vespalib/src/vespa/fastos/linux_file.cpp index 185fca7baa4c..3a69f6c5c17b 100644 --- a/vespalib/src/vespa/fastos/linux_file.cpp +++ b/vespalib/src/vespa/fastos/linux_file.cpp @@ -284,21 +284,22 @@ FastOS_Linux_File::SetSize(int64_t newSize) } void * -FastOS_Linux_File::AllocateDirectIOBuffer (size_t byteSize, void *&realPtr) +FastOS_Linux_File::AllocateDirectIOBuffer (size_t byteSize) { size_t dummy1, dummy2; size_t memoryAlignment; + void* ptr = nullptr; GetDirectIORestrictions(memoryAlignment, dummy1, dummy2); memoryAlignment = std::max(memoryAlignment, sizeof(void*)); - int result = posix_memalign(&realPtr, memoryAlignment, byteSize); + int result = posix_memalign(&ptr, memoryAlignment, byteSize); if (result != 0) { std::ostringstream os; - os << "posix_memalign(&realPtr, " << memoryAlignment << ", " << byteSize << ") failed with code " << result << + os << "posix_memalign(&ptr, " << memoryAlignment << ", " << byteSize << ") failed with code " << result << " : " << getErrorString(result); throw std::runtime_error(os.str()); } - return realPtr; + return ptr; } size_t diff --git a/vespalib/src/vespa/fastos/linux_file.h b/vespalib/src/vespa/fastos/linux_file.h index af6e6af51af2..a9239f9c331d 100644 --- a/vespalib/src/vespa/fastos/linux_file.h +++ b/vespalib/src/vespa/fastos/linux_file.h @@ -35,7 +35,7 @@ class FastOS_Linux_File final : public FastOS_UNIX_File int64_t getPosition() const override; bool SetSize(int64_t newSize) override; void ReadBuf(void *buffer, size_t length, int64_t readOffset) override; - void *AllocateDirectIOBuffer(size_t byteSize, void *&realPtr) override; + void *AllocateDirectIOBuffer(size_t byteSize) override; [[nodiscard]] ssize_t Read(void *buffer, size_t len) override; From 9921112605790e8020728bb07e13e0b0f2555bf1 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Sat, 16 Nov 2024 19:41:56 +0100 Subject: [PATCH 046/126] Add failing test showing that sessions are not deleted in the right order Oldest should be deleted first, since we delete a limited number of sessions.
This means old sessions might not be deleted until long after they should have been, if there are many sessions created as well --- .../config/server/ApplicationRepository.java | 5 +-- .../maintenance/SessionsMaintainer.java | 11 ++++++- .../server/session/SessionRepository.java | 12 +++---- .../maintenance/SessionsMaintainerTest.java | 33 +++++++++++++++++-- 4 files changed, 49 insertions(+), 12 deletions(-) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java index 457981109a53..eca73af43d70 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java @@ -961,9 +961,10 @@ public long createSession(ApplicationId applicationId, TimeoutBudget timeoutBudg return session.getSessionId(); } - public void deleteExpiredSessions() { + public void deleteExpiredSessions(int maxSessionsToDelete) { tenantRepository.getAllTenants() - .forEach(tenant -> tenant.getSessionRepository().deleteExpiredRemoteAndLocalSessions(session -> sessionIsActiveForItsApplication(tenant, session))); + .forEach(tenant -> tenant.getSessionRepository().deleteExpiredRemoteAndLocalSessions(session -> sessionIsActiveForItsApplication(tenant, session), + maxSessionsToDelete)); } private boolean sessionIsActiveForItsApplication(Tenant tenant, Session session) { diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java index b584efbd55d8..1430b9002ce8 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java @@ -13,14 +13,23 @@ */ public class SessionsMaintainer extends ConfigServerMaintainer { + private final int maxSessionsToDelete; + SessionsMaintainer(ApplicationRepository applicationRepository, Curator curator, Duration interval) { super(applicationRepository, curator, applicationRepository.flagSource(), applicationRepository.clock(), interval, true, true); + this.maxSessionsToDelete = 50; + } + + SessionsMaintainer(ApplicationRepository applicationRepository, Curator curator, Duration interval, int maxSessionsToDelete) { + super(applicationRepository, curator, applicationRepository.flagSource(), applicationRepository.clock(), + interval, true, true); + this.maxSessionsToDelete = maxSessionsToDelete; } @Override protected double maintain() { - applicationRepository.deleteExpiredSessions(); + applicationRepository.deleteExpiredSessions(maxSessionsToDelete); return 1.0; } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java index accf78fe82dd..18cac54a67e3 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java @@ -237,11 +237,11 @@ private LocalSession getSessionFromFile(long sessionId) { return new LocalSession(tenantName, sessionId, applicationPackage, sessionZKClient); } - public Set getLocalSessionsIdsFromFileSystem() { + public List getLocalSessionsIdsFromFileSystem() { File[] sessions =
tenantFileSystemDirs.sessionsPath().listFiles(sessionApplicationsFilter); - if (sessions == null) return Set.of(); + if (sessions == null) return List.of(); - Set sessionIds = new HashSet<>(); + List sessionIds = new ArrayList<>(); for (File session : sessions) { long sessionId = Long.parseLong(session.getName()); sessionIds.add(sessionId); @@ -601,9 +601,9 @@ public SessionData read(Session session) { // ---------------- Common stuff ---------------------------------------------------------------- - public void deleteExpiredRemoteAndLocalSessions(Predicate sessionIsActiveForApplication) { + public void deleteExpiredRemoteAndLocalSessions(Predicate sessionIsActiveForApplication, int maxSessionsToDelete) { // All known sessions, both local (file) and remote (zookeeper) - Set sessions = getLocalSessionsIdsFromFileSystem(); + List sessions = getLocalSessionsIdsFromFileSystem(); sessions.addAll(getRemoteSessionsFromZooKeeper()); log.log(Level.FINE, () -> "Sessions for tenant " + tenantName + ": " + sessions); @@ -613,7 +613,7 @@ public void deleteExpiredRemoteAndLocalSessions(Predicate sessionIsActi sessions.removeAll(newSessions); // Avoid deleting too many in one run - int deleteMax = (int) Math.min(1000, Math.max(50, sessions.size() * 0.05)); + int deleteMax = (int) Math.min(1000, Math.max(maxSessionsToDelete, sessions.size() * 0.05)); int deletedRemoteSessions = 0; int deletedLocalSessions = 0; for (Long sessionId : sessions) { diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainerTest.java index 3178efa0677b..0404df0f3b19 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainerTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainerTest.java @@ -23,6 +23,7 @@ import java.nio.file.Files; import java.time.Duration; import java.util.ArrayList; +import java.util.List; import static com.yahoo.vespa.config.server.session.Session.Status.PREPARE; import static com.yahoo.vespa.config.server.session.Session.Status.UNKNOWN; @@ -179,19 +180,45 @@ public void testDeletingInactiveSessions3() throws IOException { assertFalse(applicationPath.toFile().exists()); // App has been deleted } + @Test + public void testDeletionOfOldestFirst() { + // Delete max 1 session + tester = createTester(new InMemoryFlagSource(), 1); + + // Deploy some sessions when time goes backwards, to be able to have another + // order of sessions than increasing with time. 
3 is the oldest session, 4 is active, + // so 2 should be deleted when maintainer runs + tester.deployApp(testApp, prepareParams()); // session 2 (numbering starts at 2) + clock.retreat(Duration.ofMinutes(10)); + tester.deployApp(testApp, prepareParams()); // session 3 + clock.retreat(Duration.ofMinutes(10)); + tester.deployApp(testApp, prepareParams()); // session 4 + + clock.advance(Duration.ofMinutes(60)); + maintainer.run(); + + var sessions = sessionRepository.getRemoteSessionsFromZooKeeper(); + assertEquals(2, sessions.size()); + assertEquals(List.of(3L, 4L), sessions); + } + private MaintainerTester createTester() { return createTester(flagSource); } private MaintainerTester createTester(FlagSource flagSource) { + return createTester(flagSource, 50); + } + + private MaintainerTester createTester(FlagSource flagSource, int maxSessionsToDelete) { var tester = uncheck(() -> new MaintainerTester(clock, temporaryFolder, flagSource)); - return setup(tester); + return setup(tester, maxSessionsToDelete); } - private MaintainerTester setup(MaintainerTester tester) { + private MaintainerTester setup(MaintainerTester tester, int maxSessionsToDelete) { applicationRepository = tester.applicationRepository(); applicationRepository.tenantRepository().addTenant(applicationId.tenant()); - maintainer = new SessionsMaintainer(applicationRepository, tester.curator(), Duration.ofMinutes(1)); + maintainer = new SessionsMaintainer(applicationRepository, tester.curator(), Duration.ofMinutes(1), maxSessionsToDelete); sessionRepository = applicationRepository.getTenant(applicationId).getSessionRepository(); var serverdb = new File(applicationRepository.configserverConfig().configServerDBDir()); From 5985a8863772e45e17a59e2a325be4dc40d9db12 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Sat, 16 Nov 2024 20:15:15 +0100 Subject: [PATCH 047/126] Sort sessions before deleting --- .../com/yahoo/vespa/config/server/session/SessionRepository.java | 1 + 1 file changed, 1 insertion(+) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java index 18cac54a67e3..ca988785cb89 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java @@ -611,6 +611,7 @@ public void deleteExpiredRemoteAndLocalSessions(Predicate sessionIsActi // we will exclude these) Set newSessions = findNewSessionsInFileSystem(); sessions.removeAll(newSessions); + Collections.sort(sessions); // Avoid deleting too many in one run int deleteMax = (int) Math.min(1000, Math.max(maxSessionsToDelete, sessions.size() * 0.05)); From 7f55c3e838f99be9a9062e26f9d46d9cd64aba11 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Sun, 17 Nov 2024 22:37:37 +0100 Subject: [PATCH 048/126] Minor refactoring, no functional changes --- .../server/modelfactory/ModelsBuilder.java | 61 +++++++++++-------- .../modelfactory/PreparedModelsBuilder.java | 8 +-- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java index 129e6c5f9c61..b55d5b3acff0 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java +++ 
b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java @@ -91,36 +91,23 @@ public List buildModels(ApplicationId applicationId, Instant now) { Instant start = Instant.now(); log.log(Level.FINE, () -> "Will build models for " + applicationId); - Set versions = modelFactoryRegistry.allVersions(); - - // If the application specifies a major, skip models on a newer major - Optional requestedMajorVersion = applicationPackage.getMajorVersion(); - if (requestedMajorVersion.isPresent()) { - versions = keepUpToMajorVersion(requestedMajorVersion.get(), versions); - if (versions.isEmpty()) - throw new UnknownVespaVersionException("No Vespa versions on or before major version " + - requestedMajorVersion.get() + " are present"); - } + Set versions = findVersionsToBuild(applicationPackage); // Load models one major version at a time (in reverse order) as new major versions are allowed // to be non-loadable in the case where an existing application is incompatible with a new // major version (which is possible by the definition of major) - List majorVersions = versions.stream() - .map(Version::getMajor) - .distinct() - .sorted(Comparator.reverseOrder()) - .toList(); + List majorVersions = majorVersionsNewestFirst(versions); - List allApplicationModels = new ArrayList<>(); + List builtModels = new ArrayList<>(); // Build latest model for latest major only, if that fails build latest model for previous major boolean buildLatestModelForThisMajor = true; for (int i = 0; i < majorVersions.size(); i++) { int majorVersion = majorVersions.get(i); try { - allApplicationModels.addAll(buildModelVersions(keepMajorVersion(majorVersion, versions), - applicationId, dockerImageRepository, wantedNodeVespaVersion, - applicationPackage, allocatedHosts, now, - buildLatestModelForThisMajor, majorVersion)); + builtModels.addAll(buildModelVersions(keepMajorVersion(majorVersion, versions), + applicationId, dockerImageRepository, wantedNodeVespaVersion, + applicationPackage, allocatedHosts, now, + buildLatestModelForThisMajor, majorVersion)); buildLatestModelForThisMajor = false; // We have successfully built latest model version, do it only for this major } catch (NodeAllocationException | ApplicationLockException | TransientException | QuotaExceededException e) { @@ -146,12 +133,34 @@ public List buildModels(ApplicationId applicationId, } } log.log(Level.FINE, () -> "Done building models for " + applicationId + ". 
Built models for versions " + - allApplicationModels.stream() - .map(result -> result.getModel().version()) - .map(Version::toFullString) - .collect(Collectors.toSet()) + + builtModels.stream() + .map(result -> result.getModel().version()) + .map(Version::toFullString) + .collect(Collectors.toSet()) + " in " + Duration.between(start, Instant.now())); - return allApplicationModels; + return builtModels; + } + + private Set findVersionsToBuild(ApplicationPackage applicationPackage) { + Set versions = modelFactoryRegistry.allVersions(); + + // If the application specifies a major, skip models on a newer major + Optional requestedMajorVersion = applicationPackage.getMajorVersion(); + if (requestedMajorVersion.isPresent()) { + versions = keepUpToMajorVersion(requestedMajorVersion.get(), versions); + if (versions.isEmpty()) + throw new UnknownVespaVersionException("No Vespa versions on or before major version " + + requestedMajorVersion.get() + " are present"); + } + return versions; + } + + private static List majorVersionsNewestFirst(Set versions) { + return versions.stream() + .map(Version::getMajor) + .distinct() + .sorted(Comparator.reverseOrder()) + .toList(); } private boolean shouldSkipCreatingMajorVersionOnError(List majorVersions, Integer majorVersion, Version wantedVersion, @@ -208,7 +217,7 @@ private List buildModelVersions(Set versions, } catch (RuntimeException e) { // allow failure to create old config models if there is a validation override that allow skipping old // config models, or we're manually deploying - if (builtModelVersions.size() > 0 && + if (! builtModelVersions.isEmpty() && ( builtModelVersions.get(0).getModel().skipOldConfigModels(now) || zone().environment().isManuallyDeployed())) log.log(Level.WARNING, applicationId + ": Failed to build version " + version + ", but allow failure due to validation override or manual deployment:" diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/PreparedModelsBuilder.java b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/PreparedModelsBuilder.java index 7a343143c843..d2d7b74bdabe 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/PreparedModelsBuilder.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/PreparedModelsBuilder.java @@ -19,7 +19,6 @@ import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.api.Provisioned; import com.yahoo.config.model.api.ValidationParameters; -import com.yahoo.config.model.api.ValidationParameters.IgnoreValidationErrors; import com.yahoo.config.model.application.provider.FilesApplicationPackage; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.provision.AllocatedHosts; @@ -49,6 +48,8 @@ import java.util.logging.Level; import java.util.logging.Logger; +import static com.yahoo.config.model.api.ValidationParameters.IgnoreValidationErrors.FALSE; +import static com.yahoo.config.model.api.ValidationParameters.IgnoreValidationErrors.TRUE; import static com.yahoo.yolean.Exceptions.toMessageString; import static java.util.logging.Level.FINE; @@ -142,9 +143,8 @@ private ModelCreateResult createAndValidateModel(ModelFactory modelFactory, ModelContext modelContext) { log.log(FINE, () -> "Create and validate model " + modelVersion + " for " + applicationId + ", previous model " + (modelOf(modelVersion).isPresent() ? " exists" : "does not exist")); - ValidationParameters validationParameters = - new ValidationParameters(params.ignoreValidationErrors() ? 
IgnoreValidationErrors.TRUE : IgnoreValidationErrors.FALSE); - ModelCreateResult result = modelFactory.createAndValidateModel(modelContext, validationParameters); + var validationParameters = new ValidationParameters(params.ignoreValidationErrors() ? TRUE : FALSE); + var result = modelFactory.createAndValidateModel(modelContext, validationParameters); validateModelHosts(hostValidator, applicationId, result.getModel()); log.log(FINE, () -> "Done building model " + modelVersion + " for " + applicationId); params.getTimeoutBudget().assertNotTimedOut(() -> "prepare timed out after building model " + modelVersion + From 7670fa5b6556051df1d89d1da0d13cf924dd6306 Mon Sep 17 00:00:00 2001 From: Eirik Nygaard Date: Sun, 17 Nov 2024 22:37:53 +0100 Subject: [PATCH 049/126] Support restricting users in a tenant to specific email domains --- flags/src/main/java/com/yahoo/vespa/flags/Flags.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 3e02c61bd986..3d266274b465 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -498,6 +498,13 @@ public class Flags { "Takes effect immediately", CONSOLE_USER_EMAIL); + public static final UnboundListFlag RESTRICT_USERS_TO_DOMAIN = defineListFlag( + "restrict-users-to-domain", List.of(), String.class, + List.of("eirik"), "2024-11-07", "2025-02-07", + "Only allow adding specific email domains as user to tenant", + "Takes effect immediately", + TENANT_ID); + /** WARNING: public for testing: All flags should be defined in {@link Flags}. */ public static UnboundBooleanFlag defineFeatureFlag(String flagId, boolean defaultValue, List owners, String createdAt, String expiresAt, String description, From 83efc9920b55c6efef9ce5dbb6074b89a32647a1 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Sun, 17 Nov 2024 22:50:51 +0100 Subject: [PATCH 050/126] More refactoring, no functional changes --- .../server/modelfactory/ModelsBuilder.java | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java index b55d5b3acff0..171421800c4e 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java @@ -187,24 +187,22 @@ private List buildModelVersions(Set versions, Instant now, boolean buildLatestModelForThisMajor, int majorVersion) { - List builtModelVersions = new ArrayList<>(); - Optional latest = Optional.empty(); + List built = new ArrayList<>(); if (buildLatestModelForThisMajor) { - latest = Optional.of(findLatest(versions)); - // load latest application version - MODELRESULT latestModelVersion = buildModelVersion(modelFactoryRegistry.getFactory(latest.get()), - applicationPackage, - applicationId, - wantedDockerImageRepository, - wantedNodeVespaVersion); - allocatedHosts.add(latestModelVersion.getModel().allocatedHosts(), latest.get()); - builtModelVersions.add(latestModelVersion); + var latest = findLatest(versions); + var latestModelVersion = buildModelVersion(modelFactoryRegistry.getFactory(latest), + applicationPackage, + applicationId, + wantedDockerImageRepository, + wantedNodeVespaVersion); + 
allocatedHosts.add(latestModelVersion.getModel().allocatedHosts(), latest); + built.add(latestModelVersion); } // load old model versions versions = versionsToBuild(versions, wantedNodeVespaVersion, majorVersion, allocatedHosts); for (Version version : versions) { - if (latest.isPresent() && version.equals(latest.get())) continue; // already loaded + if (alreadyBuilt(version, built)) continue; try { MODELRESULT modelVersion = buildModelVersion(modelFactoryRegistry.getFactory(version), @@ -213,12 +211,12 @@ private List buildModelVersions(Set versions, wantedDockerImageRepository, wantedNodeVespaVersion); allocatedHosts.add(modelVersion.getModel().allocatedHosts(), version); - builtModelVersions.add(modelVersion); + built.add(modelVersion); } catch (RuntimeException e) { // allow failure to create old config models if there is a validation override that allow skipping old // config models, or we're manually deploying - if (! builtModelVersions.isEmpty() && - ( builtModelVersions.get(0).getModel().skipOldConfigModels(now) || zone().environment().isManuallyDeployed())) + if (! built.isEmpty() && + ( built.get(0).getModel().skipOldConfigModels(now) || zone().environment().isManuallyDeployed())) log.log(Level.WARNING, applicationId + ": Failed to build version " + version + ", but allow failure due to validation override or manual deployment:" + Exceptions.toMessageString(e)); @@ -228,7 +226,13 @@ private List buildModelVersions(Set versions, } } } - return builtModelVersions; + return built; + } + + private static boolean alreadyBuilt(Version version, List built) { + return built.stream() + .map(modelresult -> modelresult.getModel().version()) + .anyMatch(version::equals); } private Set versionsToBuild(Set versions, Version wantedVersion, int majorVersion, From aa0d4f09c0fa680f5096da577dc0606b60bb5398 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Mon, 18 Nov 2024 00:22:55 +0100 Subject: [PATCH 051/126] Don't move away context that is still in use. 
--- .../searchcore/proton/reference/gid_to_lid_change_handler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/searchcore/src/vespa/searchcore/proton/reference/gid_to_lid_change_handler.cpp b/searchcore/src/vespa/searchcore/proton/reference/gid_to_lid_change_handler.cpp index e7e24c908d75..c3642404b0e2 100644 --- a/searchcore/src/vespa/searchcore/proton/reference/gid_to_lid_change_handler.cpp +++ b/searchcore/src/vespa/searchcore/proton/reference/gid_to_lid_change_handler.cpp @@ -82,12 +82,12 @@ GidToLidChangeHandler::notifyRemoves(IDestructorCallbackSP context, const std::v assert(entry.removeSerialNum < serialNum); assert(entry.putSerialNum < serialNum); if (entry.removeSerialNum < entry.putSerialNum) { - notifyRemove(std::move(context), gid); + notifyRemove(context, gid); } entry.removeSerialNum = serialNum; ++entry.refCount; } else { - notifyRemove(std::move(context), gid); + notifyRemove(context, gid); } _pending_changes.emplace_back(IDestructorCallbackSP(), gid, 0, serialNum, true); } From 84e202bb86b8fe528f2e578329fb9e7400523f2c Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Mon, 18 Nov 2024 08:43:27 +0100 Subject: [PATCH 052/126] Remove tester-flavor remnants --- .../config/application/api/xml/DeploymentSpecXmlReader.java | 1 - .../com/yahoo/config/application/api/DeploymentSpecTest.java | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/config-model-api/src/main/java/com/yahoo/config/application/api/xml/DeploymentSpecXmlReader.java b/config-model-api/src/main/java/com/yahoo/config/application/api/xml/DeploymentSpecXmlReader.java index 786484f4fe22..99211c1d39bf 100644 --- a/config-model-api/src/main/java/com/yahoo/config/application/api/xml/DeploymentSpecXmlReader.java +++ b/config-model-api/src/main/java/com/yahoo/config/application/api/xml/DeploymentSpecXmlReader.java @@ -91,7 +91,6 @@ public class DeploymentSpecXmlReader { private static final String idAttribute = "id"; private static final String athenzServiceAttribute = "athenz-service"; private static final String athenzDomainAttribute = "athenz-domain"; - private static final String testerFlavorAttribute = "tester-flavor"; private static final String testerTag = "tester"; private static final String nodesTag = "nodes"; private static final String majorVersionAttribute = "major-version"; diff --git a/config-model-api/src/test/java/com/yahoo/config/application/api/DeploymentSpecTest.java b/config-model-api/src/test/java/com/yahoo/config/application/api/DeploymentSpecTest.java index 7fc1a3962577..debe41bf76d5 100644 --- a/config-model-api/src/test/java/com/yahoo/config/application/api/DeploymentSpecTest.java +++ b/config-model-api/src/test/java/com/yahoo/config/application/api/DeploymentSpecTest.java @@ -1607,7 +1607,7 @@ public void testDeployableHash() { - + From 9adb5a124e6ce52269d1ea00da410c0eb4316c84 Mon Sep 17 00:00:00 2001 From: gjoranv Date: Mon, 4 Nov 2024 19:40:55 +0100 Subject: [PATCH 053/126] Use vaultId for aws role names - Athenz role names still use vaultName --- .../ai/vespa/secret/aws/AsmTenantSecretReader.java | 5 ++++- .../main/java/ai/vespa/secret/aws/AthenzUtil.java | 14 ++++++++++---- .../secret/aws/AsmTenantSecretReaderTest.java | 8 +++++--- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java index 9fc560ebd5bd..b0639894df1c 100644 --- 
a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java @@ -53,7 +53,10 @@ static Map createVaultIdMap(AsmTenantSecretConfig config) { @Override protected AwsRolePath awsRole(VaultName vault) { - return AthenzUtil.awsReaderRole(system, tenant, vault); + if ( ! vaultIds.containsKey(vault)) { + throw new IllegalArgumentException("No vault id found for " + vault); + } + return AthenzUtil.awsReaderRole(system, tenant, vaultIds.get(vault)); } @Override diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AthenzUtil.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AthenzUtil.java index 21005ed21fed..b0c2372172fa 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AthenzUtil.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AthenzUtil.java @@ -2,6 +2,7 @@ package ai.vespa.secret.aws; import ai.vespa.secret.model.Role; +import ai.vespa.secret.model.VaultId; import ai.vespa.secret.model.VaultName; import com.yahoo.vespa.athenz.api.AwsRole; @@ -32,6 +33,11 @@ public static String resourceEntityName(String system, String tenant, VaultName .toLowerCase(); } + /* .reader */ + public static String athenzReaderRoleName(VaultName vault) { + return "%s.%s".formatted(vault.value(), Role.READER.value()); + } + /* Path: /tenant-secret/// */ public static AwsPath awsPath(String systemName, String tenantName) { return AwsPath.of(PREFIX, systemName, tenantName); @@ -43,13 +49,13 @@ public static AwsPath awsPath(String systemName, String tenantName) { * We use vaultId instead of vaultName because vaultName is not unique across tenants, * and role names must be unique across paths within an account. */ - public static AwsRolePath awsReaderRole(String systemName, String tenantName, VaultName vault) { - return new AwsRolePath(awsPath(systemName, tenantName), new AwsRole(athenzReaderRoleName(vault))); + public static AwsRolePath awsReaderRole(String systemName, String tenantName, VaultId vaultId) { + return new AwsRolePath(awsPath(systemName, tenantName), new AwsRole(awsReaderRoleName(vaultId))); } /* .reader */ - private static String athenzReaderRoleName(VaultName vault) { - return "%s.%s".formatted(vault.value(), Role.READER.value()); + private static String awsReaderRoleName(VaultId vaultId) { + return "%s.%s".formatted(vaultId.value(), Role.READER.value()); } } diff --git a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java index 1aab91d4dc0f..d926dd8051de 100644 --- a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java +++ b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java @@ -43,15 +43,17 @@ void reset() { } AsmTenantSecretReader secretReader() { - return new AsmTenantSecretReader(tester::newClient, system, tenant, Map.of()); + return new AsmTenantSecretReader(tester::newClient, system, tenant, + Map.of(VaultName.of("vault1"), VaultId.of("vaultId1"), + VaultName.of("vault2"), VaultId.of("vaultId2"))); } @Test void it_creates_one_credentials_and_client_per_vault_and_closes_them() { var vault1 = VaultName.of("vault1"); - var awsRole1 = AwsRolePath.fromStrings("/tenant-secret/publiccd/tenant1/", "vault1.reader"); + var awsRole1 = AwsRolePath.fromStrings("/tenant-secret/publiccd/tenant1/", "vaultId1.reader"); var vault2 = VaultName.of("vault2"); - var awsRole2 = 
AwsRolePath.fromStrings("/tenant-secret/publiccd/tenant1/", "vault2.reader"); + var awsRole2 = AwsRolePath.fromStrings("/tenant-secret/publiccd/tenant1/", "vaultId2.reader"); var secret1 = new SecretVersion("1", SecretVersionState.CURRENT, "secret1"); var secret2 = new SecretVersion("2", SecretVersionState.CURRENT, "secret2"); From eba1e94a7a35326516ae515e4e7fac5648c230a9 Mon Sep 17 00:00:00 2001 From: Morten Tokle Date: Fri, 15 Nov 2024 11:12:20 +0100 Subject: [PATCH 054/126] Prepare externalid for tenant secrets --- .../ai/vespa/secret/aws/AsmTenantSecretReader.java | 14 ++++++++------ .../secret/aws/AsmTenantSecretReaderTest.java | 12 +++++++++--- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java index b0639894df1c..704f9433601c 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java @@ -3,6 +3,7 @@ import ai.vespa.secret.config.aws.AsmSecretConfig; import ai.vespa.secret.config.aws.AsmTenantSecretConfig; +import ai.vespa.secret.model.ExternalId; import ai.vespa.secret.model.Key; import ai.vespa.secret.model.VaultId; import ai.vespa.secret.model.VaultName; @@ -23,7 +24,7 @@ public final class AsmTenantSecretReader extends AsmSecretReader { private final String system; private final String tenant; - private final Map vaultIds; + private final Map vaults; @Inject public AsmTenantSecretReader(AsmSecretConfig secretConfig, @@ -32,22 +33,22 @@ public AsmTenantSecretReader(AsmSecretConfig secretConfig, super(secretConfig, identities); this.system = tenantConfig.system(); this.tenant = tenantConfig.tenant(); - this.vaultIds = createVaultIdMap(tenantConfig); + this.vaults = createVaultIdMap(tenantConfig); } // For testing AsmTenantSecretReader(Function clientAndCredentialsSupplier, - String system, String tenant, Map vaultIds) { + String system, String tenant, Map vaults) { super(clientAndCredentialsSupplier); this.system = system; this.tenant = tenant; - this.vaultIds = vaultIds; + this.vaults = vaults; } - static Map createVaultIdMap(AsmTenantSecretConfig config) { + static Map createVaultIdMap(AsmTenantSecretConfig config) { // Note: we can rightfully assume that the vaults are unique by name for a tenant. 
return config.vaults().stream() - .map(vault -> Map.entry(VaultName.of(vault.name()), VaultId.of(vault.id()))) + .map(vault -> Map.entry(VaultName.of(vault.name()), new Vault(VaultId.of(vault.id()), VaultName.of(vault.name()), ExternalId.of(vault.externalId())))) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } @@ -75,4 +76,5 @@ public static String getAwsSecretId(String system, String tenant, Key key) { key.vaultName().value(), key.secretName().value()); } + record Vault(VaultId vaultId, VaultName vaultName, ExternalId externalId) {} } diff --git a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java index d926dd8051de..eadf687e24a4 100644 --- a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java +++ b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java @@ -3,6 +3,7 @@ import ai.vespa.secret.aws.testutil.AsmSecretReaderTester; import ai.vespa.secret.aws.testutil.AsmSecretTesterBase.SecretVersion; import ai.vespa.secret.config.aws.AsmTenantSecretConfig; +import ai.vespa.secret.model.ExternalId; import ai.vespa.secret.model.Key; import ai.vespa.secret.model.Secret; import ai.vespa.secret.model.SecretName; @@ -172,10 +173,15 @@ void it_creates_map_from_vaultName_to_vaultId_from_config() { .vaults(builder -> builder.name("vault1").id("id1").externalId("ext1")) .vaults(builder -> builder.name("vault2").id("id2").externalId("ext2")); - Map idMap = AsmTenantSecretReader.createVaultIdMap(config.build()); + Map idMap = AsmTenantSecretReader.createVaultIdMap(config.build()); assertEquals(2, idMap.size()); - assertEquals(VaultId.of("id1"), idMap.get(VaultName.of("vault1"))); - assertEquals(VaultId.of("id2"), idMap.get(VaultName.of("vault2"))); + var vault1 = idMap.get(VaultName.of("vault1")); + assertEquals(VaultId.of("id1"), vault1.vaultId()); + assertEquals(ExternalId.of("ext1"), vault1.externalId()); + + var vault2 = idMap.get(VaultName.of("vault2")); + assertEquals(VaultId.of("id2"), vault2.vaultId()); + assertEquals(ExternalId.of("ext2"), vault2.externalId()); } private void assertSame(SecretVersion version, Secret secret) { From 83dc7dab303647e6516742876588cff6af5743ab Mon Sep 17 00:00:00 2001 From: Morten Tokle Date: Fri, 15 Nov 2024 15:32:32 +0100 Subject: [PATCH 055/126] Include externalid when assuming aws role. 
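The external ID has to travel together with the role: cached SecretsManager clients are keyed on the (role, external id) pair, so two vaults that resolve to the same role path but different external ids never share credentials. A minimal sketch of that keying scheme is shown below; the names (AssumedRoleClientCache, RoleKey, clientFor) are illustrative only and are not the classes introduced by this patch, which instead uses AssumedRoleInfo as the key of the client map in AsmSecretStoreBase.

    import java.util.Optional;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.function.Function;

    // Illustrative sketch only: names and types are assumptions, not Vespa classes.
    final class AssumedRoleClientCache<C> {

        // A record works well as a map key: equals/hashCode cover both fields,
        // so role path + external id together identify one cached client.
        record RoleKey(String rolePath, Optional<String> externalId) { }

        private final ConcurrentHashMap<RoleKey, C> clients = new ConcurrentHashMap<>();
        private final Function<RoleKey, C> clientFactory;

        AssumedRoleClientCache(Function<RoleKey, C> clientFactory) {
            this.clientFactory = clientFactory;
        }

        C clientFor(String rolePath, String externalIdOrNull) {
            var key = new RoleKey(rolePath, Optional.ofNullable(externalIdOrNull));
            // computeIfAbsent creates at most one client per distinct (role, external id).
            return clients.computeIfAbsent(key, clientFactory);
        }
    }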
--- .../ai/vespa/secret/aws/AsmSecretReader.java | 2 +- .../vespa/secret/aws/AsmSecretStoreBase.java | 28 ++++++++++------- .../secret/aws/AsmTenantSecretReader.java | 7 ++++- .../ai/vespa/secret/aws/AssumedRoleInfo.java | 30 +++++++++++++++++++ .../aws/testutil/AsmSecretReaderTester.java | 5 ++-- 5 files changed, 57 insertions(+), 15 deletions(-) create mode 100644 jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AssumedRoleInfo.java diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java index c085cb6f83c0..0c8e2cb63b75 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java @@ -63,7 +63,7 @@ private AsmSecretReader(ZtsClient ztsClient, AthenzDomain domain) { } // For testing - public AsmSecretReader(Function clientAndCredentialsSupplier) { + public AsmSecretReader(Function clientAndCredentialsSupplier) { super(clientAndCredentialsSupplier); cache = initCache(); ztsClientCloser = () -> {}; diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretStoreBase.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretStoreBase.java index 9a31e143bbf0..b4f50e9d3af7 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretStoreBase.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretStoreBase.java @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package ai.vespa.secret.aws; +import ai.vespa.secret.model.ExternalId; import ai.vespa.secret.model.VaultName; import com.yahoo.component.AbstractComponent; import com.yahoo.vespa.athenz.api.AthenzDomain; @@ -13,6 +14,7 @@ import software.amazon.awssdk.services.secretsmanager.SecretsManagerClient; import java.util.HashSet; +import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; @@ -27,38 +29,43 @@ public abstract class AsmSecretStoreBase extends AbstractComponent implements Au public static final String AWSCURRENT = "AWSCURRENT"; - private final Function clientAndCredentialsSupplier; + private final Function clientAndCredentialsSupplier; - private final ConcurrentMap clientMap = new ConcurrentHashMap<>(); + private final ConcurrentMap clientMap = new ConcurrentHashMap<>(); public AsmSecretStoreBase(ZtsClient ztsClient, AthenzDomain athenzDomain) { - this(awsRole -> SecretsManagerClient.builder().region(Region.US_EAST_1) - .credentialsProvider(getAwsSessionCredsProvider(awsRole, ztsClient, athenzDomain)) + this(assumedRoleInfo -> SecretsManagerClient.builder().region(Region.US_EAST_1) + .credentialsProvider(getAwsSessionCredsProvider(assumedRoleInfo, ztsClient, athenzDomain)) .build() ); } // For testing - protected AsmSecretStoreBase(Function clientAndCredentialsSupplier) { + protected AsmSecretStoreBase(Function clientAndCredentialsSupplier) { this.clientAndCredentialsSupplier = clientAndCredentialsSupplier; } /** Returns the AWS role associated with the given vault. 
*/ protected abstract AwsRolePath awsRole(VaultName vault); + protected ExternalId externalId(VaultName vault) { + return null; + } protected SecretsManagerClient getClient(VaultName vault) { var awsRole = awsRole(vault); - clientMap.putIfAbsent(awsRole, clientAndCredentialsSupplier.apply(awsRole)); - return clientMap.get(awsRole); + var externalId = externalId(vault); + var assumedRoleInfo = new AssumedRoleInfo(awsRole, Optional.ofNullable(externalId)); + clientMap.putIfAbsent(assumedRoleInfo, clientAndCredentialsSupplier.apply(assumedRoleInfo)); + return clientMap.get(assumedRoleInfo); } - private static AwsCredentialsProvider getAwsSessionCredsProvider(AwsRolePath role, + private static AwsCredentialsProvider getAwsSessionCredsProvider(AssumedRoleInfo roleInfo, ZtsClient ztsClient, AthenzDomain athenzDomain) { - AwsCredentials credentials = new AwsCredentials(ztsClient, athenzDomain, role.athenzAwsRole()); + AwsCredentials credentials = new AwsCredentials(ztsClient, athenzDomain, roleInfo.role().athenzAwsRole(), roleInfo.externalId().map(ExternalId::value).orElse(null)); return () -> { AwsTemporaryCredentials temporary = credentials.get(); return AwsSessionCredentials.create(temporary.accessKeyId(), @@ -86,7 +93,6 @@ public void deconstruct() { // Only for testing public Set clientRoleNames() { - return new HashSet<>(clientMap.keySet()); + return new HashSet<>(clientMap.keySet().stream().map(AssumedRoleInfo::role).toList()); } - } diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java index 704f9433601c..93437ffd7999 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java @@ -12,6 +12,7 @@ import software.amazon.awssdk.services.secretsmanager.SecretsManagerClient; import java.util.Map; +import java.util.Optional; import java.util.function.Function; import java.util.stream.Collectors; @@ -37,7 +38,7 @@ public AsmTenantSecretReader(AsmSecretConfig secretConfig, } // For testing - AsmTenantSecretReader(Function clientAndCredentialsSupplier, + AsmTenantSecretReader(Function clientAndCredentialsSupplier, String system, String tenant, Map vaults) { super(clientAndCredentialsSupplier); this.system = system; @@ -60,6 +61,10 @@ protected AwsRolePath awsRole(VaultName vault) { return AthenzUtil.awsReaderRole(system, tenant, vaultIds.get(vault)); } + protected ExternalId externalId(VaultName vaultName) { + return Optional.ofNullable(vaults.get(vaultName)).map(Vault::externalId).orElse(null); + } + @Override protected String awsSecretId(Key key) { return awsSecretId(tenant, key); diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AssumedRoleInfo.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AssumedRoleInfo.java new file mode 100644 index 000000000000..bc6f14b15b90 --- /dev/null +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AssumedRoleInfo.java @@ -0,0 +1,30 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package ai.vespa.secret.aws; + +import ai.vespa.secret.model.ExternalId; + +import java.util.Objects; +import java.util.Optional; + +/** + * Information used to assume an AWS role. 
+ * @param role The role and path to assume + * @param externalId The external ID to use when assuming the role, Optional.empty() if not required + * @author mortent + */ +public record AssumedRoleInfo(AwsRolePath role, Optional externalId) { + + public AssumedRoleInfo { + Objects.requireNonNull(role, "role cannot be null"); + Objects.requireNonNull(externalId, "externalId cannot be null"); + } + + public static AssumedRoleInfo of(AwsRolePath role) { + return new AssumedRoleInfo(role, Optional.empty()); + } + + public static AssumedRoleInfo of(AwsRolePath role, ExternalId externalId) { + return new AssumedRoleInfo(role, Optional.ofNullable(externalId)); + } +} diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/testutil/AsmSecretReaderTester.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/testutil/AsmSecretReaderTester.java index 94b1e7ce7262..18da4ae4fbba 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/testutil/AsmSecretReaderTester.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/testutil/AsmSecretReaderTester.java @@ -5,6 +5,7 @@ package ai.vespa.secret.aws.testutil; +import ai.vespa.secret.aws.AssumedRoleInfo; import ai.vespa.secret.aws.AwsRolePath; import ai.vespa.secret.model.Key; import ai.vespa.secret.model.SecretVersionState; @@ -30,8 +31,8 @@ public void put(Key key, SecretVersion... versions) { secrets.put(awsSecretIdMapper.apply(key), List.of(versions)); } - public MockSecretsReader newClient(AwsRolePath awsRole) { - return new MockSecretsReader(awsRole); + public MockSecretsReader newClient(AssumedRoleInfo assumedRoleInfo) { + return new MockSecretsReader(assumedRoleInfo.role()); } From 1d27573c65435e0db57a3e2c26f8c93a3d33b5ae Mon Sep 17 00:00:00 2001 From: gjoranv Date: Mon, 18 Nov 2024 11:25:58 +0100 Subject: [PATCH 056/126] Secret values cannot exceed 64K in ASM --- .../src/main/java/ai/vespa/secret/model/SecretValue.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/container-disc/src/main/java/ai/vespa/secret/model/SecretValue.java b/container-disc/src/main/java/ai/vespa/secret/model/SecretValue.java index aec491382fc3..bc940f0fb689 100644 --- a/container-disc/src/main/java/ai/vespa/secret/model/SecretValue.java +++ b/container-disc/src/main/java/ai/vespa/secret/model/SecretValue.java @@ -5,10 +5,15 @@ */ public record SecretValue(String value) { + private static final int MAX_LENGTH = 64*1024; + public SecretValue { if (value == null || value.isBlank()) { throw new IllegalArgumentException("Secret value cannot be null or empty"); } + if (value.length() > MAX_LENGTH) { + throw new IllegalArgumentException("Secret value is too long"); + } } public static SecretValue of(String value) { From 415b31f44ebe6954c4c7f07dbf83cdd695c0c7c1 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Mon, 18 Nov 2024 12:04:36 +0100 Subject: [PATCH 057/126] Log warning (shown in console) when using admin version 2.0 on cloud --- .../java/com/yahoo/config/model/admin/AdminModel.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java b/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java index 4ef591cda9f7..9a810cb04a7e 100644 --- a/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java +++ b/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java @@ -20,7 +20,8 @@ import java.util.Collection; import java.util.List; -import java.util.logging.Level; + +import static 
java.util.logging.Level.WARNING; /** * Config model adaptor of the Admin class. @@ -78,7 +79,10 @@ public BuilderV2() { @Override public void doBuild(AdminModel model, Element adminElement, ConfigModelContext modelContext) { - if (modelContext.getDeployState().isHosted()) { // admin v4 is used on hosted: Build a default V4 instead + // admin v4 is used on hosted: Build a default V4 instead + if (modelContext.getDeployState().isHosted()) { + modelContext.getDeployLogger().logApplicationPackage(WARNING, " version 2.0 is not supported " + + "and will be ignored, please use version 4.0 instead"); new BuilderV4().doBuild(model, adminElement, modelContext); return; } @@ -114,7 +118,7 @@ public void doBuild(AdminModel model, Element adminElement, ConfigModelContext m // TODO: Remove in Vespa 9 if ("3.0".equals(adminElement.getAttribute("version"))) modelContext.getDeployState().getDeployLogger() - .logApplicationPackage(Level.WARNING, "admin model version 3.0 is deprecated and support will removed in Vespa 9, " + + .logApplicationPackage(WARNING, "admin model version 3.0 is deprecated and support will removed in Vespa 9, " + "please use version 4.0 or remove the element completely. See https://cloud.vespa.ai/en/reference/services#ignored-elements"); TreeConfigProducer parent = modelContext.getParentProducer(); From 1c1a03582714a86eacb311e928a36cab5df802d1 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Mon, 18 Nov 2024 12:34:39 +0100 Subject: [PATCH 058/126] Update config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java Co-authored-by: Kristian Aune --- .../src/main/java/com/yahoo/config/model/admin/AdminModel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java b/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java index 9a810cb04a7e..3609adaef499 100644 --- a/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java +++ b/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java @@ -81,7 +81,7 @@ public BuilderV2() { public void doBuild(AdminModel model, Element adminElement, ConfigModelContext modelContext) { // admin v4 is used on hosted: Build a default V4 instead if (modelContext.getDeployState().isHosted()) { - modelContext.getDeployLogger().logApplicationPackage(WARNING, " version 2.0 is not supported " + + modelContext.getDeployLogger().logApplicationPackage(WARNING, " version 2.0 is deprecated" + "and will be ignored, please use version 4.0 instead"); new BuilderV4().doBuild(model, adminElement, modelContext); return; From 96bf71e164d738a0410bc73e2eccd36269256727 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Mon, 18 Nov 2024 12:36:41 +0100 Subject: [PATCH 059/126] Add space --- .../src/main/java/com/yahoo/config/model/admin/AdminModel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java b/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java index 3609adaef499..0625dc20e22a 100644 --- a/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java +++ b/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java @@ -82,7 +82,7 @@ public void doBuild(AdminModel model, Element adminElement, ConfigModelContext m // admin v4 is used on hosted: Build a default V4 instead if (modelContext.getDeployState().isHosted()) { modelContext.getDeployLogger().logApplicationPackage(WARNING, " version 2.0 is deprecated" 
+ - "and will be ignored, please use version 4.0 instead"); + " and will be ignored, please use version 4.0 instead"); new BuilderV4().doBuild(model, adminElement, modelContext); return; } From 64fe5106e62ccc79bb0e875031a6c4771c688dbd Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Mon, 18 Nov 2024 13:22:51 +0000 Subject: [PATCH 060/126] Align naming of disk usage metrics with existing metrics. --- .../vespa/searchcore/proton/metrics/field_metrics_entry.cpp | 4 ++-- .../src/vespa/searchcore/proton/metrics/field_metrics_entry.h | 4 ++-- .../searchcore/proton/server/documentdb_metrics_updater.cpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.cpp b/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.cpp index 700b43e356a8..ee5b836b2277 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.cpp +++ b/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.cpp @@ -7,10 +7,10 @@ namespace proton { FieldMetricsEntry::FieldMetricsEntry(const std::string& name, const std::string& field_name, const std::string& description) : metrics::MetricSet(name, {{"field", field_name}}, description, nullptr), memoryUsage(this), - size_on_disk("size_on_disk", {}, "Size on disk (bytes)", this) + disk_usage("disk_usage", {}, "Disk space usage (in bytes)", this) { } FieldMetricsEntry::~FieldMetricsEntry() = default; -} // namespace proton +} diff --git a/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.h b/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.h index 62d15d478e19..f023c5937df2 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.h +++ b/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.h @@ -11,9 +11,9 @@ namespace proton { */ struct FieldMetricsEntry : public metrics::MetricSet { MemoryUsageMetrics memoryUsage; - metrics::LongValueMetric size_on_disk; + metrics::LongValueMetric disk_usage; FieldMetricsEntry(const std::string& name, const std::string& field_name, const std::string& description); ~FieldMetricsEntry() override; }; -} // namespace proton +} diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp index 02633cffeb36..88cf213816a8 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp @@ -85,7 +85,7 @@ updateIndexMetrics(DocumentDBTaggedMetrics &metrics, const search::SearchableSta auto entry = field_metrics.get_field_metrics_entry(field.first); if (entry) { entry->memoryUsage.update(field.second.memory_usage()); - entry->size_on_disk.set(field.second.size_on_disk()); + entry->disk_usage.set(field.second.size_on_disk()); entry->update_disk_io(field.second.cache_disk_io_stats()); } disk_io.merge(field.second.cache_disk_io_stats()); @@ -185,7 +185,7 @@ updateAttributeMetrics(AttributeMetrics &metrics, const TempAttributeMetrics &tm auto entry = metrics.get_field_metrics_entry(attr.first); if (entry) { entry->memoryUsage.update(attr.second.memoryUsage); - entry->size_on_disk.set(attr.second.size_on_disk); + entry->disk_usage.set(attr.second.size_on_disk); } } } From 0939a8f583287a2284a4707c4be3c8227b295745 Mon Sep 17 00:00:00 2001 From: Morten Tokle Date: Mon, 18 Nov 2024 14:38:21 +0100 Subject: [PATCH 061/126] Follow changes --- 
.../main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java | 4 ++-- .../java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java index 93437ffd7999..c736b50bd743 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java @@ -55,10 +55,10 @@ static Map createVaultIdMap(AsmTenantSecretConfig config) { @Override protected AwsRolePath awsRole(VaultName vault) { - if ( ! vaultIds.containsKey(vault)) { + if ( ! vaults.containsKey(vault)) { throw new IllegalArgumentException("No vault id found for " + vault); } - return AthenzUtil.awsReaderRole(system, tenant, vaultIds.get(vault)); + return AthenzUtil.awsReaderRole(system, tenant, vaults.get(vault).vaultId()); } protected ExternalId externalId(VaultName vaultName) { diff --git a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java index eadf687e24a4..cd05fb4898fe 100644 --- a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java +++ b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java @@ -45,8 +45,8 @@ void reset() { AsmTenantSecretReader secretReader() { return new AsmTenantSecretReader(tester::newClient, system, tenant, - Map.of(VaultName.of("vault1"), VaultId.of("vaultId1"), - VaultName.of("vault2"), VaultId.of("vaultId2"))); + Map.of(VaultName.of("vault1"), new AsmTenantSecretReader.Vault(VaultId.of("vaultId1"), VaultName.of("vault1"), ExternalId.of("ext1")), + VaultName.of("vault2"), new AsmTenantSecretReader.Vault(VaultId.of("vaultId2"), VaultName.of("vault2"), ExternalId.of("ext2")))); } @Test From 1d3992a3d1060736e1425cd940be16644c16cd45 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Mon, 18 Nov 2024 15:07:57 +0100 Subject: [PATCH 062/126] Trim small posting lists to limit cache bloat. 
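Posting lists read with direct I/O are padded out to block boundaries, so a small posting list can end up in an allocation that is mostly padding; once such entries land in the posting list cache, the padding inflates the cache's memory footprint. The change below therefore copies an entry into a right-sized buffer when the allocation exceeds what the posting list needs by more than roughly 20%. The Java snippet here is only a rough sketch of that rule, with invented names (PostingBufferTrimmer, trimIfBloated); the actual logic is the C++ in ZcPosOccRandRead::trim_posting_list further down.

    import java.util.Arrays;

    // Rough sketch of the trimming rule; names are invented for illustration.
    final class PostingBufferTrimmer {

        // Copy the used prefix into an exact-size array when the allocation is
        // more than ~20% larger than what the posting list actually needs.
        static byte[] trimIfBloated(byte[] allocated, int usedBytes) {
            if (allocated.length <= usedBytes * 1.2) {
                return allocated;                       // little bloat: keep as-is, no copy
            }
            return Arrays.copyOf(allocated, usedBytes); // trimmed copy goes into the cache
        }

        public static void main(String[] args) {
            byte[] padded = new byte[4096];             // direct-I/O style padded read
            byte[] cached = trimIfBloated(padded, 96);
            System.out.println(cached.length);          // prints 96, not 4096
        }
    }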
--- .../diskindex/diskindex/diskindex_test.cpp | 3 ++ .../vespa/searchlib/diskindex/field_index.cpp | 10 ++++-- .../vespa/searchlib/diskindex/field_index.h | 4 ++- .../searchlib/diskindex/zcposoccrandread.cpp | 35 +++++++++++++++++-- .../searchlib/diskindex/zcposoccrandread.h | 3 ++ .../vespa/searchlib/index/postinglistfile.cpp | 7 ++++ .../vespa/searchlib/index/postinglistfile.h | 9 +++-- 7 files changed, 62 insertions(+), 9 deletions(-) diff --git a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp index 575152225af7..cdad5118b138 100644 --- a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp +++ b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp @@ -251,6 +251,9 @@ DiskIndexTest::requireThatWeCanReadPostingList() auto r = _index->lookup(0, "w1"); auto& field_index = _index->get_field_index(0); auto h = field_index.read_posting_list(r); + if (field_index.get_posting_list_cache_enabled()) { + EXPECT_GT(64, h._allocSize); + } auto sb = field_index.create_iterator(r, h, mda); EXPECT_EQ(SimpleResult({1,3}), SimpleResult().search(*sb)); } diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp index e4c3a28eb2a3..9ad02e7adff5 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp @@ -172,12 +172,16 @@ FieldIndex::reuse_files(const FieldIndex& rhs) } PostingListHandle -FieldIndex::read_uncached_posting_list(const DictionaryLookupResult& lookup_result) const +FieldIndex::read_uncached_posting_list(const DictionaryLookupResult& lookup_result, bool trim) const { auto handle = _posting_file->read_posting_list(lookup_result); if (handle._read_bytes != 0) { _cache_disk_io_stats->add_uncached_read_operation(handle._read_bytes); } + if (trim && + handle._allocSize > (16 + ((lookup_result.bitOffset & 63) + lookup_result.counts._bitLength + 63) / 8) * 1.2) { + _posting_file->trim_posting_list(lookup_result, handle); // Trim posting list if more than 20% bloat + } return handle; } @@ -188,7 +192,7 @@ FieldIndex::read(const IPostingListCache::Key& key, IPostingListCache::Context& DictionaryLookupResult lookup_result; lookup_result.bitOffset = key.bit_offset; lookup_result.counts._bitLength = key.bit_length; - return read_uncached_posting_list(lookup_result); + return read_uncached_posting_list(lookup_result, true); } PostingListHandle @@ -199,7 +203,7 @@ FieldIndex::read_posting_list(const DictionaryLookupResult& lookup_result) const return {}; } if (file->getMemoryMapped() || !_posting_list_cache_enabled) { - return read_uncached_posting_list(lookup_result); + return read_uncached_posting_list(lookup_result, false); } IPostingListCache::Key key; key.file_id = _file_id; diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.h b/searchlib/src/vespa/searchlib/diskindex/field_index.h index 6a39c4982488..0e41ffc3eb6a 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.h +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.h @@ -72,7 +72,8 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { bool open_dictionary(const std::string& field_dir, const TuneFileSearch& tune_file_search); bool open(const std::string& field_dir, const TuneFileSearch &tune_file_search); void reuse_files(const FieldIndex& rhs); - index::PostingListHandle read_uncached_posting_list(const search::index::DictionaryLookupResult& lookup_result) const; + 
index::PostingListHandle read_uncached_posting_list(const search::index::DictionaryLookupResult &lookup_result, + bool trim) const; index::PostingListHandle read(const IPostingListCache::Key& key, IPostingListCache::Context& ctx) const override; index::PostingListHandle read_posting_list(const search::index::DictionaryLookupResult& lookup_result) const; index::BitVectorDictionaryLookupResult lookup_bit_vector(const search::index::DictionaryLookupResult& lookup_result) const; @@ -87,6 +88,7 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { index::DictionaryFileRandRead* get_dictionary() noexcept { return _dict.get(); } FieldIndexStats get_stats() const; uint32_t get_field_id() const noexcept { return _field_id; } + bool get_posting_list_cache_enabled() const noexcept { return _posting_list_cache_enabled; } }; } diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp index 4694017a7d4b..cb62370cc7f2 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp @@ -44,7 +44,8 @@ ZcPosOccRandRead::ZcPosOccRandRead() _fileBitSize(0), _headerBitSize(0), _fieldsParams() -{ } +{ +} ZcPosOccRandRead::~ZcPosOccRandRead() @@ -108,8 +109,8 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result) size_t padExtraAfter; // Decode prefetch space _file->DirectIOPadding(startOffset, vectorLen, padBefore, padAfter); padExtraAfter = 0; - if (padAfter < 16) { - padExtraAfter = 16 - padAfter; + if (padAfter < decode_prefetch_size) { + padExtraAfter = decode_prefetch_size - padAfter; } size_t mallocLen = padBefore + vectorLen + padAfter + padExtraAfter; @@ -137,6 +138,33 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result) return handle; } +void +ZcPosOccRandRead::trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const +{ + if (lookup_result.counts._bitLength == 0 || _memoryMapped) { + return; + } + uint64_t start_offset = (lookup_result.bitOffset + _headerBitSize) >> 3; + // Align start at 64-bit boundary + start_offset -= (start_offset & 7); + uint64_t end_offset = (lookup_result.bitOffset + _headerBitSize + + lookup_result.counts._bitLength + 7) >> 3; + // Align end at 64-bit boundary + end_offset += (-end_offset & 7); + size_t malloc_len = end_offset - start_offset + decode_prefetch_size; + if (handle._allocSize == malloc_len) { + assert(handle._allocMem.get() == handle._mem); + return; + } + assert(handle._allocSize >= malloc_len); + auto *mem = malloc(malloc_len); + assert(mem != nullptr); + memcpy(mem, handle._mem, malloc_len); + handle._allocMem = std::shared_ptr(mem, free); + handle._mem = mem; + handle._allocSize = malloc_len; + handle._read_bytes = end_offset - start_offset; +} bool ZcPosOccRandRead:: @@ -156,6 +184,7 @@ open(const std::string &name, const TuneFileRandRead &tuneFileRead) _fileSize = _file->getSize(); readHeader(); + afterOpen(*_file); return true; } diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h index fd69786ac51d..bd5097fcdeab 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h @@ -22,6 +22,8 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead uint64_t _headerBitSize; bitcompression::PosOccFieldsParams _fieldsParams; + static 
constexpr size_t decode_prefetch_size = 16; + public: ZcPosOccRandRead(); ~ZcPosOccRandRead(); @@ -42,6 +44,7 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead * Read (possibly partial) posting list into handle. */ PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override; + void trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const override; bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override; bool close() override; diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp index 12bd62af9ef2..733b8009f447 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp @@ -110,6 +110,13 @@ PostingListFileRandReadPassThrough::read_posting_list(const DictionaryLookupResu return _lower->read_posting_list(lookup_result); } +void +PostingListFileRandReadPassThrough::trim_posting_list(const DictionaryLookupResult &lookup_result, + PostingListHandle &handle) const +{ + return _lower->trim_posting_list(lookup_result, handle); +} + bool PostingListFileRandReadPassThrough::open(const std::string &name, const TuneFileRandRead &tuneFileRead) diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.h b/searchlib/src/vespa/searchlib/index/postinglistfile.h index 8ddfc2dc12cc..75cc3c65d158 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.h +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.h @@ -159,8 +159,12 @@ class PostingListFileRandRead { /** * Read posting list into handle. */ - virtual PostingListHandle - read_posting_list(const DictionaryLookupResult& lookup_result) = 0; + virtual PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) = 0; + + /** + * Remove directio padding from posting list. + */ + virtual void trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const = 0; /** * Open posting list file for random read. 
@@ -199,6 +203,7 @@ class PostingListFileRandReadPassThrough : public PostingListFileRandRead { const search::fef::TermFieldMatchDataArray &matchData) const override; PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override; + void trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const override; bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override; bool close() override; From 31e158f26736cca557705237b0df9596336d4987 Mon Sep 17 00:00:00 2001 From: Eirik Nygaard Date: Mon, 18 Nov 2024 15:11:15 +0100 Subject: [PATCH 063/126] Add use legacy auth0 request filter flag --- flags/src/main/java/com/yahoo/vespa/flags/Flags.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 3d266274b465..400ebf0a69f0 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -19,6 +19,7 @@ import static com.yahoo.vespa.flags.Dimension.HOSTNAME; import static com.yahoo.vespa.flags.Dimension.INSTANCE_ID; import static com.yahoo.vespa.flags.Dimension.NODE_TYPE; +import static com.yahoo.vespa.flags.Dimension.SYSTEM; import static com.yahoo.vespa.flags.Dimension.TENANT_ID; import static com.yahoo.vespa.flags.Dimension.VESPA_VERSION; @@ -505,6 +506,13 @@ public class Flags { "Takes effect immediately", TENANT_ID); + public static final UnboundBooleanFlag LEGACY_AUTH0_FILTER = defineFeatureFlag( + "use-legacy-auth0-filter", true, + List.of("eirik"), "2024-11-07", "2025-02-07", + "Use legacy auth0 request filter, or new one", + "Takes after controller restart", + SYSTEM); + /** WARNING: public for testing: All flags should be defined in {@link Flags}. */ public static UnboundBooleanFlag defineFeatureFlag(String flagId, boolean defaultValue, List owners, String createdAt, String expiresAt, String description, From b3059b61f8f83ca8f052a6f9cf7fc318e473ef54 Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Mon, 18 Nov 2024 15:00:14 +0000 Subject: [PATCH 064/126] Expose per field disk usage, disk index cache and disk index io metrics. 
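The new per-document-type I/O counters report bytes read from disk index posting list and bitvector files separately from bytes served by the disk index caches, alongside per-cache hit rates, lookups and invalidations. As a small, hypothetical example, the snippet below combines the two I/O counters into a byte-level cache hit fraction for a dashboard. It assumes that read_bytes and cached_read_bytes count disjoint (uncached vs. cached) reads, which the patch does not state explicitly; the helper name and ratio are not part of the patch.

    // Hypothetical helper; cachedByteFraction and its interpretation of the
    // counters are assumptions, only the metric names come from the patch.
    final class DiskIndexCacheRatio {

        static double cachedByteFraction(double readBytes, double cachedReadBytes) {
            double total = readBytes + cachedReadBytes;
            return total == 0 ? 0.0 : cachedReadBytes / total;
        }

        public static void main(String[] args) {
            // e.g. 1 MiB read from disk, 9 MiB served from the posting list / bitvector caches
            System.out.println(cachedByteFraction(1 << 20, 9 << 20)); // 0.9
        }
    }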
--- .../ai/vespa/metrics/SearchNodeMetrics.java | 25 +++++++++++++++---- .../metrics/set/Vespa9VespaMetricSet.java | 14 +++++++++++ .../ai/vespa/metrics/set/VespaMetricSet.java | 14 +++++++++++ 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/metrics/src/main/java/ai/vespa/metrics/SearchNodeMetrics.java b/metrics/src/main/java/ai/vespa/metrics/SearchNodeMetrics.java index ba5b65643e06..da488a4f5ec1 100644 --- a/metrics/src/main/java/ai/vespa/metrics/SearchNodeMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/SearchNodeMetrics.java @@ -214,17 +214,32 @@ public enum SearchNodeMetrics implements VespaMetrics { CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_USED_BYTES("content.proton.documentdb.ready.attribute.memory_usage.used_bytes", Unit.BYTE, "The number of used bytes (<= allocated_bytes)"), CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_DEAD_BYTES("content.proton.documentdb.ready.attribute.memory_usage.dead_bytes", Unit.BYTE, "The number of dead bytes (<= used_bytes)"), CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_ONHOLD_BYTES("content.proton.documentdb.ready.attribute.memory_usage.onhold_bytes", Unit.BYTE, "The number of bytes on hold"), + CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_DISK_USAGE("content.proton.documentdb.ready.attribute.disk_usage", Unit.BYTE, "Disk space usage (in bytes) of the flushed snapshot of this attribute for this document type"), CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_ALLOCATED_BYTES("content.proton.documentdb.notready.attribute.memory_usage.allocated_bytes", Unit.BYTE, "The number of allocated bytes"), CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_USED_BYTES("content.proton.documentdb.notready.attribute.memory_usage.used_bytes", Unit.BYTE, "The number of used bytes (<= allocated_bytes)"), CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_DEAD_BYTES("content.proton.documentdb.notready.attribute.memory_usage.dead_bytes", Unit.BYTE, "The number of dead bytes (<= used_bytes)"), CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_ONHOLD_BYTES("content.proton.documentdb.notready.attribute.memory_usage.onhold_bytes", Unit.BYTE, "The number of bytes on hold"), // index - CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ALLOCATED_BYTES("content.proton.documentdb.index.memory_usage.allocated_bytes", Unit.BYTE, "The number of allocated bytes"), - CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_USED_BYTES("content.proton.documentdb.index.memory_usage.used_bytes", Unit.BYTE, "The number of used bytes (<= allocated_bytes)"), - CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_DEAD_BYTES("content.proton.documentdb.index.memory_usage.dead_bytes", Unit.BYTE, "The number of dead bytes (<= used_bytes)"), - CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ONHOLD_BYTES("content.proton.documentdb.index.memory_usage.onhold_bytes", Unit.BYTE, "The number of bytes on hold"), - CONTENT_PROTON_DOCUMENTDB_INDEX_DISK_USAGE("content.proton.documentdb.index.disk_usage", Unit.BYTE, "Disk space usage in bytes"), + CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_ELEMENTS("content.proton.index.cache.postinglist.elements", Unit.ITEM, "Number of elements in the cache. Contains disk index posting list files across all document types"), + CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_MEMORY_USAGE("content.proton.index.cache.postinglist.memory_usage", Unit.BYTE, "Memory usage of the cache (in bytes). 
Contains disk index posting list files across all document types"), + CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_HIT_RATE("content.proton.index.cache.postinglist.hit_rate", Unit.FRACTION, "Rate of hits in the cache compared to number of lookups. Contains disk index posting list files across all document types"), + CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_LOOKUPS("content.proton.index.cache.postinglist.lookups", Unit.OPERATION, "Number of lookups in the cache (hits + misses). Contains disk index posting list files across all document types"), + CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_INVALIDATIONS("content.proton.index.cache.postinglist.invalidations", Unit.OPERATION, "Number of invalidations (erased elements) in the cache. Contains disk index posting list files across all document types"), + CONTENT_PROTON_INDEX_CACHE_BITVECTOR_ELEMENTS("content.proton.index.cache.bitvector.elements", Unit.ITEM, "Number of elements in the cache. Contains disk index bitvector files across all document types"), + CONTENT_PROTON_INDEX_CACHE_BITVECTOR_MEMORY_USAGE("content.proton.index.cache.bitvector.memory_usage", Unit.BYTE, "Memory usage of the cache (in bytes). Contains disk index bitvector files across all document types"), + CONTENT_PROTON_INDEX_CACHE_BITVECTOR_HIT_RATE("content.proton.index.cache.bitvector.hit_rate", Unit.FRACTION, "Rate of hits in the cache compared to number of lookups. Contains disk index bitvector files across all document types"), + CONTENT_PROTON_INDEX_CACHE_BITVECTOR_LOOKUPS("content.proton.index.cache.bitvector.lookups", Unit.OPERATION, "Number of lookups in the cache (hits + misses). Contains disk index bitvector files across all document types"), + CONTENT_PROTON_INDEX_CACHE_BITVECTOR_INVALIDATIONS("content.proton.index.cache.bitvector.invalidations", Unit.OPERATION, "Number of invalidations (erased elements) in the cache. 
Contains disk index bitvector files across all document types"), + CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ALLOCATED_BYTES("content.proton.documentdb.index.memory_usage.allocated_bytes", Unit.BYTE, "The number of allocated bytes for the memory index for this document type"), + CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_USED_BYTES("content.proton.documentdb.index.memory_usage.used_bytes", Unit.BYTE, "The number of used bytes (<= allocated_bytes) for the memory index for this document type"), + CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_DEAD_BYTES("content.proton.documentdb.index.memory_usage.dead_bytes", Unit.BYTE, "The number of dead bytes (<= used_bytes) for the memory index for this document type"), + CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ONHOLD_BYTES("content.proton.documentdb.index.memory_usage.onhold_bytes", Unit.BYTE, "The number of bytes on hold for the memory index for this document type"), + CONTENT_PROTON_DOCUMENTDB_INDEX_DISK_USAGE("content.proton.documentdb.index.disk_usage", Unit.BYTE, "Disk space usage (in bytes) of all disk indexes for this document type"), + CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_READ_BYTES("content.proton.documentdb.index.io.search.read_bytes", Unit.BYTE, "Bytes read from disk index posting list and bitvector files as part of search for this document type"), + CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_CACHED_READ_BYTES("content.proton.documentdb.index.io.search.cached_read_bytes", Unit.BYTE, "Bytes read from cached disk index posting list and bitvector files as part of search for this document type"), + CONTENT_PROTON_DOCUMENTDB_READY_INDEX_MEMORY_USAGE_ALLOCATED_BYTES("content.proton.documentdb.ready.index.memory_usage.allocated_bytes", Unit.BYTE, "The number of allocated bytes for this index field in the memory index for this document type"), + CONTENT_PROTON_DOCUMENTDB_READY_INDEX_DISK_USAGE("content.proton.documentdb.ready.index.disk_usage", Unit.BYTE, "Disk space usage (in bytes) of this index field in all disk indexes for this document type"), // matching CONTENT_PROTON_DOCUMENTDB_MATCHING_QUERIES("content.proton.documentdb.matching.queries", Unit.QUERY, "Number of queries executed"), diff --git a/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java index 0d5827369fd6..b24cdcfbcdbc 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java @@ -439,10 +439,24 @@ private static Set getSearchNodeMetrics() { // attribute addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_ALLOCATED_BYTES.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_DISK_USAGE.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_ALLOCATED_BYTES.average()); // index addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ALLOCATED_BYTES.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_READ_BYTES.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_CACHED_READ_BYTES.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_INDEX_DISK_USAGE.average()); + + // index caches + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_MEMORY_USAGE.average()); + addMetric(metrics, 
SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_HIT_RATE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_LOOKUPS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_INVALIDATIONS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_MEMORY_USAGE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_HIT_RATE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_LOOKUPS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_INVALIDATIONS.rate()); // matching addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_QUERIES.rate()); diff --git a/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java index 0e6c537f56dd..2bd426e2a8c3 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java @@ -503,6 +503,7 @@ private static Set getSearchNodeMetrics() { addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_USED_BYTES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_DEAD_BYTES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_ONHOLD_BYTES.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_DISK_USAGE.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_ALLOCATED_BYTES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_USED_BYTES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_DEAD_BYTES.average()); @@ -513,6 +514,19 @@ private static Set getSearchNodeMetrics() { addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_USED_BYTES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_DEAD_BYTES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ONHOLD_BYTES.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_READ_BYTES.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_CACHED_READ_BYTES.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_INDEX_DISK_USAGE.average()); + + // index caches + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_MEMORY_USAGE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_HIT_RATE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_LOOKUPS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_INVALIDATIONS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_MEMORY_USAGE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_HIT_RATE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_LOOKUPS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_INVALIDATIONS.rate()); // matching addMetric(metrics, 
SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_QUERIES.rate()); From e3b8c0f35759032abbb73c387436778848fc5af8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Pettersen?= Date: Thu, 14 Nov 2024 11:41:40 +0000 Subject: [PATCH 065/126] lz_string compression for tensor playground link --- eval/src/apps/eval_expr/eval_expr.cpp | 441 ++++++++++++++++++++++++++ 1 file changed, 441 insertions(+) diff --git a/eval/src/apps/eval_expr/eval_expr.cpp b/eval/src/apps/eval_expr/eval_expr.cpp index 1b7dcb3f7729..fd4ec875c2bb 100644 --- a/eval/src/apps/eval_expr/eval_expr.cpp +++ b/eval/src/apps/eval_expr/eval_expr.cpp @@ -17,8 +17,13 @@ #include #include #include +#include +#include #include #include +#include +#include +#include #include @@ -33,6 +38,7 @@ using vespalib::slime::Inspector; using vespalib::slime::Cursor; using vespalib::Input; using vespalib::Memory; +using vespalib::SimpleBuffer; using CostProfile = std::vector>; @@ -337,6 +343,11 @@ class Collector { std::string toString() const { return _slime.toString(); } + std::string toCompactString() const { + SimpleBuffer buf; + JsonFormat::encode(_slime.get(), buf, true); + return buf.get().make_string(); + } }; struct EditLineWrapper { @@ -500,6 +511,410 @@ int json_repl_mode(Context &ctx) { } } +// like base64, but replace '/' with '-' and drop padding (note: reserved '+' is still used) +const char *symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-"; +std::map make_symbol_map() { + std::map map; + for (int i = 0; i < 64; ++i) { + map[symbols[i]] = i; + } + return map; +} + +// Write bits to url-safe-ish string +struct UrlSafeBitOutput { + int bits = 0; + int num_bits = 0; + std::string result; + void write_bits(int x, int n) { + for (int i = 0; i < n; ++i) { + bits = (bits << 1) | (x & 1); + if (++num_bits == 6) { + result.push_back(symbols[bits]); + num_bits = 0; + bits = 0; + } + x >>= 1; + } + } + void flush() { + if (num_bits != 0) { + write_bits(0, 6 - num_bits); + } + } +}; + +// Read bits from url-safe-ish string +struct UrlSafeBitInput { + int bits = 0; + int num_bits = 0; + size_t offset = 0; + static constexpr int bit_read_mask = (1 << 5); + static const std::map symbol_map; + const std::string &str; + UrlSafeBitInput(const std::string &str_in) noexcept : str(str_in) {} + int read_bits(int n) { + int x = 0; + int b = 1; + for (int i = 0; i < n; ++i) { + if (num_bits == 0) { + REQUIRE(offset < str.size()); // input underflow + auto pos = symbol_map.find(str[offset++]); + REQUIRE(pos != symbol_map.end()); // invalid input character + bits = pos->second; + num_bits = 6; + } + if (bits & bit_read_mask) { + x |= b; + } + b <<= 1; + bits <<= 1; + --num_bits; + } + return x; + } +}; +const std::map UrlSafeBitInput::symbol_map = make_symbol_map(); + +// keeps track of how many bits to use for dict references +struct BitWidthTracker { + int num; + int next; + BitWidthTracker(int num_in, int next_in) noexcept + : num(num_in), next(next_in) {} + void use() { + if (--next == 0) { + next = 1 << num; + ++num; + } + } + int width() { + return num; + } +}; + +// unified dictionary satisfying the needs of both compress and decompress +struct LZDict { + std::map map; + std::vector list; + static constexpr int lit8 = 0; + static constexpr int lit16 = 1; + static constexpr int eof = 2; + LZDict() { + list.push_back(""); // 0 + list.push_back(""); // 1 + list.push_back(""); // 2 + // we cannot put these in the forward dictionary since they + // could produce duplicates which we check for + } + int size() { return 
list.size(); } + bool has(const std::string &key) { + return (map.count(key) == 1); + } + int add(const std::string &key) { + REQUIRE(map.count(key) == 0); // no duplicates + int value = list.size(); + list.push_back(key); + map[key] = value; + return value; + } + std::string get(int value) { + REQUIRE(value < size()); // check with size first + return list[value]; + } + int get(const std::string &key) { + REQUIRE(map.count(key) == 1); // check with has first + return map[key]; + } +}; + +// ascii-only lz_string compression (https://github.com/pieroxy/lz-string) +void compress_impl(const std::string &str, auto &bits, auto &dict, auto &dst) { + + std::set pending; + std::string ctx_wc; + std::string ctx_w; + + for (char c: str) { + std::string ctx_c(1, c); + if (!dict.has(ctx_c)) { + dict.add(ctx_c); + pending.insert(ctx_c); + } + ctx_wc = ctx_w + ctx_c; + if (dict.has(ctx_wc)) { + ctx_w = ctx_wc; + } else { + if (pending.count(ctx_w) == 1) { + REQUIRE_EQ(ctx_w.size(), 1zu); + dst.write_bits(dict.lit8, bits.width()); + dst.write_bits(ctx_w[0], 8); + bits.use(); + pending.erase(ctx_w); + } else { + dst.write_bits(dict.get(ctx_w), bits.width()); + } + bits.use(); + dict.add(ctx_wc); + ctx_w = ctx_c; + } + } + if (!ctx_w.empty()) { + if (pending.count(ctx_w) == 1) { + dst.write_bits(dict.lit8, bits.width()); + dst.write_bits(ctx_w[0], 8); + bits.use(); + pending.erase(ctx_w); + } else { + dst.write_bits(dict.get(ctx_w), bits.width()); + } + bits.use(); + } + dst.write_bits(dict.eof, bits.width()); + dst.flush(); +} + +// ascii-only lz_string decompression (https://github.com/pieroxy/lz-string) +std::string decompress_impl(auto &src, auto &bits, auto &dict) { + + std::string result; + + int c = src.read_bits(2); + if (c == dict.eof) { + return result; + } + REQUIRE_EQ(c, dict.lit8); // ascii only + c = src.read_bits(8); + std::string w(1, char(c)); + result.append(w); + dict.add(w); + + std::string entry; + for (;;) { + c = src.read_bits(bits.width()); + REQUIRE(c != dict.lit16); // ascii only + if (c == dict.eof) { + return result; + } + if (c == dict.lit8) { + c = dict.add(std::string(1, char(src.read_bits(8)))); + bits.use(); + } + REQUIRE(c <= dict.size()); // invalid dict entry + if (c == dict.size()) { + entry = w + w.substr(0, 1); + } else { + entry = dict.get(c); + } + result.append(entry); + dict.add(w + entry.substr(0, 1)); + bits.use(); + w = entry; + } +} + +// used to encode setups in tensor playground +std::string compress(const std::string &str) { + LZDict dict; + BitWidthTracker bits(2, 2); + UrlSafeBitOutput dst; + compress_impl(str, bits, dict, dst); + return dst.result; +} + +// used to test the compression code above, hence the inlined REQUIREs +std::string decompress(const std::string &str) { + LZDict dict; + BitWidthTracker bits(3, 4); + UrlSafeBitInput src(str); + return decompress_impl(src, bits, dict); +} + +// What happens during compression and decompression, the full story +struct LZLog { + static constexpr int BW = 18; + static constexpr int PW = 14; + struct Block { + std::vector writer; + std::vector reader; + void dump(size_t idx) { + if (writer.empty() && reader.empty()) { + return; + } + size_t len = reader.size() + 1; + if (idx == 0) { + len = std::max(len, writer.size()); + } else { + len = std::max(len, writer.size() + 1); + } + size_t wait = (len - writer.size()); + for (size_t i = 0; i < len; ++i) { + fprintf(stderr, "%*s%-*s%-*s\n", + BW, (i >= wait) ? writer[i - wait].c_str() : "", + PW, "", + BW, (i < reader.size()) ? 
reader[i].c_str() : ""); + } + } + }; + struct Packet { + int bits; + int value; + Packet(int bits_in, int value_in) noexcept + : bits(bits_in), value(value_in) {} + void dump() { + fprintf(stderr, "%*s%-*s%-*s\n", + BW, fmt("write %d bits", bits).c_str(), + PW, fmt(" -> %4d -> ", value).c_str(), + BW, fmt("read %d bits", bits).c_str()); + } + }; + std::vector blocks; + std::vector packets; + void ensure_block(size_t idx) { + while (blocks.size() <= idx) { + blocks.emplace_back(); + } + } + void writer(int block, const std::string &msg) { + ensure_block(block); + blocks[block].writer.push_back(msg); + } + int packet(int block, int bits, int value) { + if (packets.size() <= size_t(block)) { + REQUIRE_EQ(packets.size(), size_t(block)); + packets.emplace_back(bits, value); + } else { + REQUIRE_EQ(packets[block].bits, bits); + REQUIRE_EQ(packets[block].value, value); + } + return block + 1; + } + void reader(int block, const std::string &msg) { + ensure_block(block); + blocks[block].reader.push_back(msg); + } + void dump() { + std::string bsep(BW, '-'); + std::string psep(PW, '-'); + REQUIRE_EQ(blocks.size(), packets.size() + 1); + fprintf(stderr, "%s%s%s\n", bsep.c_str(), psep.c_str(), bsep.c_str()); + fprintf(stderr, "%*s%-*s%-*s\n", BW, "COMPRESS", PW, " DATA", BW, "DECOMPRESS"); + fprintf(stderr, "%s%s%s\n", bsep.c_str(), psep.c_str(), bsep.c_str()); + for (size_t i = 0; i < blocks.size(); ++i) { + blocks[i].dump(i); + if (i < packets.size()) { + packets[i].dump(); + } + } + fprintf(stderr, "%s%s%s\n", bsep.c_str(), psep.c_str(), bsep.c_str()); + } + ~LZLog(); + struct Writer { + LZLog &log; + size_t idx = 0; + LZDict dict; + BitWidthTracker bits{2,2}; + UrlSafeBitOutput dst; + Writer(LZLog &log_in) : log(log_in) {} + ~Writer(); + + static constexpr int lit8 = LZDict::lit8; + static constexpr int lit16 = LZDict::lit16; + static constexpr int eof = LZDict::eof; + + int width() { return bits.width(); } + bool has(const std::string &key) { return dict.has(key); } + int get(const std::string &key) { return dict.get(key); } + + int add(const std::string &key) { + int value = dict.add(key); + log.writer(idx, fmt("dict[%s] -> %d", key.c_str(), value)); + return value; + } + void use() { + int before = bits.width(); + bits.use(); + int after = bits.width(); + log.writer(idx, fmt("bit width %d -> %d", before, after)); + } + void write_bits(int x, int n) { + dst.write_bits(x, n); + idx = log.packet(idx, n, x); + } + void flush() { + dst.flush(); + log.writer(idx, fmt("flush bits")); + } + }; + struct Reader { + LZLog &log; + size_t idx = 0; + LZDict dict; + BitWidthTracker bits{3,4}; + UrlSafeBitInput src; + Reader(LZLog &log_in, const std::string &str) : log(log_in), src(str) {} + ~Reader(); + + static constexpr int lit8 = LZDict::lit8; + static constexpr int lit16 = LZDict::lit16; + static constexpr int eof = LZDict::eof; + + int width() { return bits.width(); } + int size() { return dict.size(); } + std::string get(int value) { return dict.get(value); } + + int read_bits(int n) { + int x = src.read_bits(n); + idx = log.packet(idx, n, x); + return x; + } + void use() { + int before = bits.width(); + bits.use(); + int after = bits.width(); + log.reader(idx, fmt("bit width %d -> %d", before, after)); + } + int add(const std::string &key) { + int value = dict.add(key); + log.reader(idx, fmt("dict[%s] -> %d", key.c_str(), value)); + return value; + } + }; + static LZLog analyze(const std::string &str) { + LZLog log; + Writer writer(log); + compress_impl(str, writer, writer, writer); + Reader 
reader(log, writer.dst.result); + auto res = decompress_impl(reader, reader, reader); + REQUIRE_EQ(res, str); + return log; + } +}; + +LZLog::~LZLog() = default; +LZLog::Writer::~Writer() = default; +LZLog::Reader::~Reader() = default; + +void verify_compr(std::string str) { + auto compr = compress(str); + auto res = decompress(compr); + REQUIRE_EQ(str, res); + fprintf(stderr, "'%s' -> '%s' -> '%s'\n", str.c_str(), compr.c_str(), res.c_str()); + auto log = LZLog::analyze(str); + log.dump(); +} + +void run_tests() { + REQUIRE_EQ(strlen(symbols), 64zu); + verify_compr(""); + verify_compr("abcdef"); + verify_compr("aaaaaa"); + verify_compr("baaaaaa"); + verify_compr("cbaaaaaa"); + verify_compr("ababababababab"); + verify_compr("a and b and c and d"); +} + int main(int argc, char **argv) { bool verbose = ((argc > 1) && (std::string(argv[1]) == "--verbose")); int expr_idx = verbose ? 2 : 1; @@ -538,9 +953,35 @@ int main(int argc, char **argv) { return 3; } } + if ((expr_cnt == 3) && + (std::string(argv[expr_idx]) == "interactive") && + (std::string(argv[expr_idx + 2]) == "link")) + { + setlocale(LC_ALL, ""); + Collector collector; + collector.enable(); + interactive_mode(ctx, Script::from_file(argv[expr_idx + 1])->script_only(true), collector); + if (collector.error().empty()) { + auto hash = compress(collector.toCompactString()); + fprintf(stdout, "https://docs.vespa.ai/playground/#%s\n", hash.c_str()); + return 0; + } else { + fprintf(stderr, "conversion failed: %s\n", collector.error().c_str()); + return 3; + } + } if ((expr_cnt == 1) && (std::string(argv[expr_idx]) == "json-repl")) { return json_repl_mode(ctx); } + if ((expr_cnt == 1) && (std::string(argv[expr_idx]) == "test")) { + try { + run_tests(); + } catch (std::exception &e) { + fprintf(stderr, "test failed: %s\n", e.what()); + return 3; + } + return 0; + } ctx.verbose(verbose); std::string name("a"); for (int i = expr_idx; i < argc; ++i) { From b6f4c8267d01e1471a376e636135ba71afcfdd4e Mon Sep 17 00:00:00 2001 From: Valerij Fredriksen Date: Mon, 18 Nov 2024 16:32:55 +0100 Subject: [PATCH 066/126] Define delay-host-security-agent-start-minutes flag --- .../src/main/java/com/yahoo/vespa/flags/PermanentFlags.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java index 8d1d6c9f94e6..11f0f1edd020 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java @@ -278,6 +278,12 @@ public class PermanentFlags { INSTANCE_ID ); + public static final UnboundIntFlag DELAY_HOST_SECURITY_AGENT_START_MINUTES = defineIntFlag( + "delay-host-security-agent-start-minutes", 5, + "The number of minutes (from host admin start) to delay the start of the host security agent", + "Takes effect on next host-admin tick", + NODE_TYPE); + // This must be set in a feature flag to avoid flickering between the new and old value during config server upgrade public static final UnboundDoubleFlag HOST_MEMORY = defineDoubleFlag( "host-memory", -1.0, From 7d7b3d7f17550a769449627412c23f009d05ecc4 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Mon, 18 Nov 2024 17:03:48 +0100 Subject: [PATCH 067/126] Move bloat check to avoid leaky abstraction. 
--- searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp | 2 +- searchlib/src/vespa/searchlib/diskindex/field_index.cpp | 5 ++--- searchlib/src/vespa/searchlib/diskindex/field_index.h | 2 +- .../src/vespa/searchlib/diskindex/zcposoccrandread.cpp | 5 ++++- searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h | 2 +- searchlib/src/vespa/searchlib/index/postinglistfile.cpp | 4 ++-- searchlib/src/vespa/searchlib/index/postinglistfile.h | 6 +++--- 7 files changed, 14 insertions(+), 12 deletions(-) diff --git a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp index cdad5118b138..49f4b1ece23f 100644 --- a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp +++ b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp @@ -251,7 +251,7 @@ DiskIndexTest::requireThatWeCanReadPostingList() auto r = _index->lookup(0, "w1"); auto& field_index = _index->get_field_index(0); auto h = field_index.read_posting_list(r); - if (field_index.get_posting_list_cache_enabled()) { + if (field_index.is_posting_list_cache_enabled()) { EXPECT_GT(64, h._allocSize); } auto sb = field_index.create_iterator(r, h, mda); diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp index 9ad02e7adff5..2be17db287d9 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp @@ -178,9 +178,8 @@ FieldIndex::read_uncached_posting_list(const DictionaryLookupResult& lookup_resu if (handle._read_bytes != 0) { _cache_disk_io_stats->add_uncached_read_operation(handle._read_bytes); } - if (trim && - handle._allocSize > (16 + ((lookup_result.bitOffset & 63) + lookup_result.counts._bitLength + 63) / 8) * 1.2) { - _posting_file->trim_posting_list(lookup_result, handle); // Trim posting list if more than 20% bloat + if (trim) { + _posting_file->consider_trim_posting_list(lookup_result, handle); // Trim posting list if more than 20% bloat } return handle; } diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.h b/searchlib/src/vespa/searchlib/diskindex/field_index.h index 0e41ffc3eb6a..2b8125c814b5 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.h +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.h @@ -88,7 +88,7 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { index::DictionaryFileRandRead* get_dictionary() noexcept { return _dict.get(); } FieldIndexStats get_stats() const; uint32_t get_field_id() const noexcept { return _field_id; } - bool get_posting_list_cache_enabled() const noexcept { return _posting_list_cache_enabled; } + bool is_posting_list_cache_enabled() const noexcept { return _posting_list_cache_enabled; } }; } diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp index cb62370cc7f2..50e5f1bab7ca 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp @@ -139,7 +139,7 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result) } void -ZcPosOccRandRead::trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const +ZcPosOccRandRead::consider_trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const { if (lookup_result.counts._bitLength == 0 || _memoryMapped) { return; @@ -157,6 +157,9 
@@ ZcPosOccRandRead::trim_posting_list(const DictionaryLookupResult& lookup_result, return; } assert(handle._allocSize >= malloc_len); + if (handle._allocSize <= malloc_len * 1.2) { + return; + } auto *mem = malloc(malloc_len); assert(mem != nullptr); memcpy(mem, handle._mem, malloc_len); diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h index bd5097fcdeab..ed624c3aabc7 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h @@ -44,7 +44,7 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead * Read (possibly partial) posting list into handle. */ PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override; - void trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const override; + void consider_trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const override; bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override; bool close() override; diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp index 733b8009f447..c2e53950a100 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp @@ -111,10 +111,10 @@ PostingListFileRandReadPassThrough::read_posting_list(const DictionaryLookupResu } void -PostingListFileRandReadPassThrough::trim_posting_list(const DictionaryLookupResult &lookup_result, +PostingListFileRandReadPassThrough::consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle) const { - return _lower->trim_posting_list(lookup_result, handle); + return _lower->consider_trim_posting_list(lookup_result, handle); } bool diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.h b/searchlib/src/vespa/searchlib/index/postinglistfile.h index 75cc3c65d158..091b3447b21d 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.h +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.h @@ -162,9 +162,9 @@ class PostingListFileRandRead { virtual PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) = 0; /** - * Remove directio padding from posting list. + * Remove directio padding from posting list if bloat is excessive (>20%) */ - virtual void trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const = 0; + virtual void consider_trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const = 0; /** * Open posting list file for random read. 
@@ -203,7 +203,7 @@ class PostingListFileRandReadPassThrough : public PostingListFileRandRead { const search::fef::TermFieldMatchDataArray &matchData) const override; PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override; - void trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const override; + void consider_trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const override; bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override; bool close() override; From 8097749cffdb66b297b459a39fd1f4afaa59948f Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Mon, 18 Nov 2024 17:22:45 +0100 Subject: [PATCH 068/126] Pass bloat factor to consider_trim_posting_list. --- searchlib/src/vespa/searchlib/diskindex/field_index.cpp | 2 +- .../src/vespa/searchlib/diskindex/zcposoccrandread.cpp | 5 +++-- .../src/vespa/searchlib/diskindex/zcposoccrandread.h | 3 ++- searchlib/src/vespa/searchlib/index/postinglistfile.cpp | 4 ++-- searchlib/src/vespa/searchlib/index/postinglistfile.h | 8 +++++--- 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp index 2be17db287d9..68f4aabab38f 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp @@ -179,7 +179,7 @@ FieldIndex::read_uncached_posting_list(const DictionaryLookupResult& lookup_resu _cache_disk_io_stats->add_uncached_read_operation(handle._read_bytes); } if (trim) { - _posting_file->consider_trim_posting_list(lookup_result, handle); // Trim posting list if more than 20% bloat + _posting_file->consider_trim_posting_list(lookup_result, handle, 0.2); // Trim posting list if more than 20% bloat } return handle; } diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp index 50e5f1bab7ca..55ea07b61464 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp @@ -139,7 +139,8 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result) } void -ZcPosOccRandRead::consider_trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const +ZcPosOccRandRead::consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle, + double bloat_factor) const { if (lookup_result.counts._bitLength == 0 || _memoryMapped) { return; @@ -157,7 +158,7 @@ ZcPosOccRandRead::consider_trim_posting_list(const DictionaryLookupResult& looku return; } assert(handle._allocSize >= malloc_len); - if (handle._allocSize <= malloc_len * 1.2) { + if (handle._allocSize <= malloc_len * (1.0 + bloat_factor)) { return; } auto *mem = malloc(malloc_len); diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h index ed624c3aabc7..63da1cf883be 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h @@ -44,7 +44,8 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead * Read (possibly partial) posting list into handle. 
*/ PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override; - void consider_trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const override; + void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle, + double bloat_factor) const override; bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override; bool close() override; diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp index c2e53950a100..ce3f3994e2af 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp @@ -112,9 +112,9 @@ PostingListFileRandReadPassThrough::read_posting_list(const DictionaryLookupResu void PostingListFileRandReadPassThrough::consider_trim_posting_list(const DictionaryLookupResult &lookup_result, - PostingListHandle &handle) const + PostingListHandle &handle, double bloat_factor) const { - return _lower->consider_trim_posting_list(lookup_result, handle); + return _lower->consider_trim_posting_list(lookup_result, handle, bloat_factor); } bool diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.h b/searchlib/src/vespa/searchlib/index/postinglistfile.h index 091b3447b21d..29aa44de08b3 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.h +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.h @@ -162,9 +162,10 @@ class PostingListFileRandRead { virtual PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) = 0; /** - * Remove directio padding from posting list if bloat is excessive (>20%) + * Remove directio padding from posting list if bloat is excessive. */ - virtual void consider_trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const = 0; + virtual void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle, + double bloat_factor) const = 0; /** * Open posting list file for random read. @@ -203,7 +204,8 @@ class PostingListFileRandReadPassThrough : public PostingListFileRandRead { const search::fef::TermFieldMatchDataArray &matchData) const override; PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override; - void consider_trim_posting_list(const DictionaryLookupResult& lookup_result, PostingListHandle& handle) const override; + void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle, + double bloat_factor) const override; bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override; bool close() override; From d5940bda20699cfbef6efa86f58ee63db35c14db Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Mon, 18 Nov 2024 21:29:54 +0100 Subject: [PATCH 069/126] Support building another version than latest first when building config models To support building only specified version, e.g. when an application is pinned to an older version because the latest one has a config model change that prevents it from working. 
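For illustration, a minimal sketch of how a prepare call could set the new parameter, using the PrepareParams.Builder API added in this patch; the call site, variables and version values below are hypothetical, not part of the change:

    // Hypothetical caller: build the pinned version's config model first
    PrepareParams params = new PrepareParams.Builder()
            .applicationId(applicationId)              // assumed to be in scope
            .vespaVersion("8.2.0")                     // wanted node version (pinned)
            .vespaVersionToBuildFirst("8.2.0")         // build this config model first
            .build();

The same value can also be supplied as the "vespaVersionToBuildFirst" property of the prepare request; when present, ModelsBuilder builds that model version first instead of the latest version on the major.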
--- .../config/server/deploy/Deployment.java | 1 + .../server/modelfactory/ModelsBuilder.java | 41 ++++++++++++------- .../config/server/session/PrepareParams.java | 24 +++++++++++ .../vespa/config/server/session/Session.java | 4 +- .../config/server/session/SessionData.java | 6 +++ .../server/session/SessionPreparer.java | 9 +++- .../server/session/SessionRepository.java | 2 + .../server/session/SessionSerializer.java | 6 ++- .../session/SessionZooKeeperClient.java | 16 ++++++-- .../server/deploy/HostedDeployTest.java | 29 +++++++++++++ .../server/session/PrepareParamsTest.java | 10 +++++ .../session/SessionZooKeeperClientTest.java | 1 + 12 files changed, 128 insertions(+), 21 deletions(-) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java index bd3a081d2077..2e87ee52d5a6 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java @@ -249,6 +249,7 @@ private static Supplier createPrepareParams( PrepareParams.Builder params = new PrepareParams.Builder() .applicationId(session.getApplicationId()) .vespaVersion(session.getVespaVersion().toString()) + .vespaVersionToBuildFirst(session.getVersionToBuildFirst()) .timeoutBudget(timeoutBudget) .ignoreValidationErrors(ignoreValidationErrors) .isBootstrap(isBootstrap) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java index 171421800c4e..76a2405767de 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java @@ -86,6 +86,7 @@ public abstract class ModelsBuilder { public List buildModels(ApplicationId applicationId, Optional dockerImageRepository, Version wantedNodeVespaVersion, + Optional versionToBuildFirst, ApplicationPackage applicationPackage, AllocatedHostsFromAllModels allocatedHosts, Instant now) { @@ -103,12 +104,15 @@ public List buildModels(ApplicationId applicationId, boolean buildLatestModelForThisMajor = true; for (int i = 0; i < majorVersions.size(); i++) { int majorVersion = majorVersions.get(i); + log.log(Level.FINE, "Building major " + majorVersion + ", versionToBuildFirst=" + versionToBuildFirst); try { builtModels.addAll(buildModelVersions(keepMajorVersion(majorVersion, versions), applicationId, dockerImageRepository, wantedNodeVespaVersion, applicationPackage, allocatedHosts, now, - buildLatestModelForThisMajor, majorVersion)); + buildLatestModelForThisMajor, + versionToBuildFirst, majorVersion)); buildLatestModelForThisMajor = false; // We have successfully built latest model version, do it only for this major + versionToBuildFirst = Optional.empty(); // Set to empty, cannot build this first on another major } catch (NodeAllocationException | ApplicationLockException | TransientException | QuotaExceededException e) { // Don't wrap this exception, and don't try to load other model versions as this is (most likely) @@ -186,20 +190,21 @@ private List buildModelVersions(Set versions, AllocatedHostsFromAllModels allocatedHosts, Instant now, boolean buildLatestModelForThisMajor, + Optional versionToBuildFirst, int majorVersion) { List built = new ArrayList<>(); if (buildLatestModelForThisMajor) { - var latest = 
findLatest(versions); - var latestModelVersion = buildModelVersion(modelFactoryRegistry.getFactory(latest), - applicationPackage, - applicationId, - wantedDockerImageRepository, - wantedNodeVespaVersion); - allocatedHosts.add(latestModelVersion.getModel().allocatedHosts(), latest); - built.add(latestModelVersion); + if (versionToBuildFirst.isEmpty()) + versionToBuildFirst = Optional.of(findLatest(versions)); + var builtFirst = buildModelVersion(modelFactoryRegistry.getFactory(versionToBuildFirst.get()), + applicationPackage, + applicationId, + wantedDockerImageRepository, + wantedNodeVespaVersion); + allocatedHosts.add(builtFirst.getModel().allocatedHosts(), versionToBuildFirst.get()); + built.add(builtFirst); } - // load old model versions versions = versionsToBuild(versions, wantedNodeVespaVersion, majorVersion, allocatedHosts); for (Version version : versions) { if (alreadyBuilt(version, built)) continue; @@ -213,11 +218,8 @@ private List buildModelVersions(Set versions, allocatedHosts.add(modelVersion.getModel().allocatedHosts(), version); built.add(modelVersion); } catch (RuntimeException e) { - // allow failure to create old config models if there is a validation override that allow skipping old - // config models, or we're manually deploying - if (! built.isEmpty() && - ( built.get(0).getModel().skipOldConfigModels(now) || zone().environment().isManuallyDeployed())) - log.log(Level.WARNING, applicationId + ": Failed to build version " + version + + if (allowBuildToFail(now, built)) + log.log(Level.INFO, applicationId + ": Failed to build version " + version + ", but allow failure due to validation override or manual deployment:" + Exceptions.toMessageString(e)); else { @@ -229,6 +231,15 @@ private List buildModelVersions(Set versions, return built; } + /** + * Allow build of other config models to fail if there is a validation override that allow skipping old + * config models, or we're manually deploying + */ + private boolean allowBuildToFail(Instant now, List built) { + return ! 
built.isEmpty() && + (built.get(0).getModel().skipOldConfigModels(now) || zone().environment().isManuallyDeployed()); + } + private static boolean alreadyBuilt(Version version, List built) { return built.stream() .map(modelresult -> modelresult.getModel().version()) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/PrepareParams.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/PrepareParams.java index c46cc8044049..e7cbf5245366 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/PrepareParams.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/PrepareParams.java @@ -48,6 +48,7 @@ public final class PrepareParams { static final String DRY_RUN_PARAM_NAME = "dryRun"; static final String VERBOSE_PARAM_NAME = "verbose"; static final String VESPA_VERSION_PARAM_NAME = "vespaVersion"; + static final String VESPA_VERSION_TO_BUILD_FIRST_PARAM_NAME = "vespaVersionToBuildFirst"; static final String CONTAINER_ENDPOINTS_PARAM_NAME = "containerEndpoints"; static final String ENDPOINT_CERTIFICATE_METADATA_PARAM_NAME = "endpointCertificateMetadata"; static final String DOCKER_IMAGE_REPOSITORY = "dockerImageRepository"; @@ -71,6 +72,7 @@ public final class PrepareParams { private final boolean force; private final boolean waitForResourcesInPrepare; private final Optional vespaVersion; + private final Optional vespaVersionToBuildFirst; private final List containerEndpoints; private final Optional endpointCertificateMetadata; private final Optional dockerImageRepository; @@ -90,6 +92,7 @@ private PrepareParams(ApplicationId applicationId, boolean isBootstrap, boolean isInternalRedeployment, Optional vespaVersion, + Optional vespaVersionToBuildFirst, List containerEndpoints, Optional endpointCertificateMetadata, Optional dockerImageRepository, @@ -110,6 +113,7 @@ private PrepareParams(ApplicationId applicationId, this.isBootstrap = isBootstrap; this.isInternalRedeployment = isInternalRedeployment; this.vespaVersion = vespaVersion; + this.vespaVersionToBuildFirst = vespaVersionToBuildFirst; this.containerEndpoints = containerEndpoints; this.endpointCertificateMetadata = endpointCertificateMetadata; this.dockerImageRepository = dockerImageRepository; @@ -136,6 +140,7 @@ public static class Builder { private ApplicationId applicationId = null; private TimeoutBudget timeoutBudget = new TimeoutBudget(Clock.systemUTC(), Duration.ofSeconds(60)); private Optional vespaVersion = Optional.empty(); + private Optional vespaVersionToBuildFirst = Optional.empty(); private List containerEndpoints = null; private Optional endpointCertificateMetadata = Optional.empty(); private Optional dockerImageRepository = Optional.empty(); @@ -198,6 +203,19 @@ public Builder vespaVersion(Version vespaVersion) { return this; } + public Builder vespaVersionToBuildFirst(String version) { + Optional v = Optional.empty(); + if (version != null && !version.isEmpty()) { + v = Optional.of(Version.fromString(version)); + } + return vespaVersionToBuildFirst(v); + } + + public Builder vespaVersionToBuildFirst(Optional version) { + this.vespaVersionToBuildFirst = version; + return this; + } + public Builder containerEndpoints(String serialized) { this.containerEndpoints = (serialized == null) ? 
List.of() @@ -314,6 +332,7 @@ public PrepareParams build() { isBootstrap, isInternalRedeployment, vespaVersion, + vespaVersionToBuildFirst, containerEndpoints, endpointCertificateMetadata, dockerImageRepository, @@ -337,6 +356,7 @@ public static PrepareParams fromHttpRequest(HttpRequest request, TenantName tena .timeoutBudget(SessionHandler.getTimeoutBudget(request, barrierTimeout)) .applicationId(createApplicationId(request, tenant)) .vespaVersion(request.getProperty(VESPA_VERSION_PARAM_NAME)) + .vespaVersionToBuildFirst(request.getProperty(VESPA_VERSION_TO_BUILD_FIRST_PARAM_NAME)) .containerEndpoints(request.getProperty(CONTAINER_ENDPOINTS_PARAM_NAME)) .endpointCertificateMetadata(request.getProperty(ENDPOINT_CERTIFICATE_METADATA_PARAM_NAME)) .dockerImageRepository(request.getProperty(DOCKER_IMAGE_REPOSITORY)) @@ -360,6 +380,7 @@ public static PrepareParams fromJson(byte[] json, TenantName tenant, Duration ba .timeoutBudget(SessionHandler.getTimeoutBudget(getTimeout(params, barrierTimeout))) .applicationId(createApplicationId(params, tenant)) .vespaVersion(SlimeUtils.optionalString(params.field(VESPA_VERSION_PARAM_NAME)).orElse(null)) + .vespaVersionToBuildFirst(SlimeUtils.optionalString(params.field(VESPA_VERSION_TO_BUILD_FIRST_PARAM_NAME)).orElse(null)) .containerEndpointList(deserialize(params.field(CONTAINER_ENDPOINTS_PARAM_NAME), ContainerEndpointSerializer::endpointListFromSlime, List.of())) .endpointCertificateMetadata(deserialize(params.field(ENDPOINT_CERTIFICATE_METADATA_PARAM_NAME), EndpointCertificateMetadataSerializer::fromSlime)) .dockerImageRepository(SlimeUtils.optionalString(params.field(DOCKER_IMAGE_REPOSITORY)).orElse(null)) @@ -438,6 +459,9 @@ public String getApplicationName() { /** Returns the Vespa version the nodes running the prepared system should have, or empty to use the system version */ public Optional vespaVersion() { return vespaVersion; } + /** Returns the Vespa version to build first when building several models. A pinned application will have this set */ + public Optional vespaVersionToBuildFirst() { return vespaVersionToBuildFirst; } + /** Returns the container endpoints that should be made available for this deployment. 
One per cluster */ public List containerEndpoints() { return containerEndpoints; diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/Session.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/Session.java index 1afc0cbcd52a..9008c837a9f7 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/Session.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/Session.java @@ -132,6 +132,8 @@ public Optional getOptionalApplicationId() { public Version getVespaVersion() { return sessionZooKeeperClient.readVespaVersion(); } + public Optional getVersionToBuildFirst() { return sessionZooKeeperClient.readVersionToBuildFirst(); } + public Optional getAthenzDomain() { return sessionZooKeeperClient.readAthenzDomain(); } public Optional getQuota() { return sessionZooKeeperClient.readQuota(); } @@ -194,7 +196,7 @@ public ApplicationFile getApplicationFile(Path relativePath, LocalSession.Mode m return getApplicationPackage().getFile(relativePath); } - Optional applicationVersions() { return Optional.empty(); } + public Optional applicationVersions() { return Optional.empty(); } private void markSessionEdited() { setStatus(Session.Status.NEW); diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionData.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionData.java index a6180b37d5ac..e22af1262825 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionData.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionData.java @@ -36,6 +36,7 @@ public record SessionData(ApplicationId applicationId, Optional applicationPackageReference, Version version, + Optional versionToBuildFirst, Instant created, Optional dockerImageRepository, Optional athenzDomain, @@ -51,6 +52,7 @@ public record SessionData(ApplicationId applicationId, static final String APPLICATION_ID_PATH = "applicationId"; static final String APPLICATION_PACKAGE_REFERENCE_PATH = "applicationPackageReference"; static final String VERSION_PATH = "version"; + static final String VERSION_TO_BUILD_FIRST_PATH = "versionToBuildFirst"; static final String CREATE_TIME_PATH = "createTime"; static final String DOCKER_IMAGE_REPOSITORY_PATH = "dockerImageRepository"; static final String ATHENZ_DOMAIN = "athenzDomain"; @@ -78,6 +80,7 @@ private void toSlime(Cursor object) { object.setString(APPLICATION_ID_PATH, applicationId.serializedForm()); applicationPackageReference.ifPresent(ref -> object.setString(APPLICATION_PACKAGE_REFERENCE_PATH, ref.value())); object.setString(VERSION_PATH, version.toString()); + versionToBuildFirst.ifPresent(v -> object.setString(VERSION_TO_BUILD_FIRST_PATH, v.toString())); object.setLong(CREATE_TIME_PATH, created.toEpochMilli()); dockerImageRepository.ifPresent(image -> object.setString(DOCKER_IMAGE_REPOSITORY_PATH, image.asString())); athenzDomain.ifPresent(domain -> object.setString(ATHENZ_DOMAIN, domain.value())); @@ -105,6 +108,9 @@ static SessionData fromSlime(Slime slime) { return new SessionData(ApplicationId.fromSerializedForm(cursor.field(APPLICATION_ID_PATH).asString()), optionalString(cursor.field(APPLICATION_PACKAGE_REFERENCE_PATH)).map(FileReference::new), Version.fromString(cursor.field(VERSION_PATH).asString()), + SlimeUtils.isPresent(cursor.field(VERSION_TO_BUILD_FIRST_PATH)) + ? 
Optional.of(Version.fromString(cursor.field(VERSION_TO_BUILD_FIRST_PATH).asString())) + : Optional.empty(), Instant.ofEpochMilli(cursor.field(CREATE_TIME_PATH).asLong()), optionalString(cursor.field(DOCKER_IMAGE_REPOSITORY_PATH)).map(DockerImage::fromString), optionalString(cursor.field(ATHENZ_DOMAIN)).map(AthenzDomain::from), diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionPreparer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionPreparer.java index 4c9cc7aca06e..4a3af28f1b42 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionPreparer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionPreparer.java @@ -178,6 +178,8 @@ private class Preparation { /** The version of Vespa the application to be prepared specifies for its nodes */ final Version vespaVersion; + /** The version of Vespa to build first when there are several config models, empty if latest version should be built first */ + final Optional vespaVersionToBuildFirst; final ContainerEndpointsCache containerEndpointsCache; final List containerEndpoints; @@ -206,6 +208,7 @@ private class Preparation { this.applicationId = params.getApplicationId(); this.dockerImageRepository = params.dockerImageRepository(); this.vespaVersion = params.vespaVersion().orElse(Vtag.currentVersion); + this.vespaVersionToBuildFirst = params.vespaVersionToBuildFirst(); this.containerEndpointsCache = new ContainerEndpointsCache(tenantPath, curator); this.endpointCertificateMetadataStore = new EndpointCertificateMetadataStore(curator, tenantPath); EndpointCertificateRetriever endpointCertificateRetriever = new EndpointCertificateRetriever(endpointCertificateSecretStores); @@ -337,7 +340,8 @@ void vespaPreprocess(File appDir, File inputXml, ApplicationMetaData metaData, T AllocatedHosts buildModels(Instant now) { var allocatedHosts = new AllocatedHostsFromAllModels(); this.modelResultList = preparedModelsBuilder.buildModels(applicationId, dockerImageRepository, vespaVersion, - preprocessedApplicationPackage, allocatedHosts, now); + vespaVersionToBuildFirst, preprocessedApplicationPackage, + allocatedHosts, now); checkTimeout("build models"); return allocatedHosts.toAllocatedHosts(); } @@ -357,6 +361,7 @@ void writeStateZK(FileReference filereference) { Optional.of(filereference), dockerImageRepository, vespaVersion, + vespaVersionToBuildFirst, logger, prepareResult.getFileRegistries(), prepareResult.allocatedHosts(), @@ -402,6 +407,7 @@ private void writeStateToZooKeeper(SessionZooKeeperClient zooKeeperClient, Optional fileReference, Optional dockerImageRepository, Version vespaVersion, + Optional versionToBuildFirst, DeployLogger deployLogger, Map fileRegistryMap, AllocatedHosts allocatedHosts, @@ -422,6 +428,7 @@ private void writeStateToZooKeeper(SessionZooKeeperClient zooKeeperClient, fileReference, dockerImageRepository, vespaVersion, + versionToBuildFirst, athenzDomain, quota, tenantVaults, diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java index accf78fe82dd..7ecf617dabf5 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java @@ -547,6 +547,7 @@ private ApplicationVersions loadApplication(Session session, Optional fileReference, Optional 
dockerImageRepository, - Version vespaVersion, Optional athenzDomain, Optional quota, + Version vespaVersion, Optional versionToBuildFirst, + Optional athenzDomain, Optional quota, List tenantVaults, List tenantSecretStores, List operatorCertificates, Optional cloudAccount, List dataplaneTokens, ActivationTriggers activationTriggers, @@ -40,6 +41,7 @@ void write(SessionZooKeeperClient zooKeeperClient, ApplicationId applicationId, zooKeeperClient.writeApplicationId(applicationId); zooKeeperClient.writeApplicationPackageReference(fileReference); zooKeeperClient.writeVespaVersion(vespaVersion); + zooKeeperClient.writeVersionToBuildFirst(versionToBuildFirst); zooKeeperClient.writeDockerImageRepository(dockerImageRepository); zooKeeperClient.writeAthenzDomain(athenzDomain); zooKeeperClient.writeQuota(quota); @@ -53,6 +55,7 @@ void write(SessionZooKeeperClient zooKeeperClient, ApplicationId applicationId, zooKeeperClient.writeSessionData(new SessionData(applicationId, fileReference, vespaVersion, + versionToBuildFirst, created, dockerImageRepository, athenzDomain, @@ -81,6 +84,7 @@ private static SessionData readSessionDataFromLegacyPaths(SessionZooKeeperClient return new SessionData(zooKeeperClient.readApplicationId(), zooKeeperClient.readApplicationPackageReference(), zooKeeperClient.readVespaVersion(), + zooKeeperClient.readVersionToBuildFirst(), zooKeeperClient.readCreateTime(), zooKeeperClient.readDockerImageRepository(), zooKeeperClient.readAthenzDomain(), diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClient.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClient.java index fd76a07a7d65..5c834203cba7 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClient.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClient.java @@ -54,6 +54,7 @@ import static com.yahoo.vespa.config.server.session.SessionData.CREATE_TIME_PATH; import static com.yahoo.vespa.config.server.session.SessionData.DATAPLANE_TOKENS_PATH; import static com.yahoo.vespa.config.server.session.SessionData.DOCKER_IMAGE_REPOSITORY_PATH; +import static com.yahoo.vespa.config.server.session.SessionData.VERSION_TO_BUILD_FIRST_PATH; import static com.yahoo.vespa.config.server.session.SessionData.OPERATOR_CERTIFICATES_PATH; import static com.yahoo.vespa.config.server.session.SessionData.QUOTA_PATH; import static com.yahoo.vespa.config.server.session.SessionData.SESSION_DATA_PATH; @@ -191,9 +192,9 @@ private Path applicationPackageReferencePath() { return sessionPath.append(APPLICATION_PACKAGE_REFERENCE_PATH); } - private Path versionPath() { - return sessionPath.append(VERSION_PATH); - } + private Path versionPath() { return sessionPath.append(VERSION_PATH); } + + private Path versionToBuildFirstPath() { return sessionPath.append(VERSION_TO_BUILD_FIRST_PATH); } private Path dockerImageRepositoryPath() { return sessionPath.append(DOCKER_IMAGE_REPOSITORY_PATH); @@ -231,6 +232,10 @@ public void writeVespaVersion(Version version) { curator.set(versionPath(), Utf8.toBytes(version.toString())); } + public void writeVersionToBuildFirst (Optional version) { + version.ifPresent(v -> curator.set(versionToBuildFirstPath(), Utf8.toBytes(v.toString()))); + } + public void writeSessionData(SessionData sessionData) { curator.set(sessionPath.append(SESSION_DATA_PATH), sessionData.toJson()); } @@ -251,6 +256,11 @@ public Version readVespaVersion() { }); } + public Optional 
readVersionToBuildFirst() { + Optional data = curator.getData(versionToBuildFirstPath()); + return data.map(d -> Version.fromString(Utf8.toString(d))); + } + public Optional readDockerImageRepository() { Optional dockerImageRepository = curator.getData(dockerImageRepositoryPath()); return dockerImageRepository.map(d -> DockerImage.fromString(Utf8.toString(d))); diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java index b10d449ddbe7..2553238e7513 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java @@ -23,6 +23,7 @@ import com.yahoo.config.provision.Zone; import com.yahoo.slime.SlimeUtils; import com.yahoo.test.ManualClock; +import com.yahoo.vespa.config.server.ApplicationRepository; import com.yahoo.vespa.config.server.MockConfigConvergenceChecker; import com.yahoo.vespa.config.server.application.ApplicationReindexing; import com.yahoo.vespa.config.server.application.ConfigConvergenceChecker; @@ -32,7 +33,9 @@ import com.yahoo.vespa.config.server.http.v2.PrepareResult; import com.yahoo.vespa.config.server.maintenance.PendingRestartsMaintainer; import com.yahoo.vespa.config.server.model.TestModelFactory; +import com.yahoo.vespa.config.server.session.LocalSession; import com.yahoo.vespa.config.server.session.PrepareParams; +import com.yahoo.vespa.config.server.session.RemoteSession; import com.yahoo.vespa.model.application.validation.change.VespaReindexAction; import com.yahoo.vespa.model.application.validation.change.VespaRestartAction; import org.junit.Rule; @@ -161,6 +164,32 @@ public void testDeployMultipleVersions() { assertEquals(9, tester.getAllocatedHostsOf(tester.applicationId()).getHosts().size()); } + @Test + public void testDeployMultipleVersionsSpecifyingWhichVersionToBuildFirst() { + List modelFactories = List.of(createHostedModelFactory(Version.fromString("8.1.0")), + createHostedModelFactory(Version.fromString("8.2.0")), + createHostedModelFactory(Version.fromString("8.3.0"))); + var tester = new DeployTester.Builder(temporaryFolder) + .hostedConfigserverConfig(Zone.defaultZone()) + .modelFactories(modelFactories) + .build(); + var appRepo = tester.applicationRepository(); + var applicationId = tester.applicationId(); + + // Deploy as usual, only wanted version is set => 8.2.0 and 8.3.0 (latest version) are built + tester.deployApp("src/test/apps/hosted/", new PrepareParams.Builder() + .vespaVersion("8.2.0")); + assertEquals("8.2.0", appRepo.getActiveSession(applicationId).get().getVespaVersion().toFullString()); + assertEquals(List.of("8.2.0", "8.3.0"), appRepo.getActiveApplicationVersions(applicationId).get().versions().stream().map(Version::toFullString).toList()); + + // Deploy with vespaVersionToBuildFirst=8.2.0 and wanted version set to 8.2.0 => only 8.2.0 version is built + tester.deployApp("src/test/apps/hosted/", new PrepareParams.Builder() + .vespaVersion("8.2.0") + .vespaVersionToBuildFirst("8.2.0")); + assertEquals("8.2.0", appRepo.getActiveSession(applicationId).get().getVespaVersion().toFullString()); + assertEquals(List.of("8.2.0"), appRepo.getActiveApplicationVersions(applicationId).get().versions().stream().map(Version::toFullString).toList()); + } + /** * Test that only the minimal set of models are created (model versions used on hosts, the wanted version * and the latest version for the
latest major) diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java index 1dca5459ae80..0efdda7ce8d5 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.config.server.session; +import com.yahoo.component.Version; import com.yahoo.config.model.api.ApplicationClusterEndpoint; import com.yahoo.config.model.api.ContainerEndpoint; import com.yahoo.config.model.api.EndpointCertificateMetadata; @@ -66,6 +67,7 @@ public void testCorrectParsing() { assertFalse(prepareParams.isVerbose()); assertFalse(prepareParams.ignoreValidationErrors()); assertTrue(prepareParams.vespaVersion().isEmpty()); + assertTrue(prepareParams.vespaVersionToBuildFirst().isEmpty()); assertTrue(prepareParams.getTimeoutBudget().hasTimeLeft()); assertTrue(prepareParams.containerEndpoints().isEmpty()); assertTrue(prepareParams.cloudAccount().isEmpty()); @@ -229,6 +231,13 @@ public void testCloudAccount() { assertEquals(CloudAccount.from("012345678912"), params.cloudAccount().get()); } + @Test + public void testFirstVespaVersionToBuild() { + String json = "{\"vespaVersionToBuildFirst\": \"8.3.0\"}"; + PrepareParams params = PrepareParams.fromJson(json.getBytes(StandardCharsets.UTF_8), TenantName.defaultName(), Duration.ZERO); + assertEquals(Version.fromString("8.3.0"), params.vespaVersionToBuildFirst().get()); + } + private void assertPrepareParamsEqual(PrepareParams urlParams, PrepareParams jsonParams) { assertEquals(urlParams.ignoreValidationErrors(), jsonParams.ignoreValidationErrors()); assertEquals(urlParams.isDryRun(), jsonParams.isDryRun()); @@ -239,6 +248,7 @@ private void assertPrepareParamsEqual(PrepareParams urlParams, PrepareParams jso assertEquals(urlParams.getApplicationId(), jsonParams.getApplicationId()); assertEquals(urlParams.getTimeoutBudget().timeout(), jsonParams.getTimeoutBudget().timeout()); assertEquals(urlParams.vespaVersion(), jsonParams.vespaVersion()); + assertEquals(urlParams.vespaVersionToBuildFirst(), jsonParams.vespaVersionToBuildFirst()); assertEquals(urlParams.containerEndpoints(), jsonParams.containerEndpoints()); assertEquals(urlParams.endpointCertificateMetadata(), jsonParams.endpointCertificateMetadata()); assertEquals(urlParams.dockerImageRepository(), jsonParams.dockerImageRepository()); diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java index 0eba74a3c019..26dbb5724b33 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java @@ -180,6 +180,7 @@ public void require_that_session_data_is_written_to_zk() { zkc.writeSessionData(new SessionData(ApplicationId.defaultId(), Optional.of(new FileReference("foo")), Version.fromString("8.195.1"), + Optional.empty(), Instant.now(), Optional.empty(), Optional.empty(), From d0c24ac918be0e55079697dd807163c3852de36a Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Tue, 19 Nov 2024 10:14:14 +0100 Subject: [PATCH 070/126] Include 
snapshot ID in response when creating snapshot --- .../provision/restapi/NodesV2ApiHandler.java | 6 +++- .../provision/restapi/NodesV2ApiTest.java | 30 ++++++++----------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java index 0a5dd5753acc..03f3eeca9e88 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java @@ -266,7 +266,11 @@ private HttpResponse snapshotEncryptionKey(SnapshotId id, String hostname, Inspe private HttpResponse snapshot(String hostname) { Snapshot snapshot = nodeRepository.snapshots().create(hostname, nodeRepository.clock().instant()); - return new MessageResponse("Triggered a new snapshot of " + hostname + ": " + snapshot.id()); + Slime slime = new Slime(); + Cursor root = slime.setObject(); + root.setString("id", snapshot.id().toString()); + root.setString("message", "Triggered a new snapshot of " + hostname + ": " + snapshot.id()); + return new SlimeJsonResponse(slime); } private HttpResponse restoreSnapshot(SnapshotId id, String hostname) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java index c33e45661f9e..ed07ac042725 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java @@ -36,6 +36,7 @@ import java.util.Optional; import java.util.stream.Collectors; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; /** @@ -880,23 +881,20 @@ public void test_snapshots() throws IOException { SecretVersionId.of("1"))); // Trigger creation of snapshots - tester.assertResponseContains(new Request("http://localhost:8080/nodes/v2/snapshot/host4.yahoo.com", - new byte[0], Request.Method.POST), - "{\"message\":\"Triggered a new snapshot of host4.yahoo.com:"); + String createSnapshotResponse = tester.container() + .handleRequest(new Request("http://localhost:8080/nodes/v2/snapshot/host4.yahoo.com", new byte[0], Request.Method.POST)) + .getBodyAsString(); + String id0 = SlimeUtils.jsonToSlime(createSnapshotResponse).get().field("id").asString(); + assertEquals("{\"id\":\"" + id0 + "\",\"message\":\"Triggered a new snapshot of host4.yahoo.com: " + id0 + "\"}", + createSnapshotResponse); tester.assertResponseContains(new Request("http://localhost:8080/nodes/v2/snapshot/host4.yahoo.com", new byte[0], Request.Method.POST), "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Cannot trigger new snapshot: Node host4.yahoo.com is busy with snapshot"); tester.assertResponseContains(new Request("http://localhost:8080/nodes/v2/snapshot/host2.yahoo.com", new byte[0], Request.Method.POST), - "{\"message\":\"Triggered a new snapshot of host2.yahoo.com:"); + "\"message\":\"Triggered a new snapshot of host2.yahoo.com:"); // List snapshots - String listResponse = tester.container() - .handleRequest(new Request("http://localhost:8080/nodes/v2/snapshot/host4.yahoo.com")) - .getBodyAsString(); - String id0 = SlimeUtils.entriesStream(SlimeUtils.jsonToSlime(listResponse).get().field("snapshots")) - .findFirst().get() - 
.field("id").asString(); assertFile(new Request("http://localhost:8080/nodes/v2/snapshot"), "snapshot/list.json"); assertFile(new Request("http://localhost:8080/nodes/v2/snapshot/host4.yahoo.com"), "snapshot/list-host.json"); assertFile(new Request("http://localhost:8080/nodes/v2/snapshot/host4.yahoo.com/" + id0), "snapshot/single.json"); @@ -922,14 +920,10 @@ public void test_snapshots() throws IOException { "{\"sealedSharedKey\""); // Trigger another snapshot - tester.assertResponseContains(new Request("http://localhost:8080/nodes/v2/snapshot/host4.yahoo.com", - new byte[0], Request.Method.POST), - "{\"message\":\"Triggered a new snapshot of host4.yahoo.com:"); - listResponse = tester.container() - .handleRequest(new Request("http://localhost:8080/nodes/v2/snapshot/host4.yahoo.com")) - .getBodyAsString(); - String id1 = SlimeUtils.entriesStream(SlimeUtils.jsonToSlime(listResponse).get().field("snapshots")) - .toList().get(1).field("id").asString(); + createSnapshotResponse = tester.container() + .handleRequest(new Request("http://localhost:8080/nodes/v2/snapshot/host4.yahoo.com", new byte[0], Request.Method.POST)) + .getBodyAsString(); + String id1 = SlimeUtils.jsonToSlime(createSnapshotResponse).get().field("id").asString(); // Cannot trigger restore while busy with a different snapshot tester.assertResponse(new Request("http://localhost:8080/nodes/v2/snapshot/host4.yahoo.com/" + id0 + "/restore", From 281e5cb4afeb1ca3a85e93361aaeedb7aee31805 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 19 Nov 2024 11:30:29 +0100 Subject: [PATCH 071/126] Update factory link. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a91e0cbd30c1..ba3fa66f27d9 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ A new release of Vespa is made from this repository's master branch every mornin - Home page: [https://vespa.ai](https://vespa.ai) - Documentation: [https://docs.vespa.ai](https://docs.vespa.ai) -- Continuous build: [https://factory.vespa.oath.cloud](https://factory.vespa.oath.cloud) +- Continuous build: [https://factory.vespa.ai](https://factory.vespa.ai) - Run applications in the cloud for free: [https://cloud.vespa.ai](https://cloud.vespa.ai) ## Table of contents From e01c1f4ae76880cb9f57b75f2d760a0db008e7b5 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 19 Nov 2024 12:41:12 +0100 Subject: [PATCH 072/126] Populate read metrics for memory mapped posting lists. 
--- .../vespa/searchlib/diskindex/zcposoccrandread.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp index 55ea07b61464..233a144b39a2 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -93,16 +94,17 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result) uint64_t startOffset = (lookup_result.bitOffset + _headerBitSize) >> 3; // Align start at 64-bit boundary startOffset -= (startOffset & 7); + uint64_t endOffset = (lookup_result.bitOffset + _headerBitSize + + lookup_result.counts._bitLength + 7) >> 3; + // Align end at 64-bit boundary + endOffset += (-endOffset & 7); void *mapPtr = _file->MemoryMapPtr(startOffset); if (mapPtr != nullptr) { handle._mem = mapPtr; + size_t pad_before = startOffset - vespalib::round_down_to_page_boundary(startOffset); + handle._read_bytes = vespalib::round_up_to_page_size(pad_before + endOffset - startOffset + decode_prefetch_size); } else { - uint64_t endOffset = (lookup_result.bitOffset + _headerBitSize + - lookup_result.counts._bitLength + 7) >> 3; - // Align end at 64-bit boundary - endOffset += (-endOffset & 7); - uint64_t vectorLen = endOffset - startOffset; size_t padBefore; size_t padAfter; From baa9ce7b744ff33f6e5386f210b940b4d2adb0da Mon Sep 17 00:00:00 2001 From: Morten Tokle Date: Tue, 19 Nov 2024 13:03:02 +0100 Subject: [PATCH 073/126] Make refresh interval configurable --- .../vespa/model/container/xml/CloudAsmSecrets.java | 3 ++- .../resources/configdefinitions/asm-secret.def | 1 + .../java/ai/vespa/secret/aws/AsmSecretReader.java | 14 +++++++++----- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java index 1618ba05e3b7..44c0eeae5fb3 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java @@ -44,7 +44,8 @@ public CloudAsmSecrets(URI ztsUri, AthenzDomain athenzDomain, @Override public void getConfig(AsmSecretConfig.Builder builder) { builder.ztsUri(ztsUri.toString()) - .athenzDomain(athenzDomain.value()); + .athenzDomain(athenzDomain.value()) + .refreshInterval(1); // 1 minute } @Override diff --git a/container-disc/src/main/resources/configdefinitions/asm-secret.def b/container-disc/src/main/resources/configdefinitions/asm-secret.def index d9a2ce3743d8..f17ed686827a 100644 --- a/container-disc/src/main/resources/configdefinitions/asm-secret.def +++ b/container-disc/src/main/resources/configdefinitions/asm-secret.def @@ -4,6 +4,7 @@ package=ai.vespa.secret.config.aws ztsUri string athenzDomain string default="" +refreshInterval int default=30 # refresh interval in minutes # TODO: unused, remove after model version xxx.yyy has rolled out of hosted Vespa tenant string default="" diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java index 0c8e2cb63b75..19ad2e18b35a 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java +++ 
b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java @@ -37,29 +37,32 @@ public abstract class AsmSecretReader extends AsmSecretStoreBase implements TypedSecretStore { - private static final Duration CACHE_EXPIRE = Duration.ofMinutes(30); + private static final Duration DEFAULT_REFRESH_INTERVAL = Duration.ofMinutes(30); private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(5); private final LoadingCache cache; private final Runnable ztsClientCloser; + private final Duration refreshInterval; protected record VersionKey(Key key, SecretVersionId version) {} // For subclasses using dependency injection public AsmSecretReader(AsmSecretConfig config, ServiceIdentityProvider identities) { this(ztsClient(URI.create(config.ztsUri()), identities.getIdentitySslContext()), - athenzDomain(config, identities)); + athenzDomain(config, identities), + Duration.ofMinutes(config.refreshInterval())); } public AsmSecretReader(URI ztsUri, SSLContext sslContext, AthenzDomain domain) { - this(ztsClient(ztsUri, sslContext), domain); + this(ztsClient(ztsUri, sslContext), domain, DEFAULT_REFRESH_INTERVAL); } - private AsmSecretReader(ZtsClient ztsClient, AthenzDomain domain) { + private AsmSecretReader(ZtsClient ztsClient, AthenzDomain domain, Duration refreshInterval) { super(ztsClient, domain); cache = initCache(); ztsClientCloser = ztsClient::close; + this.refreshInterval = refreshInterval; } // For testing @@ -67,6 +70,7 @@ public AsmSecretReader(Function clientAnd super(clientAndCredentialsSupplier); cache = initCache(); ztsClientCloser = () -> {}; + this.refreshInterval = DEFAULT_REFRESH_INTERVAL; } @@ -85,7 +89,7 @@ private static AthenzDomain athenzDomain(AsmSecretConfig config, ServiceIdentity private LoadingCache initCache() { return CacheBuilder.newBuilder() - .refreshAfterWrite(CACHE_EXPIRE) + .refreshAfterWrite(refreshInterval) // See documentation for refreshAfterWrite for why we use asyncReloading. .build(CacheLoader.asyncReloading(new CacheLoader<>() { @Override From 0196b2aec26aaf74dc361591505411d669a92774 Mon Sep 17 00:00:00 2001 From: Morten Tokle Date: Tue, 19 Nov 2024 13:29:31 +0100 Subject: [PATCH 074/126] Read refreshed secret --- .../java/ai/vespa/secret/aws/SecretImpl.java | 31 +++++++++++++++++++ .../java/ai/vespa/secret/aws/SecretsImpl.java | 11 ++----- 2 files changed, 33 insertions(+), 9 deletions(-) create mode 100644 jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretImpl.java diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretImpl.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretImpl.java new file mode 100644 index 000000000000..d80a2be1f2e0 --- /dev/null +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretImpl.java @@ -0,0 +1,31 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package ai.vespa.secret.aws; + +import ai.vespa.secret.Secret; +import ai.vespa.secret.internal.TypedSecretStore; +import ai.vespa.secret.model.Key; +import ai.vespa.secret.model.SecretName; +import ai.vespa.secret.model.VaultName; + +/** + * @author mortent + */ +public class SecretImpl implements Secret { + + private final VaultName vaultName; + private final SecretName secretName; + private final TypedSecretStore secrets; + + public SecretImpl(VaultName vaultName, SecretName secretName, TypedSecretStore secrets) { + this.vaultName = vaultName; + this.secretName = secretName; + this.secrets = secrets; + } + + @Override + public String current() { + var secret = secrets.getSecret(new Key(vaultName, secretName)); + return secret.secretAsString(); + } +} diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretsImpl.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretsImpl.java index e54b5338125d..8900f2e42795 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretsImpl.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretsImpl.java @@ -5,7 +5,6 @@ import ai.vespa.secret.Secrets; import ai.vespa.secret.config.SecretsConfig; import ai.vespa.secret.internal.TypedSecretStore; -import ai.vespa.secret.model.Key; import ai.vespa.secret.model.SecretName; import ai.vespa.secret.model.VaultName; @@ -43,12 +42,6 @@ public Secret get(String key) { VaultName vaultName = VaultName.of(secretConfig.vault()); SecretName secretName = SecretName.of(secretConfig.name()); - var secret = secretStore.getSecret(new Key(vaultName, secretName)); - if (secret == null) { - throw new IllegalArgumentException("Secret with key '" + key + "' not found in secret store"); - } - - return secret::secretAsString; - } - + return new SecretImpl(vaultName, secretName, secretStore); + } } From 6b9c1494e06c71a9fa31f3a8baad00c00b180660 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Tue, 19 Nov 2024 12:30:16 +0000 Subject: [PATCH 075/126] add progress reporting when rebuilding HNSW index --- .../tensor/tensor_attribute_loader.cpp | 54 ++++++++++++++++++- .../tensor/tensor_attribute_loader.h | 4 +- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp index 4d862b9b76a4..70197fefffdd 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp @@ -13,10 +13,12 @@ #include #include #include +#include #include #include +#include #include -#include +#include #include #include #include @@ -41,12 +43,15 @@ bool can_use_index_save_file(const search::attribute::Config &config, const AttributeHeader& header) { if (!config.hnsw_index_params().has_value() || !header.get_hnsw_index_params().has_value()) { + LOG(warning, "Cannot use saved HNSW index for ANN, missing index parameters"); return false; } const auto &config_params = config.hnsw_index_params().value(); const auto &header_params = header.get_hnsw_index_params().value(); if ((config_params.max_links_per_node() != header_params.max_links_per_node()) || - (config_params.distance_metric() != header_params.distance_metric())) { + (config_params.distance_metric() != header_params.distance_metric())) + { + LOG(warning, "Cannot use saved HNSW index for ANN, index parameters have changed"); return false; } return true; @@ -258,16 +263,28 @@ TensorAttributeLoader::build_index(vespalib::Executor* executor, uint32_t 
docid_ std::unique_ptr builder; if (executor != nullptr) { builder = std::make_unique(_attr, _generation_handler, _store, *_index, *executor); + log_event("hnsw.index.rebuild.start", "execution", "multi-threaded"); } else { builder = std::make_unique(_attr, *_index); + log_event("hnsw.index.rebuild.start", "execution", "single-threaded"); } + constexpr vespalib::duration report_interval = 60s; + auto beforeStamp = vespalib::steady_clock::now(); + auto last_report = beforeStamp; for (uint32_t lid = 0; lid < docid_limit; ++lid) { auto ref = _ref_vector[lid].load_relaxed(); if (ref.valid()) { builder->add(lid); + auto now = vespalib::steady_clock::now(); + if (last_report + report_interval < now) { + log_event("hnsw.index.rebuild.progress", "percent", (lid * 100.0 / docid_limit)); + last_report = now; + } } } builder->wait_complete(); + vespalib::duration elapsedTime = vespalib::steady_clock::now() - beforeStamp; + log_event("hnsw.index.rebuild.complete", "time.elapsed.ms", vespalib::count_ms(elapsedTime)); _attr.commit(); } @@ -357,4 +374,37 @@ TensorAttributeLoader::check_consistency(uint32_t docid_limit) inconsistencies, _attr.getName().c_str(), elapsed); } + +namespace { +struct EventValue { + vespalib::JSONStringer jstr; + EventValue(const TensorAttribute& attr) : jstr() { + jstr.beginObject(); + jstr.appendKey("name").appendString(attr.getName()); + } + void addKV(const char* key, const char* value) { jstr.appendKey(key).appendString(value); } + void addKV(const char* key, double value) { jstr.appendKey(key).appendDouble(value); } + const char* message() { + jstr.endObject(); + return jstr.str().c_str(); + } +}; +} // namespace + +void TensorAttributeLoader::log_event(const char* eventName) { + EV_STATE(eventName, EventValue(_attr).message()); +} + +void TensorAttributeLoader::log_event(const char* eventName, const char* key, const char* value) { + EventValue ev(_attr); + ev.addKV(key, value); + EV_STATE(eventName, ev.message()); } + +void TensorAttributeLoader::log_event(const char* eventName, const char* key, double value) { + EventValue ev(_attr); + ev.addKV(key, value); + EV_STATE(eventName, ev.message()); +} + +} // namespace search::tensor diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h index 89a07c03de5f..d65455600096 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h @@ -36,6 +36,9 @@ class TensorAttributeLoader { bool load_index(); uint64_t get_index_size_on_disk(); void check_consistency(uint32_t docid_limit); + void log_event(const char *eventName); + void log_event(const char *eventName, const char *key, const char *value); + void log_event(const char *eventName, const char *key, double value); public: TensorAttributeLoader(TensorAttribute& attr, GenerationHandler& generation_handler, RefVector& ref_vector, TensorStore& store, NearestNeighborIndex* index); @@ -44,4 +47,3 @@ class TensorAttributeLoader { }; } - From be3fde4a80ba93e7267ee87a216a6febfba236df Mon Sep 17 00:00:00 2001 From: gjoranv Date: Tue, 19 Nov 2024 13:17:36 +0100 Subject: [PATCH 076/126] Don't leak stack trace to log --- .../src/main/java/ai/vespa/secret/aws/AsmSecretReader.java | 2 +- .../java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java 
b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java index 0c8e2cb63b75..1a19224b88c2 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java @@ -143,7 +143,7 @@ public Secret getSecret(Key key, SecretVersionId version) { var msg = version == null ? "Failed to retrieve current version of secret with key " + key : "Failed to retrieve secret with key " + key + ", version: " + version.value(); - throw new IllegalArgumentException(msg, e); + throw new IllegalArgumentException(msg + ":\n" + e.getMessage()); } } diff --git a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java index cd05fb4898fe..b0a449b36707 100644 --- a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java +++ b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java @@ -114,10 +114,10 @@ void it_throws_exception_if_secret_not_found() { var key = new Key(vault, SecretName.of("secret1")); try (var reader = secretReader()) { var e = assertThrows(IllegalArgumentException.class, () -> reader.getSecret(key)); - assertEquals("Failed to retrieve current version of secret with key vault1/secret1", e.getMessage()); + assertTrue(e.getMessage().startsWith("Failed to retrieve current version of secret with key vault1/secret1")); e = assertThrows(IllegalArgumentException.class, () -> reader.getSecret(key, SecretVersionId.of("1"))); - assertEquals("Failed to retrieve secret with key vault1/secret1, version: 1", e.getMessage()); + assertTrue(e.getMessage().startsWith("Failed to retrieve secret with key vault1/secret1, version: 1")); } } @@ -131,7 +131,7 @@ void it_throws_exception_if_version_not_found() { try (var store = secretReader()) { var e = assertThrows(IllegalArgumentException.class, () -> store.getSecret(key, SecretVersionId.of("2"))); - assertEquals("Failed to retrieve secret with key vault1/secret1, version: 2", e.getMessage()); + assertTrue(e.getMessage().startsWith("Failed to retrieve secret with key vault1/secret1, version: 2")); } } From ee874cf4265c8575f09aabf09bd235277e2d0fe4 Mon Sep 17 00:00:00 2001 From: Morten Tokle Date: Tue, 19 Nov 2024 14:02:41 +0100 Subject: [PATCH 077/126] Initialize in correct order --- .../main/java/ai/vespa/secret/aws/AsmSecretReader.java | 4 ++-- .../test/java/ai/vespa/secret/aws/SecretsImplTest.java | 10 ---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java index 19ad2e18b35a..3621752c3fc6 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java @@ -60,17 +60,17 @@ public AsmSecretReader(URI ztsUri, SSLContext sslContext, AthenzDomain domain) { private AsmSecretReader(ZtsClient ztsClient, AthenzDomain domain, Duration refreshInterval) { super(ztsClient, domain); + this.refreshInterval = refreshInterval; cache = initCache(); ztsClientCloser = ztsClient::close; - this.refreshInterval = refreshInterval; } // For testing public AsmSecretReader(Function clientAndCredentialsSupplier) { super(clientAndCredentialsSupplier); + this.refreshInterval = DEFAULT_REFRESH_INTERVAL; cache = initCache(); ztsClientCloser = () -> {}; - this.refreshInterval = 
DEFAULT_REFRESH_INTERVAL; } diff --git a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/SecretsImplTest.java b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/SecretsImplTest.java index 225d4d86fcd7..54526b9c53af 100644 --- a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/SecretsImplTest.java +++ b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/SecretsImplTest.java @@ -47,16 +47,6 @@ public void testThrowOnUnknownSecrets() { } } - @Test - public void testSecretInConfigButNotInVault() { - try { - secrets.get("mySecret"); - fail(); - } catch (IllegalArgumentException e) { - assertEquals("Secret with key 'mySecret' not found in secret store", e.getMessage()); - } - } - private static SecretsImpl createSecrets() { var config = createSecretsConfig(); var secretStore = createSecretStore(); From 5b7724cf9cf53b0a63a310e9dc1164f239934379 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 19 Nov 2024 14:32:15 +0100 Subject: [PATCH 078/126] More testing of posting list handle trimming in disk index unit test. --- .../diskindex/diskindex/diskindex_test.cpp | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp index 49f4b1ece23f..3548a4a59e81 100644 --- a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp +++ b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp @@ -24,9 +24,11 @@ using search::BitVector; using search::BitVectorIterator; using search::diskindex::DiskIndex; using search::diskindex::DiskTermBlueprint; +using search::diskindex::FieldIndex; using search::diskindex::TestDiskIndex; using search::diskindex::ZcRareWordPosOccIterator; using search::fef::TermFieldMatchDataArray; +using search::index::DictionaryLookupResult; using search::index::DummyFileHeaderContext; using search::index::PostingListHandle; using search::index::Schema; @@ -128,7 +130,7 @@ class DiskIndexTest : public ::testing::Test, public TestDiskIndex { static void SetUpTestSuite(); static void TearDownTestSuite(); void requireThatLookupIsWorking(const EmptySettings& empty_settings); - void requireThatWeCanReadPostingList(); + void requireThatWeCanReadPostingList(const IOSettings& io_settings); void require_that_we_can_get_field_length_info(); void requireThatWeCanReadBitVector(); void requireThatBlueprintIsCreated(); @@ -138,6 +140,8 @@ class DiskIndexTest : public ::testing::Test, public TestDiskIndex { void build_index(const IOSettings& io_settings, const EmptySettings& empty_settings); void test_empty_settings(const EmptySettings& empty_settings); void test_io_settings(const IOSettings& io_settings); + SimpleResult search(const FieldIndex& field_index, const DictionaryLookupResult& lookup_result, + const PostingListHandle& handle); }; DiskIndexTest::DiskIndexTest() = default; @@ -243,10 +247,19 @@ DiskIndexTest::requireThatLookupIsWorking(const EmptySettings& empty_settings) } } -void -DiskIndexTest::requireThatWeCanReadPostingList() +SimpleResult +DiskIndexTest::search(const FieldIndex& field_index, const DictionaryLookupResult& lookup_result, + const PostingListHandle& handle) { TermFieldMatchDataArray mda; + auto sb = field_index.create_iterator(lookup_result, handle, mda); + return SimpleResult().search(*sb); +} + + +void +DiskIndexTest::requireThatWeCanReadPostingList(const IOSettings& io_settings) +{ { // field 'f1' auto r = _index->lookup(0, "w1"); auto& field_index = _index->get_field_index(0); @@ -254,8 +267,15 @@ 
DiskIndexTest::requireThatWeCanReadPostingList() if (field_index.is_posting_list_cache_enabled()) { EXPECT_GT(64, h._allocSize); } - auto sb = field_index.create_iterator(r, h, mda); - EXPECT_EQ(SimpleResult({1,3}), SimpleResult().search(*sb)); + EXPECT_EQ(SimpleResult({1,3}), search(field_index, r, h)); + if (io_settings._use_directio && !io_settings._use_mmap) { + auto directio_handle = field_index.read_uncached_posting_list(r, false); + EXPECT_LT(256, directio_handle._allocSize); + EXPECT_EQ(SimpleResult({1,3}), search(field_index, r, directio_handle)); + auto trimmed_directio_handle = field_index.read_uncached_posting_list(r, true); + EXPECT_GT(64, trimmed_directio_handle._allocSize); + EXPECT_EQ(SimpleResult({1,3}), search(field_index, r, trimmed_directio_handle)); + } } } @@ -455,7 +475,7 @@ DiskIndexTest::test_io_settings(const IOSettings& io_settings) EmptySettings empty_settings; build_index(io_settings, empty_settings); requireThatLookupIsWorking(empty_settings); - requireThatWeCanReadPostingList(); + requireThatWeCanReadPostingList(io_settings); require_that_we_can_get_field_length_info(); requireThatWeCanReadBitVector(); requireThatBlueprintIsCreated(); From 07673088f7c790810b6c58155606f5a291a32625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Pettersen?= Date: Tue, 19 Nov 2024 13:31:33 +0000 Subject: [PATCH 079/126] improve story --- eval/src/apps/eval_expr/eval_expr.cpp | 89 ++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 15 deletions(-) diff --git a/eval/src/apps/eval_expr/eval_expr.cpp b/eval/src/apps/eval_expr/eval_expr.cpp index fd4ec875c2bb..35e5a16b4e2b 100644 --- a/eval/src/apps/eval_expr/eval_expr.cpp +++ b/eval/src/apps/eval_expr/eval_expr.cpp @@ -654,8 +654,8 @@ void compress_impl(const std::string &str, auto &bits, auto &dict, auto &dst) { } else { dst.write_bits(dict.get(ctx_w), bits.width()); } - bits.use(); dict.add(ctx_wc); + bits.use(); ctx_w = ctx_c; } } @@ -679,7 +679,7 @@ std::string decompress_impl(auto &src, auto &bits, auto &dict) { std::string result; - int c = src.read_bits(2); + int c = src.read_bits(bits.width()); if (c == dict.eof) { return result; } @@ -688,6 +688,7 @@ std::string decompress_impl(auto &src, auto &bits, auto &dict) { std::string w(1, char(c)); result.append(w); dict.add(w); + bits.use(); std::string entry; for (;;) { @@ -725,15 +726,15 @@ std::string compress(const std::string &str) { // used to test the compression code above, hence the inlined REQUIREs std::string decompress(const std::string &str) { LZDict dict; - BitWidthTracker bits(3, 4); + BitWidthTracker bits(2, 1); UrlSafeBitInput src(str); return decompress_impl(src, bits, dict); } // What happens during compression and decompression, the full story struct LZLog { - static constexpr int BW = 18; - static constexpr int PW = 14; + static constexpr int BW = 31; + static constexpr int PW = 18; struct Block { std::vector writer; std::vector reader; @@ -749,7 +750,7 @@ struct LZLog { } size_t wait = (len - writer.size()); for (size_t i = 0; i < len; ++i) { - fprintf(stderr, "%*s%-*s%-*s\n", + fprintf(stderr, "%*s%*s%-*s\n", BW, (i >= wait) ? writer[i - wait].c_str() : "", PW, "", BW, (i < reader.size()) ? 
reader[i].c_str() : ""); @@ -759,13 +760,15 @@ struct LZLog { struct Packet { int bits; int value; + std::string writer; + std::string reader; Packet(int bits_in, int value_in) noexcept : bits(bits_in), value(value_in) {} void dump() { - fprintf(stderr, "%*s%-*s%-*s\n", - BW, fmt("write %d bits", bits).c_str(), - PW, fmt(" -> %4d -> ", value).c_str(), - BW, fmt("read %d bits", bits).c_str()); + fprintf(stderr, "%*s%*s%-*s\n", + BW, writer.c_str(), + PW, fmt(" -> %2db:%6d -> ", bits, value).c_str(), + BW, reader.c_str()); } }; std::vector blocks; @@ -779,7 +782,7 @@ struct LZLog { ensure_block(block); blocks[block].writer.push_back(msg); } - int packet(int block, int bits, int value) { + int ensure_packet(int block, int bits, int value) { if (packets.size() <= size_t(block)) { REQUIRE_EQ(packets.size(), size_t(block)); packets.emplace_back(bits, value); @@ -789,6 +792,16 @@ struct LZLog { } return block + 1; } + int write_packet(int block, int bits, int value, const std::string &msg) { + int res = ensure_packet(block, bits, value); + packets[block].writer = msg; + return res; + } + int read_packet(int block, int bits, int value, const std::string &msg) { + int res = ensure_packet(block, bits, value); + packets[block].reader = msg; + return res; + } void reader(int block, const std::string &msg) { ensure_block(block); blocks[block].reader.push_back(msg); @@ -798,7 +811,7 @@ struct LZLog { std::string psep(PW, '-'); REQUIRE_EQ(blocks.size(), packets.size() + 1); fprintf(stderr, "%s%s%s\n", bsep.c_str(), psep.c_str(), bsep.c_str()); - fprintf(stderr, "%*s%-*s%-*s\n", BW, "COMPRESS", PW, " DATA", BW, "DECOMPRESS"); + fprintf(stderr, "%*s%*s%-*s\n", BW, "COMPRESS", PW, "DATA ", BW, "DECOMPRESS"); fprintf(stderr, "%s%s%s\n", bsep.c_str(), psep.c_str(), bsep.c_str()); for (size_t i = 0; i < blocks.size(); ++i) { blocks[i].dump(i); @@ -813,6 +826,7 @@ struct LZLog { LZLog &log; size_t idx = 0; LZDict dict; + bool expect_lit8 = false; BitWidthTracker bits{2,2}; UrlSafeBitOutput dst; Writer(LZLog &log_in) : log(log_in) {} @@ -838,8 +852,24 @@ struct LZLog { log.writer(idx, fmt("bit width %d -> %d", before, after)); } void write_bits(int x, int n) { + std::string msg; + if (expect_lit8) { + msg = fmt("write lit8 '%c'", char(x)); + } else { + switch (x) { + case lit8: + msg = "write lit8 tag"; + break; + case eof: + msg = "write EOF tag"; + break; + default: + msg = fmt("write entry '%s'", dict.get(x).c_str()); + } + } + expect_lit8 = (x == lit8); dst.write_bits(x, n); - idx = log.packet(idx, n, x); + idx = log.write_packet(idx, n, x, msg); } void flush() { dst.flush(); @@ -850,7 +880,9 @@ struct LZLog { LZLog &log; size_t idx = 0; LZDict dict; - BitWidthTracker bits{3,4}; + bool expect_lit8 = false; + int prev = -1; + BitWidthTracker bits{2,1}; UrlSafeBitInput src; Reader(LZLog &log_in, const std::string &str) : log(log_in), src(str) {} ~Reader(); @@ -865,7 +897,34 @@ struct LZLog { int read_bits(int n) { int x = src.read_bits(n); - idx = log.packet(idx, n, x); + std::string msg; + if (expect_lit8) { + msg = fmt("read lit8 '%c'", char(x)); + prev = dict.size(); + } else { + switch (x) { + case lit8: + msg = "read lit8 tag"; + prev = -1; + break; + case eof: + msg = "read EOF tag"; + prev = -1; + break; + default: + if (x == dict.size()) { + REQUIRE(prev != -1); + std::string entry = dict.get(prev); + entry.push_back(entry[0]); + msg = fmt("infer entry '%s'", entry.c_str()); + } else { + msg = fmt("read entry '%s'", dict.get(x).c_str()); + } + prev = x; + } + } + expect_lit8 = (x == lit8); + idx = 
log.read_packet(idx, n, x, msg); return x; } void use() { From 3df014a512f367f104d267ef0fa7e23537a44b0d Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Tue, 19 Nov 2024 14:40:18 +0100 Subject: [PATCH 080/126] Revert "Log warning (shown in console) when using admin version 2.0 on cloud" --- .../java/com/yahoo/config/model/admin/AdminModel.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java b/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java index 0625dc20e22a..4ef591cda9f7 100644 --- a/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java +++ b/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java @@ -20,8 +20,7 @@ import java.util.Collection; import java.util.List; - -import static java.util.logging.Level.WARNING; +import java.util.logging.Level; /** * Config model adaptor of the Admin class. @@ -79,10 +78,7 @@ public BuilderV2() { @Override public void doBuild(AdminModel model, Element adminElement, ConfigModelContext modelContext) { - // admin v4 is used on hosted: Build a default V4 instead - if (modelContext.getDeployState().isHosted()) { - modelContext.getDeployLogger().logApplicationPackage(WARNING, " version 2.0 is deprecated" + - " and will be ignored, please use version 4.0 instead"); + if (modelContext.getDeployState().isHosted()) { // admin v4 is used on hosted: Build a default V4 instead new BuilderV4().doBuild(model, adminElement, modelContext); return; } @@ -118,7 +114,7 @@ public void doBuild(AdminModel model, Element adminElement, ConfigModelContext m // TODO: Remove in Vespa 9 if ("3.0".equals(adminElement.getAttribute("version"))) modelContext.getDeployState().getDeployLogger() - .logApplicationPackage(WARNING, "admin model version 3.0 is deprecated and support will removed in Vespa 9, " + + .logApplicationPackage(Level.WARNING, "admin model version 3.0 is deprecated and support will removed in Vespa 9, " + "please use version 4.0 or remove the element completely. See https://cloud.vespa.ai/en/reference/services#ignored-elements"); TreeConfigProducer parent = modelContext.getParentProducer(); From f9342cdde2c5a08b25c1ae83ee7a64b7ee35d39f Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Tue, 19 Nov 2024 14:42:32 +0100 Subject: [PATCH 081/126] Update comment about using version 2.0 of admin on cloud --- .../main/java/com/yahoo/config/model/admin/AdminModel.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java b/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java index 4ef591cda9f7..88335e5cc394 100644 --- a/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java +++ b/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java @@ -78,7 +78,9 @@ public BuilderV2() { @Override public void doBuild(AdminModel model, Element adminElement, ConfigModelContext modelContext) { - if (modelContext.getDeployState().isHosted()) { // admin v4 is used on hosted: Build a default V4 instead + // admin v4 is used on hosted: Build a default V4 instead. We want to allow version 2.0 so + // that self-hosted apps deploy without changes. 
TODO: Warn if tags from version 2.0 are used (and ignored) + if (modelContext.getDeployState().isHosted()) { new BuilderV4().doBuild(model, adminElement, modelContext); return; } From 52eecf5f524784786e392300deaab6e40f2fa3c0 Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Tue, 19 Nov 2024 15:52:02 +0000 Subject: [PATCH 082/126] Fix hit rate metric for cache by avoiding precision loss. --- searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.h b/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.h index b38a21bb2d45..1f55ad45093d 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.h +++ b/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.h @@ -16,7 +16,7 @@ namespace proton { class CacheMetrics : public metrics::MetricSet { metrics::LongValueMetric memoryUsage; metrics::LongValueMetric elements; - metrics::LongAverageMetric hitRate; + metrics::DoubleAverageMetric hitRate; metrics::LongCountMetric lookups; metrics::LongCountMetric invalidations; std::string _cache_name; From e05195b45836351a66194e1883dc9d43db3497dd Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Tue, 19 Nov 2024 15:57:49 +0000 Subject: [PATCH 083/126] Export sum and count for index io search read metrics. sum is needed to graph bytes/sec. count is needed to graph ops/sec. --- .../main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java | 4 ++-- .../src/main/java/ai/vespa/metrics/set/VespaMetricSet.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java index b24cdcfbcdbc..6f53da81d3e1 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java @@ -444,8 +444,8 @@ private static Set getSearchNodeMetrics() { // index addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ALLOCATED_BYTES.average()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_READ_BYTES.rate()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_CACHED_READ_BYTES.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_READ_BYTES, EnumSet.of(sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_CACHED_READ_BYTES, EnumSet.of(sum, count)); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_INDEX_DISK_USAGE.average()); // index caches diff --git a/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java index 2bd426e2a8c3..c56e54017ca7 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java @@ -514,8 +514,8 @@ private static Set getSearchNodeMetrics() { addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_USED_BYTES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_DEAD_BYTES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ONHOLD_BYTES.average()); - addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_READ_BYTES.rate()); - addMetric(metrics, 
SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_CACHED_READ_BYTES.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_READ_BYTES, EnumSet.of(sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_CACHED_READ_BYTES, EnumSet.of(sum, count)); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_INDEX_DISK_USAGE.average()); // index caches From 2bc7ad9e7b36cbb9c7d2c0390bd8e8146827954d Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 19 Nov 2024 23:08:20 +0100 Subject: [PATCH 084/126] Revert "add progress reporting when rebuilding HNSW index" --- .../tensor/tensor_attribute_loader.cpp | 54 +------------------ .../tensor/tensor_attribute_loader.h | 4 +- 2 files changed, 3 insertions(+), 55 deletions(-) diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp index 70197fefffdd..4d862b9b76a4 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp @@ -13,12 +13,10 @@ #include #include #include -#include #include #include -#include #include -#include +#include #include #include #include @@ -43,15 +41,12 @@ bool can_use_index_save_file(const search::attribute::Config &config, const AttributeHeader& header) { if (!config.hnsw_index_params().has_value() || !header.get_hnsw_index_params().has_value()) { - LOG(warning, "Cannot use saved HNSW index for ANN, missing index parameters"); return false; } const auto &config_params = config.hnsw_index_params().value(); const auto &header_params = header.get_hnsw_index_params().value(); if ((config_params.max_links_per_node() != header_params.max_links_per_node()) || - (config_params.distance_metric() != header_params.distance_metric())) - { - LOG(warning, "Cannot use saved HNSW index for ANN, index parameters have changed"); + (config_params.distance_metric() != header_params.distance_metric())) { return false; } return true; @@ -263,28 +258,16 @@ TensorAttributeLoader::build_index(vespalib::Executor* executor, uint32_t docid_ std::unique_ptr builder; if (executor != nullptr) { builder = std::make_unique(_attr, _generation_handler, _store, *_index, *executor); - log_event("hnsw.index.rebuild.start", "execution", "multi-threaded"); } else { builder = std::make_unique(_attr, *_index); - log_event("hnsw.index.rebuild.start", "execution", "single-threaded"); } - constexpr vespalib::duration report_interval = 60s; - auto beforeStamp = vespalib::steady_clock::now(); - auto last_report = beforeStamp; for (uint32_t lid = 0; lid < docid_limit; ++lid) { auto ref = _ref_vector[lid].load_relaxed(); if (ref.valid()) { builder->add(lid); - auto now = vespalib::steady_clock::now(); - if (last_report + report_interval < now) { - log_event("hnsw.index.rebuild.progress", "percent", (lid * 100.0 / docid_limit)); - last_report = now; - } } } builder->wait_complete(); - vespalib::duration elapsedTime = vespalib::steady_clock::now() - beforeStamp; - log_event("hnsw.index.rebuild.complete", "time.elapsed.ms", vespalib::count_ms(elapsedTime)); _attr.commit(); } @@ -374,37 +357,4 @@ TensorAttributeLoader::check_consistency(uint32_t docid_limit) inconsistencies, _attr.getName().c_str(), elapsed); } - -namespace { -struct EventValue { - vespalib::JSONStringer jstr; - EventValue(const TensorAttribute& attr) : jstr() { - jstr.beginObject(); - jstr.appendKey("name").appendString(attr.getName()); - } - void 
addKV(const char* key, const char* value) { jstr.appendKey(key).appendString(value); } - void addKV(const char* key, double value) { jstr.appendKey(key).appendDouble(value); } - const char* message() { - jstr.endObject(); - return jstr.str().c_str(); - } -}; -} // namespace - -void TensorAttributeLoader::log_event(const char* eventName) { - EV_STATE(eventName, EventValue(_attr).message()); -} - -void TensorAttributeLoader::log_event(const char* eventName, const char* key, const char* value) { - EventValue ev(_attr); - ev.addKV(key, value); - EV_STATE(eventName, ev.message()); } - -void TensorAttributeLoader::log_event(const char* eventName, const char* key, double value) { - EventValue ev(_attr); - ev.addKV(key, value); - EV_STATE(eventName, ev.message()); -} - -} // namespace search::tensor diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h index d65455600096..89a07c03de5f 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h @@ -36,9 +36,6 @@ class TensorAttributeLoader { bool load_index(); uint64_t get_index_size_on_disk(); void check_consistency(uint32_t docid_limit); - void log_event(const char *eventName); - void log_event(const char *eventName, const char *key, const char *value); - void log_event(const char *eventName, const char *key, double value); public: TensorAttributeLoader(TensorAttribute& attr, GenerationHandler& generation_handler, RefVector& ref_vector, TensorStore& store, NearestNeighborIndex* index); @@ -47,3 +44,4 @@ class TensorAttributeLoader { }; } + From dfb86ed52f0c0dba157468e382b92dd6c9369983 Mon Sep 17 00:00:00 2001 From: bjormel Date: Wed, 20 Nov 2024 07:30:16 +0000 Subject: [PATCH 085/126] Atlassian JSM maintainer --- flags/src/main/java/com/yahoo/vespa/flags/Flags.java | 6 ++++++ .../src/main/java/ai/vespa/metrics/ControllerMetrics.java | 2 ++ .../java/ai/vespa/metrics/set/InfrastructureMetricSet.java | 3 +++ 3 files changed, 11 insertions(+) diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 400ebf0a69f0..bd3d7b73bf5f 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -398,6 +398,12 @@ public class Flags { "Whether to sync tenants to HubSpot", "Takes effect immediately"); + public static UnboundBooleanFlag ATLASSIAN_SYNC_TENANTS = defineFeatureFlag( + "atlassianb-sync-tenants", false, + List.of("bjormel"), "2024-11-11", "2025-01-01", + "Whether to sync tenants to Atlassian", + "Takes effect immediately"); + public static final UnboundBooleanFlag SYMMETRIC_PUT_AND_ACTIVATE_REPLICA_SELECTION = defineFeatureFlag( "symmetric-put-and-activate-replica-selection", false, List.of("vekterli"), "2024-05-23", "2024-12-01", diff --git a/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java index 3a00a9fcfc52..0b64c78fe0ef 100644 --- a/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java @@ -53,6 +53,8 @@ public enum ControllerMetrics implements VespaMetrics { HUBSPOT_EXCEPTIONS("hubspot.exceptions", Unit.FAILURE, "Controller: Hubspot exceptions"), HUBSPOT_LAST_SUCCESS("hubspot.last_success", Unit.SECONDS_SINCE_EPOCH, "Controller: Last successful Hubspot synchronization"), 
TENANT_CREATED_LAST_SUCCESS("tenant.created.last_success", Unit.SECONDS_SINCE_EPOCH, "Controller: Last successful tenant creation"), + ATLASSIAN_EXCEPTIONS("atlassian.exceptions", Unit.FAILURE, "Controller: Atlassian exceptions"), + ATLASSIAN_LAST_SUCCESS("atlassian.last_success", Unit.SECONDS_SINCE_EPOCH, "Controller: Last successful Atlassian synchronization"), // Metrics per API, metrics names generated in ControllerMaintainer/MetricsReporter OPERATION_APPLICATION("operation.application", Unit.REQUEST, "Controller: Requests for /application API"), diff --git a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java index 29c1ea923a5b..22f6d489f9b9 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java @@ -210,6 +210,9 @@ private static Set getControllerMetrics() { addMetric(metrics, ControllerMetrics.TENANT_CREATED_LAST_SUCCESS.last()); + addMetric(metrics, ControllerMetrics.ATLASSIAN_EXCEPTIONS.count()); + addMetric(metrics, ControllerMetrics.ATLASSIAN_LAST_SUCCESS.last()); + return metrics; } From bf254996c66f2c1689a1cf1b9313e10884e68ba5 Mon Sep 17 00:00:00 2001 From: Arnstein Ressem Date: Wed, 20 Nov 2024 09:32:26 +0100 Subject: [PATCH 086/126] Remove old script for factory communication --- .buildkite/factory-command-old.sh | 101 ------------------------------ 1 file changed, 101 deletions(-) delete mode 100755 .buildkite/factory-command-old.sh diff --git a/.buildkite/factory-command-old.sh b/.buildkite/factory-command-old.sh deleted file mode 100755 index ce9ecc70ef58..000000000000 --- a/.buildkite/factory-command-old.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/bash -# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -set -eo pipefail - -if (( $# < 1 )); then - echo "Usage: $0 [options]" - exit 1 -fi - -COMMAND=$1 -FACTORY_API="https://factory.vespa.aws-us-east-1a.vespa.oath.cloud/api/factory/v1" -COOKIEJAR=$(pwd)/jar.txt -# shellcheck disable=2064 -trap "rm -f $COOKIEJAR" EXIT - -SESSION_TOKEN=null -WAIT_UNTIL=$(( $(date +%s) + 120 )) -set +e -while [[ $SESSION_TOKEN == null ]]; do - SESSION_TOKEN=$(curl -s -H 'Content-Type: application/json' -H 'Accept: application/json' -d "{ \"username\": \"svc-okta-vespa-factory\", \"password\": \"$SVC_OKTA_VESPA_FACTORY_TOKEN\" }" https://ouryahoo.okta.com/api/v1/authn | jq -re '.sessionToken') - - if [[ $SESSION_TOKEN == null ]]; then - if [[ $(date +%s) -ge $WAIT_UNTIL ]]; then - echo "Could not fetch session token from Okta: SESSION_TOKEN=$SESSION_TOKEN" - exit 1 - else - echo "Invalid SESSION_TOKEN=$SESSION_TOKEN . Trying again ..." 
>&2 - sleep 3 - fi - fi -done -set -e - -LOCATION=$(curl -s -i -c "$COOKIEJAR" "https://factory.vespa.aws-us-east-1a.vespa.oath.cloud/login" | grep location | awk '{print $2}' | tr -d '\r') -curl -sL -b "$COOKIEJAR" -c "$COOKIEJAR" "$LOCATION&sessionToken=$SESSION_TOKEN" &> /dev/null - -CURL="curl -sL -b $COOKIEJAR" - -shift -case $COMMAND in - get-version) - VERSION=$1 - if [[ -z $VERSION ]]; then echo "Usage: $0 $COMMAND "; exit 1; fi - $CURL "$FACTORY_API/versions/$VERSION" - ;; - create-build) - FACTORY_PIPELINE_ID=$1 - FACTORY_PLATFORM=$2 - if [[ -z $FACTORY_PIPELINE_ID ]]; then echo "Usage: $0 $COMMAND [factory platform]"; exit 1; fi - if [[ -z $FACTORY_PLATFORM ]]; then FACTORY_PLATFORM="opensource_centos7"; fi - $CURL -d "{ - \"startSeconds\": $(date +%s), - \"sdApiUrl\": \"https://api.buildkite.com/\", - \"pipelineId\": $FACTORY_PIPELINE_ID, - \"jobId\": 0, - \"buildId\": $BUILDKITE_BUILD_NUMBER, - \"platform\": \"$FACTORY_PLATFORM\" - }" \ - "$FACTORY_API/builds" - ;; - create-release) - $CURL -d "{ - \"startSeconds\": $(date +%s), - \"systemName\": \"opensource\" - }" \ - "$FACTORY_API/releases" - ;; - update-build-status) - FACTORY_PIPELINE_ID=$1 - STATUS=$2 - DESCRIPTION=$3 - FACTORY_BUILD_NUMBER=$(( FACTORY_PIPELINE_ID << 32 | BUILDKITE_BUILD_NUMBER & 0xFFFFFF )) - if [[ -z $FACTORY_PIPELINE_ID ]] || [[ -z $STATUS ]] || [[ -z $DESCRIPTION ]]; then - echo "Usage: $0 $COMMAND " - exit 1 - fi - $CURL -d "{ - \"updatedSeconds\": $(date +%s), - \"sdApiUrl\": \"https://api.buildkite.com/\", - \"pipelineId\": $FACTORY_PIPELINE_ID, - \"jobId\": 0, - \"buildId\": $FACTORY_BUILD_NUMBER, - \"status\": \"$STATUS\", - \"description\": \"$DESCRIPTION\" - }" \ - "$FACTORY_API/builds/$FACTORY_BUILD_NUMBER/status" - ;; - update-released-time) - VERSION=$1 - if [[ -z $VERSION ]]; then echo "Usage: $0 $COMMAND "; exit 1; fi - $CURL -d "{ - \"releasedSeconds\": $(date +%s), - \"systemName\": \"opensource\" - }" \ - "$FACTORY_API/releases/$VERSION" - ;; - *) - echo "Unknown command $COMMAND" - exit 1 - ;; -esac From 79d0f6db956ced71abcabb20b32c96d404cdf2f2 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Tue, 19 Nov 2024 14:30:24 +0000 Subject: [PATCH 087/126] add progress reporting when rebuilding HNSW index, take 2 --- .../tensor/tensor_attribute_loader.cpp | 58 +++++++++++++++++-- .../tensor/tensor_attribute_loader.h | 1 - valgrind-suppressions.txt | 44 ++++++++++++++ 3 files changed, 96 insertions(+), 7 deletions(-) diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp index 4d862b9b76a4..04e8a7316c86 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp @@ -13,10 +13,12 @@ #include #include #include +#include #include #include +#include #include -#include +#include #include #include #include @@ -34,19 +36,47 @@ namespace search::tensor { inline namespace loader { +class Event { +private: + vespalib::JSONStringer jstr; +public: + Event(const TensorAttribute& attr) : jstr() { + jstr.beginObject(); + jstr.appendKey("name").appendString(attr.getName()); + } + Event& addKV(const char* key, const char* value) { + jstr.appendKey(key).appendString(value); + return *this; + } + Event& addKV(const char* key, double value) { + jstr.appendKey(key).appendDouble(value); + return *this; + } + void log(const char *eventName) { + jstr.endObject(); + EV_STATE(eventName, jstr.str().c_str()); + } +}; + constexpr uint32_t 
LOAD_COMMIT_INTERVAL = 256; const std::string tensorTypeTag("tensortype"); -bool -can_use_index_save_file(const search::attribute::Config &config, const AttributeHeader& header) +bool can_use_index_save_file(const std::string& attrName, + const search::attribute::Config &config, + const AttributeHeader& header) { if (!config.hnsw_index_params().has_value() || !header.get_hnsw_index_params().has_value()) { + LOG(warning, "Attribute %s cannot use saved HNSW index for ANN (missing parameters)", + attrName.c_str()); return false; } const auto &config_params = config.hnsw_index_params().value(); const auto &header_params = header.get_hnsw_index_params().value(); if ((config_params.max_links_per_node() != header_params.max_links_per_node()) || - (config_params.distance_metric() != header_params.distance_metric())) { + (config_params.distance_metric() != header_params.distance_metric())) + { + LOG(warning, "Attribute %s cannot use saved HNSW index for ANN, index parameters have changed", + attrName.c_str()); return false; } return true; @@ -258,16 +288,32 @@ TensorAttributeLoader::build_index(vespalib::Executor* executor, uint32_t docid_ std::unique_ptr builder; if (executor != nullptr) { builder = std::make_unique(_attr, _generation_handler, _store, *_index, *executor); + Event(_attr).addKV("execution", "multi-threaded").log("hnsw.index.rebuild.start"); } else { builder = std::make_unique(_attr, *_index); + Event(_attr).addKV("execution", "single-threaded").log("hnsw.index.rebuild.start"); } + constexpr vespalib::duration report_interval = 60s; + auto beforeStamp = vespalib::steady_clock::now(); + auto last_report = beforeStamp; for (uint32_t lid = 0; lid < docid_limit; ++lid) { auto ref = _ref_vector[lid].load_relaxed(); if (ref.valid()) { builder->add(lid); + auto now = vespalib::steady_clock::now(); + if (last_report + report_interval < now) { + Event(_attr) + .addKV("percent", (lid * 100.0 / docid_limit)) + .log("hnsw.index.rebuild.progress"); + last_report = now; + } } } builder->wait_complete(); + vespalib::duration elapsedTime = vespalib::steady_clock::now() - beforeStamp; + Event(_attr) + .addKV("time.elapsed.ms", vespalib::count_ms(elapsedTime)) + .log("hnsw.index.rebuild.complete"); _attr.commit(); } @@ -330,7 +376,7 @@ TensorAttributeLoader::on_load(vespalib::Executor* executor) bool use_index_file = false; if (has_index_file(_attr)) { auto header = AttributeHeader::extractTags(reader.getDatHeader(), _attr.getBaseFileName()); - use_index_file = can_use_index_save_file(_attr.getConfig(), header); + use_index_file = can_use_index_save_file(_attr.getName(), _attr.getConfig(), header); } if (use_index_file) { if (!load_index()) { @@ -357,4 +403,4 @@ TensorAttributeLoader::check_consistency(uint32_t docid_limit) inconsistencies, _attr.getName().c_str(), elapsed); } -} +} // namespace search::tensor diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h index 89a07c03de5f..968a60fe9e99 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h @@ -44,4 +44,3 @@ class TensorAttributeLoader { }; } - diff --git a/valgrind-suppressions.txt b/valgrind-suppressions.txt index 36cc61701b4a..c769e6e068f6 100644 --- a/valgrind-suppressions.txt +++ b/valgrind-suppressions.txt @@ -617,3 +617,47 @@ fun:setenv ... } +{ + Protobuf 5.26.1 suppression 7 + Memcheck:Leak + match-leak-kinds: reachable + fun:_Znwm + ... 
+ fun:_ZN6google8protobuf14DescriptorPool24InternalAddGeneratedFileEPKvi + fun:_ZN6google8protobuf8internal14AddDescriptorsEPKNS1_15DescriptorTableE + ... +} +{ + Protobuf 5.26.1 suppression 8 + Memcheck:Leak + match-leak-kinds: reachable + fun:_Znwm + ... + fun:_ZN6google8protobuf8internal13OnShutdownRunEPFvPKvES3_ + fun:_ZN6google8protobuf8internal24InitProtobufDefaultsSlowEv + ... +} +{ + Protobuf 5.26.1 suppression 9 + Memcheck:Leak + match-leak-kinds: reachable + fun:_Znwm + fun:_ZN6google8protobuf12_GLOBAL__N_123GeneratedMessageFactory9singletonEv + fun:_ZN6google8protobuf14MessageFactory29InternalRegisterGeneratedFileEPKNS0_8internal15DescriptorTableE + ... + fun:call_init + ... +} +{ + Protobuf 5.26.1 suppression 10 + Memcheck:Leak + match-leak-kinds: reachable + fun:_Znwm + fun:_ZN4absl12lts_2024011618container_internal19HashSetResizeHelper15InitializeSlotsISaIcELm8ELb1ELm8EEEbRNS1_12CommonFieldsEPvT_.isra.0 + fun:_ZN4absl12lts_2024011618container_internal12raw_hash_setINS1_17FlatHashSetPolicyIPKN6google8protobuf8internal15DescriptorTableEEENS5_12_GLOBAL__N_123GeneratedMessageFactory20DescriptorByNameHashENSC_18DescriptorByNameEqESaIS9_EE6resizeEm + fun:_ZN4absl12lts_2024011618container_internal12raw_hash_setINS1_17FlatHashSetPolicyIPKN6google8protobuf8internal15DescriptorTableEEENS5_12_GLOBAL__N_123GeneratedMessageFactory20DescriptorByNameHashENSC_18DescriptorByNameEqESaIS9_EE14prepare_insertEm + fun:_ZN6google8protobuf14MessageFactory29InternalRegisterGeneratedFileEPKNS0_8internal15DescriptorTableE + ... + fun:call_init + ... +} From 97fe9fae0ca7a877c1b37b3cc047fabdf389812b Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Wed, 20 Nov 2024 12:35:47 +0100 Subject: [PATCH 088/126] Assert nonzero io sizes in field index. --- .../src/vespa/searchlib/diskindex/field_index.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp index 68f4aabab38f..6247c73d56f6 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -175,9 +176,8 @@ PostingListHandle FieldIndex::read_uncached_posting_list(const DictionaryLookupResult& lookup_result, bool trim) const { auto handle = _posting_file->read_posting_list(lookup_result); - if (handle._read_bytes != 0) { - _cache_disk_io_stats->add_uncached_read_operation(handle._read_bytes); - } + assert(handle._read_bytes != 0); + _cache_disk_io_stats->add_uncached_read_operation(handle._read_bytes); if (trim) { _posting_file->consider_trim_posting_list(lookup_result, handle, 0.2); // Trim posting list if more than 20% bloat } @@ -210,7 +210,8 @@ FieldIndex::read_posting_list(const DictionaryLookupResult& lookup_result) const key.bit_length = lookup_result.counts._bitLength; IPostingListCache::Context ctx(this); auto result = _posting_list_cache->read(key, ctx); - if (!ctx.cache_miss && result._read_bytes != 0) { + if (!ctx.cache_miss) { + assert(result._read_bytes != 0); _cache_disk_io_stats->add_cached_read_operation(result._read_bytes); } return result; @@ -230,9 +231,8 @@ FieldIndex::read_uncached_bit_vector(BitVectorDictionaryLookupResult lookup_resu { ReadStats read_stats; auto result = _bit_vector_dict->read_bitvector(lookup_result, read_stats); - if (read_stats.read_bytes != 0) { - _cache_disk_io_stats->add_uncached_read_operation(read_stats.read_bytes); - } + 
assert(read_stats.read_bytes != 0); + _cache_disk_io_stats->add_uncached_read_operation(read_stats.read_bytes); return result; } From 7392f40c4915e7411d36511fa1f24eed499023b6 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Wed, 20 Nov 2024 14:22:53 +0100 Subject: [PATCH 089/126] Cleanup: Non-functional changes only --- .../searcher/FieldCollapsingSearcher.java | 52 ++++++------------- .../docprocs/indexing/ScriptManager.java | 2 +- 2 files changed, 18 insertions(+), 36 deletions(-) diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java index 0559bd808bc1..dd586c4c3089 100644 --- a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java @@ -19,7 +19,7 @@ import java.util.Map; /** - * A searcher which does parameterized collapsing. + * A searcher which removes hits which has an already seen value of a given field. * * @author Steinar Knutsen */ @@ -28,8 +28,9 @@ public class FieldCollapsingSearcher extends Searcher { private static final CompoundName collapse = CompoundName.from("collapse"); - private static final CompoundName collapsefield = CompoundName.from("collapsefield"); - private static final CompoundName collapsesize = CompoundName.from("collapsesize"); + // TODO: Use collapse.field and collapse.size and make these aliases + private static final CompoundName collapseField = CompoundName.from("collapsefield"); + private static final CompoundName collapseSize = CompoundName.from("collapsesize"); private static final CompoundName collapseSummaryName = CompoundName.from("collapse.summary"); /** Separator used for the fieldnames in collapsefield */ @@ -40,15 +41,13 @@ public class FieldCollapsingSearcher extends Searcher { /** * The max number of hits that will be preserved per unique - * value of the collapsing parameter, - * if no field-specific value is configured. + * value of the collapsing parameter, if no field-specific value is configured. */ private int defaultCollapseSize; /** * The factor by which to scale up the requested number of hits - * from the next searcher in the chain, because collapsing will - * likely delete many hits. + * from the next searcher in the chain, because collapsing will likely delete many hits. 
*/ private double extraFactor; @@ -60,10 +59,8 @@ public FieldCollapsingSearcher() { @Inject @SuppressWarnings("unused") public FieldCollapsingSearcher(QrSearchersConfig config) { - QrSearchersConfig.Com.Yahoo.Prelude.Searcher.FieldCollapsingSearcher - s = config.com().yahoo().prelude().searcher().FieldCollapsingSearcher(); - - init(s.collapsesize(), s.extrafactor()); + var searcherConfig = config.com().yahoo().prelude().searcher().FieldCollapsingSearcher(); + init(searcherConfig.collapsesize(), searcherConfig.extrafactor()); } /** @@ -91,14 +88,11 @@ private void init(int collapseSize, double extraFactor) { */ @Override public Result search(com.yahoo.search.Query query, Execution execution) { - String collapseFieldParam = query.properties().getString(collapsefield); - + String collapseFieldParam = query.properties().getString(collapseField); if (collapseFieldParam == null) return execution.search(query); String[] collapseFields = collapseFieldParam.split(separator); - - int globalCollapseSize = query.properties().getInteger(collapsesize, defaultCollapseSize); - + int globalCollapseSize = query.properties().getInteger(collapseSize, defaultCollapseSize); query.properties().set(collapse, "0"); int hitsToRequest = query.getHits() != 0 ? (int) Math.ceil((query.getOffset() + query.getHits() + 1) * extraFactor) : 0; @@ -118,9 +112,7 @@ public Result search(com.yahoo.search.Query query, Execution execution) { resultSource = search(query.clone(), execution, nextOffset, hitsToRequest); fill(resultSource, summaryClass, execution); - collapse(result, knownCollapses, resultSource, - collapseFields, query.properties(), globalCollapseSize - ); + collapse(result, knownCollapses, resultSource, collapseFields, query.properties(), globalCollapseSize); hitsAfterCollapse = result.getHitCount(); if (resultSource.getTotalHitCount() < (hitsToRequest + nextOffset)) { @@ -140,7 +132,7 @@ public Result search(com.yahoo.search.Query query, Execution execution) { // Set correct meta information result.mergeWith(resultSource); - // Keep only (offset,.. offset+hits) hits + // Keep only (offset ... offset+hits) hits result.hits().trim(query.getOffset(), query.getHits()); // Mark query as query with collapsing query.properties().set(collapse, "1"); @@ -160,7 +152,6 @@ private Result search(Query query, Execution execution, int offset, int hits) { */ private void collapse(Result result, Map knownCollapses, Result resultSource, String[] collapseFields, Properties queryProperties, int globalCollapseSize) { - for (Hit unknownHit : resultSource.hits()) { if (!(unknownHit instanceof FastHit hit)) { result.hits().add(unknownHit); @@ -168,14 +159,10 @@ private void collapse(Result result, Map knownCollapses, Result } boolean addHit = true; - for (String collapseField : collapseFields) { - Object peek = hit.getField(collapseField); String collapseId = peek != null ? 
peek.toString() : null; - if (collapseId == null) { - continue; - } + if (collapseId == null) continue; // prepending the fieldname is necessary to distinguish between values in the different collapsefields // @ cannot occur in fieldnames @@ -199,19 +186,14 @@ private void collapse(Result result, Map knownCollapses, Result } } - if (addHit) { + if (addHit) result.hits().add(hit); - } } } private int getCollapseSize(Properties properties, String fieldName, int globalCollapseSize) { - Integer fieldCollapseSize = properties.getInteger(collapsesize.append(fieldName)); - - if (fieldCollapseSize != null) { - return fieldCollapseSize; - } - - return globalCollapseSize; + Integer fieldCollapseSize = properties.getInteger(collapseSize.append(fieldName)); + return fieldCollapseSize != null ? fieldCollapseSize : globalCollapseSize; } + } diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java index 39ac44119f9f..b35df86abd8d 100644 --- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java +++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java @@ -111,7 +111,7 @@ private static Map> createScriptsMap(Docume ScriptExpression script = new ScriptExpression(expressions); script.select(fieldPathOptimizer, fieldPathOptimizer); - fieldScripts.put(FULL, new DocumentScript(ilscript.doctype(), ilscript.docfield(),script)); + fieldScripts.put(FULL, new DocumentScript(ilscript.doctype(), ilscript.docfield(), script)); documentFieldScripts.put(ilscript.doctype(), Collections.unmodifiableMap(fieldScripts)); } return Collections.unmodifiableMap(documentFieldScripts); From d1b1f203739459ad09620b79290c8366b5fd0570 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Wed, 20 Nov 2024 14:32:54 +0100 Subject: [PATCH 090/126] Enable distribution config from cluster controller by default --- flags/src/main/java/com/yahoo/vespa/flags/Flags.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 400ebf0a69f0..dc753af3b59e 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -436,7 +436,7 @@ public class Flags { "Takes effect immediately"); public static final UnboundBooleanFlag DISTRIBUTION_CONFIG_FROM_CLUSTER_CONTROLLER = defineFeatureFlag( - "distribution-config-from-cluster-controller", false, + "distribution-config-from-cluster-controller", true, List.of("vekterli"), "2024-07-01", "2024-12-01", "Iff true, the cluster controller will be the authoritative source of distribution " + "config changes in a content cluster, and distribution changes will be part of explicitly " + From 3f2b2f3d63aaf3992fee97200752e05bab9d9a8d Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Wed, 20 Nov 2024 14:35:21 +0100 Subject: [PATCH 091/126] Remove obsolete MetricsSearcher, not in use anymore --- .../model/container/PlatformBundles.java | 1 - .../streamingvisitors/MetricsSearcher.java | 112 -------------- .../MetricsSearcherTestCase.java | 144 ------------------ 3 files changed, 257 deletions(-) delete mode 100644 container-search/src/main/java/com/yahoo/vespa/streamingvisitors/MetricsSearcher.java delete mode 100644 container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java 
b/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java index 468cf8dd9610..52ed06b66860 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java @@ -151,7 +151,6 @@ public static boolean isModelIntegrationClass(String className) { com.yahoo.search.searchchain.ForkingSearcher.class.getName(), com.yahoo.search.searchers.CacheControlSearcher.class.getName(), com.yahoo.search.searchers.RateLimitingSearcher.class.getName(), - com.yahoo.vespa.streamingvisitors.MetricsSearcher.class.getName(), com.yahoo.vespa.streamingvisitors.StreamingBackend.class.getName(), ai.vespa.search.llm.LLMSearcher.class.getName(), ai.vespa.search.llm.RAGSearcher.class.getName() diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/MetricsSearcher.java b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/MetricsSearcher.java deleted file mode 100644 index e367bb58f804..000000000000 --- a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/MetricsSearcher.java +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.streamingvisitors; - -import com.yahoo.log.event.Event; -import com.yahoo.search.query.context.QueryContext; -import com.yahoo.search.result.ErrorMessage; -import com.yahoo.search.searchchain.Execution; -import com.yahoo.search.Query; -import com.yahoo.search.Result; -import com.yahoo.search.Searcher; -import com.yahoo.processing.request.CompoundName; -import com.yahoo.vdslib.VisitorStatistics; - -import java.util.Map; -import java.util.TreeMap; -import java.util.logging.Logger; - -import static com.yahoo.vespa.streamingvisitors.StreamingBackend.STREAMING_STATISTICS; - -/** - * Generates mail-specific query metrics. 
- */ -public class MetricsSearcher extends Searcher { - - private static final CompoundName metricsearcherId = CompoundName.from("metricsearcher.id"); - private static final CompoundName streamingLoadtype = CompoundName.from("streaming.loadtype"); - - private static final Logger log = Logger.getLogger(MetricsSearcher.class.getName()); - - static class Stats { - long latency = 0; - int count = 0; - int ok = 0; - int failed = 0; - long dataStreamed = 0; - long documentsStreamed = 0; - } - - Map statMap = new TreeMap<>(); - private long lastMetricLog = 0; - - @Override - public Result search(Query query, Execution execution) { - long timeMs = System.currentTimeMillis(); - - // Backwards compatibility - convert metricsearcher.id to streaming.loadtype - // TODO Cleanup at some point - String metricName = query.properties().getString(metricsearcherId); - if (metricName != null) { - query.properties().set(streamingLoadtype, metricName); - } - - Result result = execution.search(query); - - long latency = System.currentTimeMillis() - timeMs; - - metricName = query.properties().getString(streamingLoadtype); - if (metricName == null) { - return result; - } - - synchronized(this) { - Stats stats = statMap.get(metricName); - - if (stats == null) { - stats = new Stats(); - statMap.put(metricName, stats); - } - - stats.count++; - stats.latency += latency; - - if (result.hits().getError() != null && - !result.hits().getErrorHit().hasOnlyErrorCode(ErrorMessage.NULL_QUERY) && - !result.hits().getErrorHit().hasOnlyErrorCode(3)) { - stats.failed++; - } else { - stats.ok++; - } - - VisitorStatistics visitorstats = null; - final QueryContext queryContext = query.getContext(false); - if (queryContext != null) { - visitorstats = (VisitorStatistics)queryContext.getProperty(STREAMING_STATISTICS); - } - if (visitorstats != null) { - stats.dataStreamed += visitorstats.getBytesVisited(); - stats.documentsStreamed += visitorstats.getDocumentsVisited(); - } else { - log.fine("No visitor statistics set in query! - don't use metrics searcher without streaming search"); - } - - if ((timeMs - lastMetricLog) > 60000) { - for (Map.Entry entry : statMap.entrySet()) { - stats = entry.getValue(); - Event.value(entry.getKey() + "_latency", stats.count > 0 ? (double)stats.latency / (double)stats.count : 0); - Event.value(entry.getKey() + "_ok", stats.ok); - Event.value(entry.getKey() + "_failed", stats.failed); - Event.value(entry.getKey() + "_bytesstreamed", stats.dataStreamed); - Event.value(entry.getKey() + "_documentsstreamed", stats.documentsStreamed); - - stats.latency = 0; - stats.count = 0; - } - - lastMetricLog = timeMs; - } - } - - return result; - } -} diff --git a/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java b/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java deleted file mode 100644 index 5cc0e6b060b5..000000000000 --- a/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-package com.yahoo.vespa.streamingvisitors; - -import com.yahoo.component.chain.Chain; -import com.yahoo.search.Query; -import com.yahoo.search.Result; -import com.yahoo.search.Searcher; -import com.yahoo.search.result.ErrorMessage; -import com.yahoo.search.result.Hit; -import com.yahoo.search.searchchain.Execution; -import com.yahoo.vdslib.VisitorStatistics; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.*; - -/** - * @author Ulf Carlin - */ -public class MetricsSearcherTestCase { - - private final MetricsSearcher metricsSearcher = new MetricsSearcher(); - private final MockBackend backend = new MockBackend(); - private final Chain chain = new Chain<>(metricsSearcher, backend); - private final Execution.Context context = Execution.Context.createContextStub(); - private final MetricsSearcher.Stats expStatsLt1 = new MetricsSearcher.Stats(); - private final MetricsSearcher.Stats expStatsLt2 = new MetricsSearcher.Stats(); - - private static final String LOADTYPE1 = "lt1"; - private static final String LOADTYPE2 = "lt2"; - - private void verifySearch(String metricParam, String message, String detailedMessage) { - Result result = new Execution(chain, context).search(new Query("?query=test&" + metricParam)); - assertEquals(1, result.hits().size()); - if (message == null) { - assertEquals("news:0", result.hits().get(0).getId().toString()); - } else { - assertNotNull(result.hits().getError()); - assertTrue(result.hits().getErrorHit().errors().iterator().next().getMessage().contains(message), - "Expected '" + message + "' to be contained in '" - + result.hits().getErrorHit().errors().iterator().next().getMessage() + "'"); - assertTrue(result.hits().getErrorHit().errors().iterator().next().getDetailedMessage().contains(detailedMessage), - "Expected '" + detailedMessage + "' to be contained in '" - + result.hits().getErrorHit().errors().iterator().next().getDetailedMessage() + "'"); - } - - if (metricParam == null) { - return; - } - - MetricsSearcher.Stats expStats; - MetricsSearcher.Stats actualStats; - if (metricParam.contains(LOADTYPE1)) { - expStats = expStatsLt1; - actualStats = metricsSearcher.statMap.get(LOADTYPE1); - } else { - expStats = expStatsLt2; - actualStats = metricsSearcher.statMap.get(LOADTYPE2); - } - - expStats.count++; - if (message == null) { - expStats.ok++; - } else { - expStats.failed++; - } - if (metricParam.contains(LOADTYPE1)) { - expStats.dataStreamed += 16; - expStats.documentsStreamed += 2; - } - - assertEquals(expStats.count, actualStats.count); - assertEquals(expStats.ok, actualStats.ok); - assertEquals(expStats.failed, actualStats.failed); - assertEquals(expStats.dataStreamed, actualStats.dataStreamed); - assertEquals(expStats.documentsStreamed, actualStats.documentsStreamed); - } - - @Test - void testBasics() { - // Start counting at -1 since count is reset upon the first query by MetricsSearcher.search - expStatsLt1.count--; - String[] loadTypes = {LOADTYPE1, LOADTYPE2}; - for (String loadType : loadTypes) { - verifySearch("streaming.loadtype=" + loadType, null, null); - verifySearch("metricsearcher.id=" + loadType, null, null); - verifySearch(null, null, null); - verifySearch("streaming.loadtype=" + loadType, "Backend communication error", "Detailed error message"); - } - - } - - @Test - void searcherDoesNotTryToDereferenceNullQueryContext() { - backend.setImplicitlyCreateContext(false); - // This will crash with an NPE if the searcher does not cope with null - // query contexts. 
- new Execution(chain, context).search(new Query("?query=test&streaming.loadtype=" + LOADTYPE1)); - } - - private static class MockBackend extends Searcher { - - private int sequenceNumber = 0; - private final VisitorStatistics visitorStats = new VisitorStatistics(); - private boolean implicitlyCreateContext = true; - - private MockBackend() { - visitorStats.setBucketsVisited(1); - visitorStats.setBytesReturned(8); - visitorStats.setBytesVisited(16); - visitorStats.setDocumentsReturned(1); - visitorStats.setDocumentsVisited(2); - } - - public void setImplicitlyCreateContext(boolean implicitlyCreateContext) { - this.implicitlyCreateContext = implicitlyCreateContext; - } - - @Override - public Result search(Query query, Execution execution) { - if (implicitlyCreateContext) { - String loadType = query.properties().getString("streaming.loadtype"); - assignContextProperties(query, loadType); - } - - Result result = new Result(query); - if (sequenceNumber == 3 || sequenceNumber == 7) { - result.hits().addError(ErrorMessage.createBackendCommunicationError("Detailed error message")); - } else { - result.hits().add(new Hit("news:0")); - } - sequenceNumber++; - return result; - } - - private void assignContextProperties(Query query, String loadType) { - if (loadType != null && loadType.equals(LOADTYPE1)) { - query.getContext(true).setProperty(StreamingBackend.STREAMING_STATISTICS, visitorStats); - } else { - query.getContext(true).setProperty(StreamingBackend.STREAMING_STATISTICS, null); - } - } - } - -} From 4c8383908e42cec7fb6f0371e3dc34cf9489bc67 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Wed, 20 Nov 2024 14:38:44 +0000 Subject: [PATCH 092/126] extend unit test with current semantics in more complex cases --- .../prelude/semantics/test/SemanticSearcherTestCase.java | 9 +++++++++ .../com/yahoo/prelude/semantics/test/rulebases/rules.sr | 3 +++ 2 files changed, 12 insertions(+) diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java index 677468c9339c..1db668ec9187 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java @@ -144,6 +144,15 @@ void testExplicitContext() { @Test void testOrProduction() { assertSemantics("OR something somethingelse", "something"); + // I did not expect this: + assertSemantics("OR (AND foo1 something bar2) somethingelse", "foo1 something bar2"); + } + + @Test + void testDoubleOrProduction() { + assertSemantics("OR more evenmore", "somethingmore"); + // This does not seem right: + assertSemantics("OR more (AND foo1 bar2) evenmore (AND foo1 bar2)", "foo1 somethingmore bar2"); } // This test is order dependent. Fix it!! 
diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr index 51e48213d79a..9807b226d941 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr @@ -49,6 +49,9 @@ java +> -coffee; # Adding an or term something +> ?somethingelse; +# Add two or terms: +somethingmore -> ?more ?evenmore; + # Adding another negative # TODO: Term types in conditions # java -coffee +> -island From 1ba87e138df8a9ebcfa708a6bcd07234d971ff33 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Wed, 20 Nov 2024 15:15:02 +0000 Subject: [PATCH 093/126] proposed fix for unexpected semantics --- .../java/com/yahoo/prelude/semantics/engine/Evaluation.java | 4 +++- .../prelude/semantics/test/SemanticSearcherTestCase.java | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java index 91aef5698261..ab46c934455c 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java @@ -343,7 +343,9 @@ private void insertWithDesiredParentType(List items, int index, CompositeI CompositeItem newParent = newParent(desiredParentType); if (parentsParent != null && (! (parentsParent instanceof QueryTree) && parentsParent.getItemType() == newParent.getItemType())) { // Collapse - newParent = parentsParent; + for (Item item : items) + parentsParent.addItem(item); + return; } for (Item item : items) diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java index 1db668ec9187..39f7dbe6918f 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java @@ -151,8 +151,8 @@ void testOrProduction() { @Test void testDoubleOrProduction() { assertSemantics("OR more evenmore", "somethingmore"); - // This does not seem right: - assertSemantics("OR more (AND foo1 bar2) evenmore (AND foo1 bar2)", "foo1 somethingmore bar2"); + // Strange ordering: + assertSemantics("OR more (AND foo1 bar2) evenmore", "foo1 somethingmore bar2"); } // This test is order dependent. Fix it!! From 692a26491bc2b1ba53215a871600bcb6198e7e9e Mon Sep 17 00:00:00 2001 From: Kristian Aune Date: Wed, 20 Nov 2024 16:43:56 +0100 Subject: [PATCH 094/126] Put Description to the right in the table --- .../java/ai/vespa/metrics/docs/MetricSetDocumentation.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metrics/src/main/java/ai/vespa/metrics/docs/MetricSetDocumentation.java b/metrics/src/main/java/ai/vespa/metrics/docs/MetricSetDocumentation.java index a15f29160917..6fcca1cb9851 100644 --- a/metrics/src/main/java/ai/vespa/metrics/docs/MetricSetDocumentation.java +++ b/metrics/src/main/java/ai/vespa/metrics/docs/MetricSetDocumentation.java @@ -63,7 +63,7 @@ protected static void writeMetricSetDocumentation(String path, String name, Metr

%s Metrics

- + %s @@ -92,9 +92,9 @@ private static String htmlRows(Map> metrics) """, entry.getKey().baseName().replaceAll("\\.", "_"), entry.getKey().baseName(), - entry.getKey().description(), entry.getKey().unit().toString().toLowerCase(), - String.join(", ", entry.getValue())) + String.join(", ", entry.getValue()), + entry.getKey().description()) ).collect(Collectors.joining()); } From cffef13f4c32521403414dd580adc73bb4e45326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ola=20Aunr=C3=B8nning?= Date: Wed, 20 Nov 2024 17:10:38 +0100 Subject: [PATCH 095/126] Swap CC limit metric descriptions --- .../main/java/ai/vespa/metrics/ClusterControllerMetrics.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metrics/src/main/java/ai/vespa/metrics/ClusterControllerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ClusterControllerMetrics.java index f15b7412b24d..577a7a48673c 100644 --- a/metrics/src/main/java/ai/vespa/metrics/ClusterControllerMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/ClusterControllerMetrics.java @@ -26,8 +26,8 @@ public enum ClusterControllerMetrics implements VespaMetrics { RESOURCE_USAGE_NODES_ABOVE_LIMIT("cluster-controller.resource_usage.nodes_above_limit", Unit.NODE, "The number of content nodes above resource limit, blocking feed"), RESOURCE_USAGE_MAX_MEMORY_UTILIZATION("cluster-controller.resource_usage.max_memory_utilization", Unit.FRACTION, "Current memory utilisation, for content node with highest value"), RESOURCE_USAGE_MAX_DISK_UTILIZATION("cluster-controller.resource_usage.max_disk_utilization", Unit.FRACTION, "Current disk space utilisation, for content node with highest value"), - RESOURCE_USAGE_MEMORY_LIMIT("cluster-controller.resource_usage.memory_limit", Unit.FRACTION, "Disk space limit as a fraction of available disk space"), - RESOURCE_USAGE_DISK_LIMIT("cluster-controller.resource_usage.disk_limit", Unit.FRACTION, "Memory space limit as a fraction of available memory"), + RESOURCE_USAGE_MEMORY_LIMIT("cluster-controller.resource_usage.memory_limit", Unit.FRACTION, "Memory space limit as a fraction of available memory"), + RESOURCE_USAGE_DISK_LIMIT("cluster-controller.resource_usage.disk_limit", Unit.FRACTION, "Disk space limit as a fraction of available disk space"), REINDEXING_PROGRESS("reindexing.progress", Unit.FRACTION, "Re-indexing progress"); From 8ba5822387333913a0a2e916321f5fb0cc724cbe Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Wed, 20 Nov 2024 17:42:20 +0100 Subject: [PATCH 096/126] Update owner and expiry date for some feature flags --- flags/src/main/java/com/yahoo/vespa/flags/Flags.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 400ebf0a69f0..3274a0dc49bf 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -292,12 +292,12 @@ public class Flags { NODE_TYPE, HOSTNAME); public static final UnboundListFlag ZONAL_WEIGHTED_ENDPOINT_RECORDS = defineListFlag( - "zonal-weighted-endpoint-records", List.of(), String.class, List.of("jonmv"), "2023-12-15", "2024-12-01", + "zonal-weighted-endpoint-records", List.of(), String.class, List.of("hmusum"), "2023-12-15", "2025-02-01", "A list of weighted (application) endpoint fqdns for which we should use zonal endpoints as targets, not LBs.", "Takes effect at redeployment from controller"); public static final UnboundListFlag WEIGHTED_ENDPOINT_RECORD_TTL = 
defineListFlag( - "weighted-endpoint-record-ttl", List.of(), String.class, List.of("jonmv"), "2023-05-16", "2024-12-01", + "weighted-endpoint-record-ttl", List.of(), String.class, List.of("hmusum"), "2023-05-16", "2025-02-01", "A list of endpoints and custom TTLs, on the form \"endpoint-fqdn:TTL-seconds\". " + "Where specified, CNAME records are used instead of the default ALIAS records, which have a default 60s TTL.", "Takes effect at redeployment from controller"); @@ -424,7 +424,7 @@ public class Flags { public static final UnboundBooleanFlag LAUNCH_APPLICATION_ATHENZ_SERVICE = defineFeatureFlag( "launch-application-athenz-service", false, - List.of("jonmv"), "2024-06-11", "2025-01-10", + List.of("hmusum"), "2024-06-11", "2025-02-01", "Whether to launch an Athenz service unique to the application. Only valid in public systems!", "Takes effect on next deployment", INSTANCE_ID); From b3405f928e7a53d35ad84bfa6c69e04f3b127261 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Wed, 20 Nov 2024 19:22:01 +0100 Subject: [PATCH 097/126] Adjust getSearchableStats() API to limit disk io stats clearing to metrics updater task. --- .../vespa/searchcore/proton/index/diskindexwrapper.h | 4 ++-- .../src/vespa/searchcore/proton/index/indexmanager.h | 4 ++-- .../searchcore/proton/index/memoryindexwrapper.h | 2 +- .../proton/server/documentdb_metrics_updater.cpp | 2 +- .../vespa/searchcore/proton/server/idocumentsubdb.h | 2 +- .../searchcore/proton/server/searchabledocsubdb.cpp | 6 +++--- .../searchcore/proton/server/searchabledocsubdb.h | 2 +- .../searchcore/proton/server/storeonlydocsubdb.cpp | 2 +- .../searchcore/proton/server/storeonlydocsubdb.h | 2 +- .../searchcore/proton/test/dummy_document_sub_db.h | 2 +- .../searchcore/proton/test/mock_index_manager.h | 2 +- .../vespa/searchcorespi/index/fakeindexsearchable.h | 2 +- .../src/vespa/searchcorespi/index/iindexmanager.h | 2 +- .../searchcorespi/index/index_searchable_stats.cpp | 2 +- .../vespa/searchcorespi/index/indexcollection.cpp | 4 ++-- .../src/vespa/searchcorespi/index/indexcollection.h | 2 +- .../vespa/searchcorespi/index/indexmaintainer.cpp | 12 ++++++------ .../src/vespa/searchcorespi/index/indexmaintainer.h | 4 ++-- .../src/vespa/searchcorespi/index/indexsearchable.h | 2 +- .../searchcorespi/index/warmupindexcollection.cpp | 4 ++-- .../searchcorespi/index/warmupindexcollection.h | 2 +- .../src/tests/diskindex/diskindex/diskindex_test.cpp | 2 +- .../src/vespa/searchlib/diskindex/diskindex.cpp | 4 ++-- searchlib/src/vespa/searchlib/diskindex/diskindex.h | 2 +- .../src/vespa/searchlib/diskindex/field_index.cpp | 4 ++-- .../src/vespa/searchlib/diskindex/field_index.h | 6 +++--- .../src/vespa/searchlib/util/cache_disk_io_stats.h | 8 +++++++- 27 files changed, 49 insertions(+), 43 deletions(-) diff --git a/searchcore/src/vespa/searchcore/proton/index/diskindexwrapper.h b/searchcore/src/vespa/searchcore/proton/index/diskindexwrapper.h index efe1dd87a287..b7e7b82ceb49 100644 --- a/searchcore/src/vespa/searchcore/proton/index/diskindexwrapper.h +++ b/searchcore/src/vespa/searchcore/proton/index/diskindexwrapper.h @@ -31,8 +31,8 @@ class DiskIndexWrapper : public searchcorespi::index::IDiskIndex { createBlueprint(const IRequestContext & requestContext, const FieldSpecList &fields, const Node &term) override { return _index.createBlueprint(requestContext, fields, term); } - search::SearchableStats getSearchableStats() const override { - return _index.get_stats(); + search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const override { + 
return _index.get_stats(clear_disk_io_stats); } search::SerialNum getSerialNum() const override; diff --git a/searchcore/src/vespa/searchcore/proton/index/indexmanager.h b/searchcore/src/vespa/searchcore/proton/index/indexmanager.h index f48087be71ea..37524491a08a 100644 --- a/searchcore/src/vespa/searchcore/proton/index/indexmanager.h +++ b/searchcore/src/vespa/searchcore/proton/index/indexmanager.h @@ -120,8 +120,8 @@ class IndexManager : public searchcorespi::IIndexManager return _maintainer.getSearchable(); } - search::SearchableStats getSearchableStats() const override { - return _maintainer.getSearchableStats(); + search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const override { + return _maintainer.getSearchableStats(clear_disk_io_stats); } searchcorespi::IFlushTarget::List getFlushTargets() override { diff --git a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h index c34da2212801..a3ce0f5603e4 100644 --- a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h +++ b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h @@ -49,7 +49,7 @@ class MemoryIndexWrapper : public searchcorespi::index::IMemoryIndex { { return _index.createBlueprint(requestContext, fields, term); } - search::SearchableStats getSearchableStats() const override { + search::SearchableStats getSearchableStats(bool) const override { return _index.get_stats(); } diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp index 88cf213816a8..0ee7ffb99479 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp @@ -303,7 +303,7 @@ DocumentDBMetricsUpdater::updateMetrics(const metrics::MetricLockGuard & guard, { TotalStats totalStats; ExecutorThreadingServiceStats threadingServiceStats = _writeService.getStats(); - updateIndexMetrics(metrics, _subDBs.getReadySubDB()->getSearchableStats(), totalStats); + updateIndexMetrics(metrics, _subDBs.getReadySubDB()->getSearchableStats(true), totalStats); updateAttributeMetrics(metrics, _subDBs, totalStats); updateMatchingMetrics(guard, metrics, *_subDBs.getReadySubDB()); updateDocumentsMetrics(metrics, _subDBs); diff --git a/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h b/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h index d5eaf2db2e74..05d0d34fa84d 100644 --- a/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h @@ -122,7 +122,7 @@ class IDocumentSubDB virtual SerialNum getNewestFlushedSerial() = 0; virtual void pruneRemovedFields(SerialNum serialNum) = 0; virtual void setIndexSchema(std::shared_ptr schema, SerialNum serialNum) = 0; - virtual search::SearchableStats getSearchableStats() const = 0; + virtual search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const = 0; virtual std::shared_ptr getDocumentRetriever() = 0; virtual matching::MatchingStats getMatcherStats(const std::string &rankProfile) const = 0; diff --git a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp index 1000ac5f55e3..ae53eb8372e6 100644 --- a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp +++ 
b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp @@ -319,9 +319,9 @@ SearchableDocSubDB::getNumActiveDocs() const } search::SearchableStats -SearchableDocSubDB::getSearchableStats() const +SearchableDocSubDB::getSearchableStats(bool clear_disk_io_stats) const { - return _indexMgr ? _indexMgr->getSearchableStats() : search::SearchableStats(); + return _indexMgr ? _indexMgr->getSearchableStats(clear_disk_io_stats) : search::SearchableStats(); } std::shared_ptr @@ -375,7 +375,7 @@ SearchableDocSubDB::get_transient_resource_usage() const auto result = FastAccessDocSubDB::get_transient_resource_usage(); // Transient disk usage is measured as the total disk usage of all current fusion indexes. // Transient memory usage is measured as the total memory usage of all memory indexes. - auto stats = getSearchableStats(); + auto stats = getSearchableStats(false); result.merge({stats.fusion_size_on_disk(), stats.memoryUsage().allocatedBytes()}); return result; } diff --git a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h index 7436a89382ba..a34a11861423 100644 --- a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h @@ -134,7 +134,7 @@ SearchableDocSubDB : public FastAccessDocSubDB, SerialNum getNewestFlushedSerial() override; void setIndexSchema(std::shared_ptr schema, SerialNum serialNum) override; size_t getNumActiveDocs() const override; - search::SearchableStats getSearchableStats() const override ; + search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const override ; std::shared_ptr getDocumentRetriever() override; matching::MatchingStats getMatcherStats(const std::string &rankProfile) const override; void close() override; diff --git a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp index a94ee90877f2..3c6dc91b8860 100644 --- a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp @@ -544,7 +544,7 @@ StoreOnlyDocSubDB::setIndexSchema(std::shared_ptr, SerialNum) } search::SearchableStats -StoreOnlyDocSubDB::getSearchableStats() const +StoreOnlyDocSubDB::getSearchableStats(bool) const { return {}; } diff --git a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h index 182234bb86d8..0d22ac4598a5 100644 --- a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h @@ -231,7 +231,7 @@ class StoreOnlyDocSubDB : public DocSubDB void pruneRemovedFields(SerialNum serialNum) override; void setIndexSchema(std::shared_ptr schema, SerialNum serialNum) override; - search::SearchableStats getSearchableStats() const override; + search::SearchableStats getSearchableStats(bool) const override; std::shared_ptr getDocumentRetriever() override; matching::MatchingStats getMatcherStats(const std::string &rankProfile) const override; void close() override; diff --git a/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h b/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h index 7155e120a12f..70f592667557 100644 --- a/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h +++ 
b/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h @@ -74,7 +74,7 @@ struct DummyDocumentSubDb : public IDocumentSubDB SerialNum getNewestFlushedSerial() override { return 0; } void pruneRemovedFields(SerialNum) override { } void setIndexSchema(std::shared_ptr, SerialNum) override { } - search::SearchableStats getSearchableStats() const override { + search::SearchableStats getSearchableStats(bool) const override { return {}; } std::shared_ptr getDocumentRetriever() override { diff --git a/searchcore/src/vespa/searchcore/proton/test/mock_index_manager.h b/searchcore/src/vespa/searchcore/proton/test/mock_index_manager.h index 53af2381e7fe..f69c11971e47 100644 --- a/searchcore/src/vespa/searchcore/proton/test/mock_index_manager.h +++ b/searchcore/src/vespa/searchcore/proton/test/mock_index_manager.h @@ -19,7 +19,7 @@ struct MockIndexManager : public searchcorespi::IIndexManager searchcorespi::IndexSearchable::SP getSearchable() const override { return searchcorespi::IndexSearchable::SP(); } - search::SearchableStats getSearchableStats() const override { + search::SearchableStats getSearchableStats(bool) const override { return search::SearchableStats(); } searchcorespi::IFlushTarget::List getFlushTargets() override { diff --git a/searchcore/src/vespa/searchcorespi/index/fakeindexsearchable.h b/searchcore/src/vespa/searchcorespi/index/fakeindexsearchable.h index fa34ad410010..2696fa8176fd 100644 --- a/searchcore/src/vespa/searchcorespi/index/fakeindexsearchable.h +++ b/searchcore/src/vespa/searchcorespi/index/fakeindexsearchable.h @@ -28,7 +28,7 @@ class FakeIndexSearchable : public IndexSearchable { return _fake.createBlueprint(requestContext, field, term); } - search::SearchableStats getSearchableStats() const override { + search::SearchableStats getSearchableStats(bool) const override { return search::SearchableStats(); } diff --git a/searchcore/src/vespa/searchcorespi/index/iindexmanager.h b/searchcore/src/vespa/searchcorespi/index/iindexmanager.h index a54dc422f645..4accba57d0e5 100644 --- a/searchcore/src/vespa/searchcorespi/index/iindexmanager.h +++ b/searchcore/src/vespa/searchcorespi/index/iindexmanager.h @@ -178,7 +178,7 @@ class IIndexManager { * * @return statistics gathered about underlying memory and disk indexes. */ - virtual search::SearchableStats getSearchableStats() const = 0; + virtual search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const = 0; /** * Returns the list of all flush targets contained in this index manager. 
diff --git a/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.cpp b/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.cpp index f74c9c581326..37c30ad9ddd7 100644 --- a/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.cpp +++ b/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.cpp @@ -14,7 +14,7 @@ IndexSearchableStats::IndexSearchableStats() IndexSearchableStats::IndexSearchableStats(const IndexSearchable &index) : _serialNum(index.getSerialNum()), - _searchableStats(index.getSearchableStats()) + _searchableStats(index.getSearchableStats(false)) { } diff --git a/searchcore/src/vespa/searchcorespi/index/indexcollection.cpp b/searchcore/src/vespa/searchcorespi/index/indexcollection.cpp index dfc25865dc14..0647e366a975 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexcollection.cpp +++ b/searchcore/src/vespa/searchcorespi/index/indexcollection.cpp @@ -109,11 +109,11 @@ IndexCollection::getSourceId(uint32_t i) const } search::SearchableStats -IndexCollection::getSearchableStats() const +IndexCollection::getSearchableStats(bool clear_disk_io_stats) const { search::SearchableStats stats; for (size_t i = 0; i < _sources.size(); ++i) { - stats.merge(_sources[i].source_wrapper->getSearchableStats()); + stats.merge(_sources[i].source_wrapper->getSearchableStats(clear_disk_io_stats)); } return stats; } diff --git a/searchcore/src/vespa/searchcorespi/index/indexcollection.h b/searchcore/src/vespa/searchcorespi/index/indexcollection.h index 6f7e4c3c29a4..1ad57bc6eb87 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexcollection.h +++ b/searchcore/src/vespa/searchcorespi/index/indexcollection.h @@ -50,7 +50,7 @@ class IndexCollection : public ISearchableIndexCollection createBlueprint(const IRequestContext & requestContext, const FieldSpec &field, const Node &term) override; std::unique_ptr createBlueprint(const IRequestContext & requestContext, const FieldSpecList &fields, const Node &term) override; - search::SearchableStats getSearchableStats() const override; + search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const override; search::SerialNum getSerialNum() const override; void accept(IndexSearchableVisitor &visitor) const override; diff --git a/searchcore/src/vespa/searchcorespi/index/indexmaintainer.cpp b/searchcore/src/vespa/searchcorespi/index/indexmaintainer.cpp index f0300a1c4827..c7960be2568b 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexmaintainer.cpp +++ b/searchcore/src/vespa/searchcorespi/index/indexmaintainer.cpp @@ -135,7 +135,7 @@ class DiskIndexWithDestructorCallback : public IDiskIndex { { return _index->createBlueprint(requestContext, fields, term); } - search::SearchableStats getSearchableStats() const override; + search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const override; search::SerialNum getSerialNum() const override { return _index->getSerialNum(); } @@ -161,9 +161,9 @@ class DiskIndexWithDestructorCallback : public IDiskIndex { DiskIndexWithDestructorCallback::~DiskIndexWithDestructorCallback() = default; search::SearchableStats -DiskIndexWithDestructorCallback::getSearchableStats() const +DiskIndexWithDestructorCallback::getSearchableStats(bool clear_disk_io_stats) const { - auto stats = _index->getSearchableStats(); + auto stats = _index->getSearchableStats(clear_disk_io_stats); uint64_t transient_size = _disk_indexes.get_transient_size(_layout, _index_disk_dir); stats.fusion_size_on_disk(transient_size); return stats; @@ -315,7 +315,7 @@ 
IndexMaintainer::loadDiskIndex(const string &indexDir) } vespalib::Timer timer; auto index = _operations.loadDiskIndex(indexDir); - auto stats = index->getSearchableStats(); + auto stats = index->getSearchableStats(false); _disk_indexes->setActive(indexDir, stats.sizeOnDisk()); auto retval = std::make_shared( std::move(index), @@ -338,7 +338,7 @@ IndexMaintainer::reloadDiskIndex(const IDiskIndex &oldIndex) vespalib::Timer timer; const IDiskIndex &wrappedDiskIndex = (dynamic_cast(oldIndex)).getWrapped(); auto index = _operations.reloadDiskIndex(wrappedDiskIndex); - auto stats = index->getSearchableStats(); + auto stats = index->getSearchableStats(false); _disk_indexes->setActive(indexDir, stats.sizeOnDisk()); auto retval = std::make_shared( std::move(index), @@ -1184,7 +1184,7 @@ IndexMaintainer::getFusionStats() const source_list = _source_list; stats.maxFlushed = _maxFlushed; } - stats.diskUsage = source_list->getSearchableStats().sizeOnDisk(); + stats.diskUsage = source_list->getSearchableStats(false).sizeOnDisk(); { LockGuard guard(_fusion_lock); stats.numUnfused = _fusion_spec.flush_ids.size() + ((_fusion_spec.last_fusion_id != 0) ? 1 : 0); diff --git a/searchcore/src/vespa/searchcorespi/index/indexmaintainer.h b/searchcore/src/vespa/searchcorespi/index/indexmaintainer.h index 89ded5fd2866..0e429aeee833 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexmaintainer.h +++ b/searchcore/src/vespa/searchcorespi/index/indexmaintainer.h @@ -361,9 +361,9 @@ class IndexMaintainer : public IIndexManager, return _source_list; } - search::SearchableStats getSearchableStats() const override { + search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const override { LockGuard lock(_new_search_lock); - return _source_list->getSearchableStats(); + return _source_list->getSearchableStats(clear_disk_io_stats); } IFlushTarget::List getFlushTargets() override; diff --git a/searchcore/src/vespa/searchcorespi/index/indexsearchable.h b/searchcore/src/vespa/searchcorespi/index/indexsearchable.h index 8beb0ab7a61b..d157a8c341dd 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexsearchable.h +++ b/searchcore/src/vespa/searchcorespi/index/indexsearchable.h @@ -40,7 +40,7 @@ class IndexSearchable : public search::queryeval::Searchable, /** * Returns the searchable stats for this index searchable. */ - virtual search::SearchableStats getSearchableStats() const = 0; + virtual search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const = 0; /** * Returns the serial number for this index searchable. 
diff --git a/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.cpp b/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.cpp index 3d396b1cf913..cdd6f2bc892d 100644 --- a/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.cpp +++ b/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.cpp @@ -226,9 +226,9 @@ WarmupIndexCollection::createBlueprint(const IRequestContext & requestContext, } search::SearchableStats -WarmupIndexCollection::getSearchableStats() const +WarmupIndexCollection::getSearchableStats(bool clear_disk_io_stats) const { - return _prev->getSearchableStats(); + return _prev->getSearchableStats(clear_disk_io_stats); } diff --git a/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.h b/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.h index 7503b1173c59..a0936a26581f 100644 --- a/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.h +++ b/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.h @@ -48,7 +48,7 @@ class WarmupIndexCollection : public ISearchableIndexCollection, createBlueprint(const IRequestContext & requestContext, const FieldSpec &field, const Node &term) override; std::unique_ptr createBlueprint(const IRequestContext & requestContext, const FieldSpecList &fields, const Node &term) override; - search::SearchableStats getSearchableStats() const override; + search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const override; search::SerialNum getSerialNum() const override; void accept(IndexSearchableVisitor &visitor) const override; diff --git a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp index 3548a4a59e81..213b4360dc9a 100644 --- a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp +++ b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp @@ -448,7 +448,7 @@ DiskIndexTest::build_index(const IOSettings& io_settings, const EmptySettings& e void DiskIndexTest::require_that_get_stats_works() { - auto stats = getIndex().get_stats(); + auto stats = getIndex().get_stats(false); auto& schema = getIndex().getSchema(); EXPECT_LT(0, stats.sizeOnDisk()); auto field_stats = stats.get_field_stats(); diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp index f579aecceed6..77432163ac6f 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp @@ -399,13 +399,13 @@ DiskIndex::get_field_length_info(const std::string& field_name) const } SearchableStats -DiskIndex::get_stats() const +DiskIndex::get_stats(bool clear_disk_io_stats) const { SearchableStats stats; uint64_t size_on_disk = _nonfield_size_on_disk; uint32_t field_id = 0; for (auto& field_index : _field_indexes) { - auto field_stats = field_index.get_stats(); + auto field_stats = field_index.get_stats(clear_disk_io_stats); size_on_disk += field_stats.size_on_disk(); stats.add_field_stats(_schema.getIndexField(field_id).getName(), field_stats); ++field_id; diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.h b/searchlib/src/vespa/searchlib/diskindex/diskindex.h index 13eec5cece96..226515e7fd0b 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.h +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.h @@ -110,7 +110,7 @@ class DiskIndex : public queryeval::Searchable { /** * Get stats for this index. 
*/ - SearchableStats get_stats() const; + SearchableStats get_stats(bool clear_disk_io_stats) const; const index::Schema &getSchema() const { return _schema; } const std::string &getIndexDir() const { return _indexDir; } diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp index 6247c73d56f6..cfe4f142c669 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp @@ -279,9 +279,9 @@ FieldIndex::get_field_length_info() const } FieldIndexStats -FieldIndex::get_stats() const +FieldIndex::get_stats(bool clear_disk_io_stats) const { - auto cache_disk_io_stats = _cache_disk_io_stats->read_and_clear(); + auto cache_disk_io_stats = _cache_disk_io_stats->read_and_maybe_clear(clear_disk_io_stats); return FieldIndexStats().size_on_disk(_size_on_disk).cache_disk_io_stats(cache_disk_io_stats); } diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.h b/searchlib/src/vespa/searchlib/diskindex/field_index.h index 2b8125c814b5..fb685601b9a4 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.h +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.h @@ -42,9 +42,9 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { _stats.add_cached_read_operation(bytes); } - CacheDiskIoStats read_and_clear() { + CacheDiskIoStats read_and_maybe_clear(bool clear_disk_io_stats) { std::lock_guard guard(_mutex); - return _stats.read_and_clear(); + return _stats.read_and_maybe_clear(clear_disk_io_stats); } }; @@ -86,7 +86,7 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { index::FieldLengthInfo get_field_length_info() const; index::DictionaryFileRandRead* get_dictionary() noexcept { return _dict.get(); } - FieldIndexStats get_stats() const; + FieldIndexStats get_stats(bool clear_disk_io_stats) const; uint32_t get_field_id() const noexcept { return _field_id; } bool is_posting_list_cache_enabled() const noexcept { return _posting_list_cache_enabled; } }; diff --git a/searchlib/src/vespa/searchlib/util/cache_disk_io_stats.h b/searchlib/src/vespa/searchlib/util/cache_disk_io_stats.h index cd6f6d891853..11d1dffa4829 100644 --- a/searchlib/src/vespa/searchlib/util/cache_disk_io_stats.h +++ b/searchlib/src/vespa/searchlib/util/cache_disk_io_stats.h @@ -32,7 +32,13 @@ class CacheDiskIoStats { return _read == rhs.read() && _cached_read == rhs.cached_read(); } - CacheDiskIoStats read_and_clear() noexcept { auto result = *this; clear(); return result; } + CacheDiskIoStats read_and_maybe_clear(bool clear_disk_io_stats) noexcept { + auto result = *this; + if (clear_disk_io_stats) { + clear(); + } + return result; + } void clear() noexcept { _read.clear(); _cached_read.clear(); From d91b3849e892001c12c6e7351c36ad57b9f9bc45 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Wed, 20 Nov 2024 21:26:56 +0000 Subject: [PATCH 098/126] add tracing of retrying and giving up --- .../searcher/FieldCollapsingSearcher.java | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java index dd586c4c3089..b8b5496cf6fb 100644 --- a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java @@ -79,6 +79,9 @@ public 
FieldCollapsingSearcher(int collapseSize, double extraFactor) { private void init(int collapseSize, double extraFactor) { this.defaultCollapseSize = collapseSize; this.extraFactor = extraFactor; + if (extraFactor < 1.0) { + throw new IllegalArgumentException("FieldCollapsingSearcher: extraFactor " + extraFactor + " should be >= 1.0"); + } } /** @@ -95,9 +98,10 @@ public Result search(com.yahoo.search.Query query, Execution execution) { int globalCollapseSize = query.properties().getInteger(collapseSize, defaultCollapseSize); query.properties().set(collapse, "0"); - int hitsToRequest = query.getHits() != 0 ? (int) Math.ceil((query.getOffset() + query.getHits() + 1) * extraFactor) : 0; + int wantedHits = query.getOffset() + query.getHits(); + int hitsToRequest = query.getHits() != 0 ? (int) Math.ceil((wantedHits + 1) * extraFactor) : 0; int nextOffset = 0; - int hitsAfterCollapse; + int hitsAfterCollapse = 0; boolean moreHitsAvailable = true; Map knownCollapses = new java.util.HashMap<>(); Result result = new Result(query); @@ -107,8 +111,11 @@ public Result search(com.yahoo.search.Query query, Execution execution) { String summaryClass = (collapseSummary == null) ? query.getPresentation().getSummary() : collapseSummary; query.trace("Collapsing by '" + Arrays.toString(collapseFields) + "' using summary '" + collapseSummary + "'", 2); - + boolean wantAnotherQuery; do { + if (performedQueries > 0) { + query.trace("Collapsing: retry " + performedQueries + ", only has " + hitsAfterCollapse + " hits, wanted " + wantedHits, 2); + } resultSource = search(query.clone(), execution, nextOffset, hitsToRequest); fill(resultSource, summaryClass, execution); @@ -120,16 +127,18 @@ public Result search(com.yahoo.search.Query query, Execution execution) { moreHitsAvailable = false; } nextOffset += hitsToRequest; - if (hitsAfterCollapse < query.getOffset() + query.getHits()) { + if (hitsAfterCollapse < wantedHits) { hitsToRequest = (int) Math.ceil(hitsToRequest * extraFactor); } ++performedQueries; - - } while (hitsToRequest != 0 - && (hitsAfterCollapse < query.getOffset() + query.getHits()) - && moreHitsAvailable - && (performedQueries <= maxQueries)); - + wantAnotherQuery = (hitsToRequest != 0 + && (hitsAfterCollapse < wantedHits) + && moreHitsAvailable); + } while (wantAnotherQuery && (performedQueries <= maxQueries)); + // failure? + if (wantAnotherQuery) { + query.trace("Collapsing: giving up after " + performedQueries + " performed queries, collapsing removed too many hits", 1); + } // Set correct meta information result.mergeWith(resultSource); // Keep only (offset ... 
offset+hits) hits From 30357908ea4af05cbdc58e5ca20e9746ddacb7c6 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Wed, 20 Nov 2024 22:52:53 +0100 Subject: [PATCH 099/126] More debug logging when deleting expired sessions Also return early if there are no expired sessions for a tenant --- .../config/server/session/SessionRepository.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java index ee63567a0459..fd85ba4064f8 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java @@ -607,6 +607,8 @@ public void deleteExpiredRemoteAndLocalSessions(Predicate sessionIsActi // All known sessions, both local (file) and remote (zookeeper) List sessions = getLocalSessionsIdsFromFileSystem(); sessions.addAll(getRemoteSessionsFromZooKeeper()); + if (sessions.isEmpty()) return; + log.log(Level.FINE, () -> "Sessions for tenant " + tenantName + ": " + sessions); // Skip sessions newly added (we might have a session in the file system, but not in ZooKeeper, @@ -629,10 +631,15 @@ public void deleteExpiredRemoteAndLocalSessions(Predicate sessionIsActi try (var ignored = lockApplication(applicationId)) { Session.Status status = session.getStatus(); boolean activeForApplication = sessionIsActiveForApplication.test(session); + log.log(Level.FINE, () -> "local session " + sessionId + + ", status " + status + (status == UNKNOWN ? "" : ", activeForApplication " + activeForApplication)); if (status == ACTIVATE && activeForApplication) continue; Instant createTime = session.getCreateTime(); boolean hasExpired = hasExpired(createTime); + log.log(Level.FINE, () -> "local session " + sessionId + + ", status " + status + (status == UNKNOWN ? "" : ", created " + createTime + + ", has expired: " + hasExpired)); if (! hasExpired) continue; log.log(Level.FINE, () -> "Remote session " + sessionId + " for " + tenantName + " has expired, deleting it"); @@ -640,10 +647,8 @@ public void deleteExpiredRemoteAndLocalSessions(Predicate sessionIsActi deletedRemoteSessions++; var localSessionCanBeDeleted = canBeDeleted(sessionId, status, createTime, activeForApplication); - log.log(Level.FINE, () -> "Expired local session " + sessionId + - ", status " + status + (status == UNKNOWN ? 
"" : ", created " + createTime) + - ", can be deleted: " + localSessionCanBeDeleted); if (localSessionCanBeDeleted) { + log.log(Level.FINE, () -> "Expired local session " + sessionId + " can be deleted"); deleteLocalSession(sessionId); deletedLocalSessions++; } @@ -655,7 +660,7 @@ public void deleteExpiredRemoteAndLocalSessions(Predicate sessionIsActi } } log.log(Level.FINE, "Deleted " + deletedRemoteSessions + " remote and " + deletedLocalSessions + - " local sessions that had expired"); + " local sessions for tenant " + tenantName + " that had expired"); } private record ApplicationLock(Optional lock) implements Closeable { From 5671a8e24a876649333c7afa97d3e63309e4a21e Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Thu, 21 Nov 2024 08:26:35 +0000 Subject: [PATCH 100/126] add trace when TopK probability is active --- .../com/yahoo/search/dispatch/InterleavedSearchInvoker.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java index a246589ec7eb..2d7312530862 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java @@ -96,6 +96,9 @@ protected Object sendSearchRequest(Query query, Object unusedContext) throws IOE ? estimateHitsToFetch(neededHits, invokers.size(), topkProbabilityOverrride) : estimateHitsToFetch(neededHits, invokers.size()); } + if (q < neededHits) { + query.trace("Only fetching " + q + " of " + neededHits + " per node (TopK probability for " + invokers.size() + " nodes)", 1); + } query.setHits(q); query.setOffset(0); From 6852e86a5aea5873d47ee56ce4aefd294dc38406 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Thu, 21 Nov 2024 09:01:19 +0000 Subject: [PATCH 101/126] add basic trace about search request and result --- .../searchcore/proton/matchengine/matchengine.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp b/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp index d6db14c302a4..bfb14e14126d 100644 --- a/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp +++ b/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp @@ -2,11 +2,12 @@ #include "matchengine.h" #include #include +#include #include #include -#include -#include #include +#include +#include #include @@ -132,6 +133,12 @@ MatchEngine::doSearch(const SearchRequest & searchRequest) { // 3 is the minimum level required for backend tracing. searchRequest.setTraceLevel(trace::Level::lookup(searchRequest.propertiesMap.modelOverrides(), searchRequest.trace().getLevel()), 3); + searchRequest.trace().addEvent(4, + vespalib::make_string("searching for %u hits at offset %u%s%s", + searchRequest.maxhits, + searchRequest.offset, + searchRequest.sortSpec.empty() ? "" : " (with sorting)", + searchRequest.groupSpec.empty() ? 
"" : " (with grouping)")); ISearchHandler::SP searchHandler; auto threadBundle = _threadBundlePool.getBundle(); { // try to find the match handler corresponding to the specified search doc type @@ -155,6 +162,9 @@ MatchEngine::doSearch(const SearchRequest & searchRequest) { if (searchRequest.expired()) { vespalib::Issue::report("search request timed out; results may be incomplete"); } + searchRequest.trace().addEvent(4, + vespalib::make_string("returning %zu hits from total %zu", + ret->hits.size(), ret->totalHitCount)); return ret; } From a7fa11d05ca422ad57f7d743046d205f7030421d Mon Sep 17 00:00:00 2001 From: gjoranv Date: Thu, 21 Nov 2024 09:59:27 +0100 Subject: [PATCH 102/126] Propagate list of secrets in asm-tenant-secret config --- .../model/container/xml/CloudAsmSecrets.java | 14 +++++++---- .../model/container/xml/SecretsTest.java | 25 +++++++++++++------ .../configdefinitions/asm-tenant-secret.def | 2 ++ 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java index 44c0eeae5fb3..9bcf942d159e 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java @@ -53,13 +53,17 @@ public void getConfig(AsmTenantSecretConfig.Builder builder) { builder.system(system.value()) .tenant(tenant.value()); - tenantVaults.forEach(vault -> { - builder.vaults(vaultBuilder -> { - vaultBuilder.id(vault.id()) + tenantVaults.forEach(vault -> builder.vaults( + vaultBuilder -> { vaultBuilder + .id(vault.id()) .name(vault.name()) .externalId(vault.externalId()); - }); - }); + + vault.secrets().forEach(secret -> vaultBuilder.secrets( + secretBuilder -> secretBuilder + .id(secret.id()) + .name(secret.name()))); + })); } } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/SecretsTest.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/SecretsTest.java index 8e51813d3cee..488d9209db8e 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/SecretsTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/SecretsTest.java @@ -57,7 +57,8 @@ void testSecretsCanBeSetUp() { void tenant_vaults_are_propagated_in_config() { var tenantVaults = List.of( new TenantVault("id1", "name1", "externalId1", List.of()), - new TenantVault("id2", "name2", "externalId2", List.of())); + new TenantVault("id2", "name2", "externalId2", + List.of(new TenantVault.Secret("sId1", "sName1")))); var deployState = new DeployState.Builder() .properties(new TestProperties() @@ -76,13 +77,21 @@ void tenant_vaults_are_propagated_in_config() { var vaults = config.vaults(); assertEquals(2, vaults.size()); - assertEquals("id1", vaults.get(0).id()); - assertEquals("name1", vaults.get(0).name()); - assertEquals("externalId1", vaults.get(0).externalId()); - - assertEquals("id2", vaults.get(1).id()); - assertEquals("name2", vaults.get(1).name()); - assertEquals("externalId2", vaults.get(1).externalId()); + var vault1 = vaults.get(0); + assertEquals("id1", vault1.id()); + assertEquals("name1", vault1.name()); + assertEquals("externalId1", vault1.externalId()); + assertEquals(0, vault1.secrets().size()); + + var vault2 = vaults.get(1); + assertEquals("id2", vault2.id()); + assertEquals("name2", vault2.name()); + assertEquals("externalId2", vault2.externalId()); + assertEquals(1, 
vault2.secrets().size()); + + var secret = vault2.secrets().get(0); + assertEquals("sId1", secret.id()); + assertEquals("sName1", secret.name()); } private static AsmTenantSecretConfig getAsmTenantSecretConfig(ApplicationContainerCluster container) { diff --git a/container-disc/src/main/resources/configdefinitions/asm-tenant-secret.def b/container-disc/src/main/resources/configdefinitions/asm-tenant-secret.def index 61f9119594ba..3049c6429e23 100644 --- a/container-disc/src/main/resources/configdefinitions/asm-tenant-secret.def +++ b/container-disc/src/main/resources/configdefinitions/asm-tenant-secret.def @@ -8,3 +8,5 @@ tenant string vaults[].id string vaults[].name string vaults[].externalId string +vaults[].secrets[].id string +vaults[].secrets[].name string From 95c4cbbfbae9b3ea7320ec324dca59c399e9b62f Mon Sep 17 00:00:00 2001 From: Arne H Juul Date: Thu, 21 Nov 2024 10:38:42 +0100 Subject: [PATCH 103/126] Update container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java Co-authored-by: Geir Storli --- .../com/yahoo/search/dispatch/InterleavedSearchInvoker.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java index 2d7312530862..20512465fb2b 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java @@ -97,7 +97,7 @@ protected Object sendSearchRequest(Query query, Object unusedContext) throws IOE : estimateHitsToFetch(neededHits, invokers.size()); } if (q < neededHits) { - query.trace("Only fetching " + q + " of " + neededHits + " per node (TopK probability for " + invokers.size() + " nodes)", 1); + query.trace("Only fetching " + q + " of " + neededHits + " hits per node (TopK probability for " + invokers.size() + " nodes)", 1); } query.setHits(q); query.setOffset(0); From 87a9d0ef8aae9d7a8fc00ac499782e4e8e236ce7 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Thu, 21 Nov 2024 10:19:55 +0000 Subject: [PATCH 104/126] do not serialize to protobuf when message is too big --- searchlib/src/vespa/searchlib/engine/proto_converter.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/searchlib/src/vespa/searchlib/engine/proto_converter.cpp b/searchlib/src/vespa/searchlib/engine/proto_converter.cpp index 519b4f7785ee..c8333db91264 100644 --- a/searchlib/src/vespa/searchlib/engine/proto_converter.cpp +++ b/searchlib/src/vespa/searchlib/engine/proto_converter.cpp @@ -237,7 +237,11 @@ ProtoConverter::docsum_reply_to_proto(const DocsumReply &reply, ProtoDocsumReply if (reply.hasResult()) { vespalib::SmartBuffer buf(4_Ki); vespalib::slime::BinaryFormat::encode(reply.slime(), buf); - proto.set_slime_summaries(buf.obtain().data, buf.obtain().size); + if (buf.obtain().size < 2_Gi - 4_Ki) { + proto.set_slime_summaries(buf.obtain().data, buf.obtain().size); + } else { + proto.add_errors()->set_message("Error: DocsumReply too big, > 2GB"); + } } if (reply.hasIssues()) { reply.issues().for_each_message([&](const std::string &err_msg) From ddd03bc15bfe975d5e4c464c0dad163078ef59df Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Thu, 21 Nov 2024 10:30:17 +0000 Subject: [PATCH 105/126] make "vespa fetch" work when /tmp is a different filesystem --- client/go/internal/vespa/deploy.go | 25 ++++++++++++++++++++++++- 1 file changed, 24 
insertions(+), 1 deletion(-) diff --git a/client/go/internal/vespa/deploy.go b/client/go/internal/vespa/deploy.go index 2c96b8b09352..0ca070f73a68 100644 --- a/client/go/internal/vespa/deploy.go +++ b/client/go/internal/vespa/deploy.go @@ -203,7 +203,30 @@ func fetchFromConfigServer(deployment DeploymentOptions, path string) error { if err := zipDir(dir, zipFile, &ignore.List{}); err != nil { return err } - return os.Rename(zipFile, path) + if err = renameOrCopyTmpFile(zipFile, path); err != nil { + return fmt.Errorf("Could neither rename nor copy %s to %s: %w", zipFile, path, err) + } + return err +} + +func renameOrCopyTmpFile(srcPath, dstPath string) error { + if err := os.Rename(srcPath, dstPath); err == nil { + return err + } + src, err := os.Open(srcPath) + if err != nil { + return err + } + stat, err := os.Stat(srcPath) + if err != nil { + return err + } + dst, err := os.OpenFile(dstPath, os.O_CREATE|os.O_WRONLY, stat.Mode()) + if err != nil { + return err + } + _, err = io.Copy(dst, src) + return err } func fetchFilesFromConfigServer(deployment DeploymentOptions, contentURL *url.URL, path string) error { From e6469681bdba0756fc377ebbfbfc96d809473c20 Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Thu, 21 Nov 2024 11:52:02 +0100 Subject: [PATCH 106/126] Shorten snapshot expiry in CD --- .../hosted/provision/maintenance/SnapshotExpirer.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SnapshotExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SnapshotExpirer.java index fb6df1abd6af..fcb657d94a88 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SnapshotExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SnapshotExpirer.java @@ -24,7 +24,6 @@ public class SnapshotExpirer extends NodeRepositoryMaintainer { private static final Logger LOG = Logger.getLogger(SnapshotExpirer.class.getName()); - private static final Duration MIN_IDLE_PERIOD = Duration.ofDays(1); public SnapshotExpirer(NodeRepository nodeRepository, Duration interval, Metric metric) { super(nodeRepository, interval, metric); @@ -69,10 +68,15 @@ private boolean shouldRemoveAny(List snapshots, NodeList nodes, Instan /** Returns whether given snapshot should be removed */ private boolean shouldRemove(Snapshot snapshot, NodeList nodes, Instant now) { Duration idle = snapshot.idle(now); - if (idle.compareTo(MIN_IDLE_PERIOD) < 0) return false; // No: Snapshot not idle long enough + if (idle.compareTo(expiry()) < 0) return false; // No: Snapshot not idle long enough // TODO(mpolden): Replace this with a proper policy when implementing application-level backups if (nodes.node(snapshot.hostname().value()).isEmpty()) return true; // Yes: Snapshot belongs to non-existent node return snapshot.state() == Snapshot.State.restored; // Yes: Snapshot has been restored } + /** How long we should wait before a snapshot can be cleaned up */ + private Duration expiry() { + return nodeRepository().zone().system().isCd() ? Duration.ofHours(6) : Duration.ofDays(1); + } + } From 189f12d111089bab7bea23754ad0353e6c0dd0f8 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Thu, 21 Nov 2024 10:53:05 +0000 Subject: [PATCH 107/126] Use buffered logging for document selection eval warnings `LOGBP` throttles messages based on the log call site, preventing log spam if a failing selection is constantly evaluated. 
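For readers skimming this patch, here is a minimal sketch of the LOGBP call pattern it introduces. The sketch is not part of the commit: the header paths, the component name and the helper function below are assumptions for illustration (the actual include targets are not legible in this copy of the patch); only the macro usage itself mirrors the diff that follows.

    #include <vespa/log/log.h>
    #include <vespa/log/bufferedlogger.h>   // assumed location of the LOGBP macro

    LOG_SETUP(".example.component");        // hypothetical logger name for this translation unit

    void report_missing_field(const char* field_name) {
        // LOG(warning, ...) would emit one log line per call. LOGBP(warning, ...) keys the
        // message on this call site (file/line) and throttles repeats within the buffering
        // window, so a selection that fails on every evaluated document does not flood the log.
        LOGBP(warning, "Tried to compare with field %s, not found in document type", field_name);
    }

The same substitution is applied to each of the repeated-warning sites in the diff below.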
--- .../src/vespa/document/select/valuenodes.cpp | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/document/src/vespa/document/select/valuenodes.cpp b/document/src/vespa/document/select/valuenodes.cpp index 8f9c3f950fc6..8b80bae21970 100644 --- a/document/src/vespa/document/select/valuenodes.cpp +++ b/document/src/vespa/document/select/valuenodes.cpp @@ -16,7 +16,7 @@ #include #include -#include +#include LOG_SETUP(".document.select.valuenode"); namespace document::select { @@ -374,8 +374,8 @@ IteratorHandler::getInternalValue(const FieldValue& fval) const default: break; } - LOG(warning, "Tried to use unsupported datatype %s in field comparison", - fval.getDataType()->toString().c_str()); + LOGBP(warning, "Tried to use unsupported datatype %s in field comparison", + fval.getDataType()->toString().c_str()); return std::make_unique(); } @@ -452,10 +452,10 @@ FieldValueNode::getValue(const Context& context) const } } } catch (vespalib::IllegalArgumentException& e) { - LOG(warning, "Caught exception while fetching field from document: %s", e.what()); + LOGBP(warning, "Caught exception while fetching field from document: %s", e.what()); return std::make_unique(); } catch (FieldNotFoundException& e) { - LOG(warning, "Tried to compare to field %s, not found in document type", _fieldExpression.c_str()); + LOGBP(warning, "Tried to compare with field %s, not found in document type", _fieldExpression.c_str()); return std::make_unique(); } } @@ -513,8 +513,8 @@ FieldValueNode::traceValue(const Context &context, std::ostream& out) const } } } catch (FieldNotFoundException& e) { - LOG(warning, "Tried to compare to field %s, not found in document type", - _fieldExpression.c_str()); + LOGBP(warning, "Tried to compare with field %s, not found in document type", + _fieldExpression.c_str()); out << "Field not found in document type " << doc.getType() << ". Returning invalid.\n"; return std::make_unique(); @@ -600,8 +600,7 @@ IdValueNode::getValue(const DocumentId& id) const if (id.getScheme().hasGroup()) { value = id.getScheme().getGroup(); } else { - fprintf(stderr, "***** Returning invalid value for %s\n", - id.toString().c_str()); + LOGBP(warning, "Returning invalid value for IdValueNode of type GROUP for id: %s", id.toString().c_str()); return std::make_unique(); } break; @@ -743,7 +742,7 @@ FunctionValueNode::FunctionValueNode(std::string_view name, } else if (name == "abs") { _function = ABS; } else { - throw ParsingFailedException("No function '" + std::string(name) + "' exist.", + throw ParsingFailedException("No function '" + std::string(name) + "' exists.", VESPA_STRLOC); } } @@ -791,7 +790,7 @@ FunctionValueNode::getValue(std::unique_ptr val) const case Value::Bucket: { throw ParsingFailedException( - "No functioncalls are allowed on value of type bucket", + "No function calls are allowed on value of type bucket", VESPA_STRLOC); break; } From e9be6f7ee35ee7aae73ff86a7204edeff6f8d01b Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Thu, 21 Nov 2024 12:46:46 +0100 Subject: [PATCH 108/126] Rename CachedDiskIoStats to FieldIndexIoStats. 
--- .../proton/metrics/disk_io_metrics.cpp | 10 +++++----- .../proton/metrics/disk_io_metrics.h | 6 +++--- .../proton/metrics/index_metrics_entry.cpp | 2 +- .../proton/metrics/index_metrics_entry.h | 2 +- .../server/documentdb_metrics_updater.cpp | 6 +++--- .../searchable_stats_test.cpp | 10 +++++----- .../vespa/searchlib/diskindex/field_index.cpp | 20 +++++++++---------- .../vespa/searchlib/diskindex/field_index.h | 12 +++++------ .../src/vespa/searchlib/util/CMakeLists.txt | 2 +- ..._io_stats.cpp => field_index_io_stats.cpp} | 4 ++-- ...disk_io_stats.h => field_index_io_stats.h} | 18 ++++++++--------- .../searchlib/util/field_index_stats.cpp | 2 +- .../vespa/searchlib/util/field_index_stats.h | 16 +++++++-------- 13 files changed, 55 insertions(+), 55 deletions(-) rename searchlib/src/vespa/searchlib/util/{cache_disk_io_stats.cpp => field_index_io_stats.cpp} (69%) rename searchlib/src/vespa/searchlib/util/{cache_disk_io_stats.h => field_index_io_stats.h} (65%) diff --git a/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.cpp b/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.cpp index 4cd5e4d3c35e..f1f4234ad730 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.cpp +++ b/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.cpp @@ -1,10 +1,10 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "disk_io_metrics.h" -#include +#include -using search::CacheDiskIoStats; using search::DiskIoStats; +using search::FieldIndexIoStats; namespace proton { @@ -27,10 +27,10 @@ DiskIoMetrics::SearchMetrics::SearchMetrics(metrics::MetricSet* parent) DiskIoMetrics::SearchMetrics::~SearchMetrics() = default; void -DiskIoMetrics::SearchMetrics::update(const CacheDiskIoStats& cache_disk_io_stats) +DiskIoMetrics::SearchMetrics::update(const FieldIndexIoStats& io_stats) { - update_helper(_read_bytes, cache_disk_io_stats.read()); - update_helper(_cached_read_bytes, cache_disk_io_stats.cached_read()); + update_helper(_read_bytes, io_stats.read()); + update_helper(_cached_read_bytes, io_stats.cached_read()); } DiskIoMetrics::DiskIoMetrics(metrics::MetricSet* parent) diff --git a/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.h b/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.h index b03969aee75e..25f91243f036 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.h +++ b/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.h @@ -5,7 +5,7 @@ #include #include -namespace search { class CacheDiskIoStats; } +namespace search { class FieldIndexIoStats; } namespace proton { @@ -20,7 +20,7 @@ class DiskIoMetrics : public metrics::MetricSet { public: explicit SearchMetrics(metrics::MetricSet* parent); ~SearchMetrics() override; - void update(const search::CacheDiskIoStats& cache_disk_io_stats); + void update(const search::FieldIndexIoStats& io_stats); }; SearchMetrics _search; @@ -28,7 +28,7 @@ class DiskIoMetrics : public metrics::MetricSet { public: explicit DiskIoMetrics(metrics::MetricSet* parent); ~DiskIoMetrics() override; - void update(const search::CacheDiskIoStats& cache_disk_io_stats) { _search.update(cache_disk_io_stats); } + void update(const search::FieldIndexIoStats& io_stats) { _search.update(io_stats); } }; } diff --git a/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.cpp b/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.cpp index d62f22a52710..b48d7c46e892 100644 --- 
a/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.cpp +++ b/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.cpp @@ -1,7 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "index_metrics_entry.h" -#include +#include namespace proton { diff --git a/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.h b/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.h index 03d9bb7d5506..4454a1947ab8 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.h +++ b/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.h @@ -17,7 +17,7 @@ class IndexMetricsEntry : public FieldMetricsEntry { public: explicit IndexMetricsEntry(const std::string& field_name); ~IndexMetricsEntry() override; - void update_disk_io(const search::CacheDiskIoStats& cache_disk_io_stats) { _disk_io.update(cache_disk_io_stats); } + void update_disk_io(const search::FieldIndexIoStats& io_stats) { _disk_io.update(io_stats); } }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp index 0ee7ffb99479..cbe3157ba6ac 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp @@ -80,15 +80,15 @@ updateIndexMetrics(DocumentDBTaggedMetrics &metrics, const search::SearchableSta updateMemoryUsageMetrics(indexMetrics.memoryUsage, stats.memoryUsage(), totalStats); indexMetrics.docsInMemory.set(stats.docsInMemory()); auto& field_metrics = metrics.ready.index; - search::CacheDiskIoStats disk_io; + search::FieldIndexIoStats disk_io; for (auto& field : stats.get_field_stats()) { auto entry = field_metrics.get_field_metrics_entry(field.first); if (entry) { entry->memoryUsage.update(field.second.memory_usage()); entry->disk_usage.set(field.second.size_on_disk()); - entry->update_disk_io(field.second.cache_disk_io_stats()); + entry->update_disk_io(field.second.io_stats()); } - disk_io.merge(field.second.cache_disk_io_stats()); + disk_io.merge(field.second.io_stats()); } indexMetrics.disk_io.update(disk_io); } diff --git a/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp b/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp index 8d3fff4a7363..6de060bb070c 100644 --- a/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp +++ b/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp @@ -49,15 +49,15 @@ TEST(SearchableStatsTest, field_stats_can_be_merged) auto read_mixed_5_stats = DiskIoStats().read_operations(5).read_bytes_total(7000).read_bytes_min(1000). read_bytes_max(2700); auto f1_stats = FieldIndexStats().memory_usage({100, 40, 10, 5}).size_on_disk(1000). - cache_disk_io_stats(CacheDiskIoStats().read(read_1000_once_stats)); + io_stats(FieldIndexIoStats().read(read_1000_once_stats)); auto f2_stats1 = FieldIndexStats().memory_usage({400, 200, 60, 10}).size_on_disk(1500). - cache_disk_io_stats(CacheDiskIoStats().read(read_1000_once_stats)); + io_stats(FieldIndexIoStats().read(read_1000_once_stats)); auto f2_stats2 = FieldIndexStats().memory_usage({300, 100, 40, 5}).size_on_disk(500). 
- cache_disk_io_stats(CacheDiskIoStats().read(read_mixed_4_stats).cached_read(read_2_once_stats)); + io_stats(FieldIndexIoStats().read(read_mixed_4_stats).cached_read(read_2_once_stats)); auto f2_stats3 = FieldIndexStats().memory_usage({700, 300, 100, 15}).size_on_disk(2000). - cache_disk_io_stats(CacheDiskIoStats().read(read_mixed_5_stats).cached_read(read_2_once_stats)); + io_stats(FieldIndexIoStats().read(read_mixed_5_stats).cached_read(read_2_once_stats)); auto f3_stats = FieldIndexStats().memory_usage({110, 50, 20, 12}).size_on_disk(500). - cache_disk_io_stats(CacheDiskIoStats().read(read_1000_once_stats)); + io_stats(FieldIndexIoStats().read(read_1000_once_stats)); base_stats.add_field_stats("f1", f1_stats).add_field_stats("f2", f2_stats1); SearchableStats added_stats; added_stats.add_field_stats("f2", f2_stats2).add_field_stats("f3", f3_stats); diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp index cfe4f142c669..c7a550ac19fc 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp @@ -34,13 +34,13 @@ const std::vector field_file_names{ std::atomic FieldIndex::_file_id_source(0); -FieldIndex::LockedCacheDiskIoStats::LockedCacheDiskIoStats() noexcept +FieldIndex::LockedFieldIndexIoStats::LockedFieldIndexIoStats() noexcept : _stats(), _mutex() { } -FieldIndex::LockedCacheDiskIoStats::~LockedCacheDiskIoStats() = default; +FieldIndex::LockedFieldIndexIoStats::~LockedFieldIndexIoStats() = default; FieldIndex::FieldIndex() : _posting_file(), @@ -48,7 +48,7 @@ FieldIndex::FieldIndex() _dict(), _file_id(0), _size_on_disk(0), - _cache_disk_io_stats(std::make_shared()), + _io_stats(std::make_shared()), _posting_list_cache(), _posting_list_cache_enabled(false), _bitvector_cache_enabled(false), @@ -169,7 +169,7 @@ FieldIndex::reuse_files(const FieldIndex& rhs) _bit_vector_dict = rhs._bit_vector_dict; _file_id = rhs._file_id; _size_on_disk = rhs._size_on_disk; - _cache_disk_io_stats = rhs._cache_disk_io_stats; + _io_stats = rhs._io_stats; } PostingListHandle @@ -177,7 +177,7 @@ FieldIndex::read_uncached_posting_list(const DictionaryLookupResult& lookup_resu { auto handle = _posting_file->read_posting_list(lookup_result); assert(handle._read_bytes != 0); - _cache_disk_io_stats->add_uncached_read_operation(handle._read_bytes); + _io_stats->add_uncached_read_operation(handle._read_bytes); if (trim) { _posting_file->consider_trim_posting_list(lookup_result, handle, 0.2); // Trim posting list if more than 20% bloat } @@ -212,7 +212,7 @@ FieldIndex::read_posting_list(const DictionaryLookupResult& lookup_result) const auto result = _posting_list_cache->read(key, ctx); if (!ctx.cache_miss) { assert(result._read_bytes != 0); - _cache_disk_io_stats->add_cached_read_operation(result._read_bytes); + _io_stats->add_cached_read_operation(result._read_bytes); } return result; } @@ -232,7 +232,7 @@ FieldIndex::read_uncached_bit_vector(BitVectorDictionaryLookupResult lookup_resu ReadStats read_stats; auto result = _bit_vector_dict->read_bitvector(lookup_result, read_stats); assert(read_stats.read_bytes != 0); - _cache_disk_io_stats->add_uncached_read_operation(read_stats.read_bytes); + _io_stats->add_uncached_read_operation(read_stats.read_bytes); return result; } @@ -258,7 +258,7 @@ FieldIndex::read_bit_vector(BitVectorDictionaryLookupResult lookup_result) const IPostingListCache::Context ctx(this); auto result = _posting_list_cache->read(key, ctx); if 
(!ctx.cache_miss) { - _cache_disk_io_stats->add_cached_read_operation(result->getFileBytes()); + _io_stats->add_cached_read_operation(result->getFileBytes()); } return result; } @@ -281,8 +281,8 @@ FieldIndex::get_field_length_info() const FieldIndexStats FieldIndex::get_stats(bool clear_disk_io_stats) const { - auto cache_disk_io_stats = _cache_disk_io_stats->read_and_maybe_clear(clear_disk_io_stats); - return FieldIndexStats().size_on_disk(_size_on_disk).cache_disk_io_stats(cache_disk_io_stats); + auto io_stats = _io_stats->read_and_maybe_clear(clear_disk_io_stats); + return FieldIndexStats().size_on_disk(_size_on_disk).io_stats(io_stats); } } diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.h b/searchlib/src/vespa/searchlib/diskindex/field_index.h index fb685601b9a4..6aaf09113c77 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.h +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.h @@ -25,13 +25,13 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { using DiskPostingFileReal = Zc4PosOccRandRead; using DiskPostingFileDynamicKReal = ZcPosOccRandRead; - class LockedCacheDiskIoStats { - CacheDiskIoStats _stats; + class LockedFieldIndexIoStats { + FieldIndexIoStats _stats; std::mutex _mutex; public: - LockedCacheDiskIoStats() noexcept; - ~LockedCacheDiskIoStats(); + LockedFieldIndexIoStats() noexcept; + ~LockedFieldIndexIoStats(); void add_uncached_read_operation(uint64_t bytes) { std::lock_guard guard(_mutex); @@ -42,7 +42,7 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { _stats.add_cached_read_operation(bytes); } - CacheDiskIoStats read_and_maybe_clear(bool clear_disk_io_stats) { + FieldIndexIoStats read_and_maybe_clear(bool clear_disk_io_stats) { std::lock_guard guard(_mutex); return _stats.read_and_maybe_clear(clear_disk_io_stats); } @@ -53,7 +53,7 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { std::unique_ptr _dict; uint64_t _file_id; uint64_t _size_on_disk; - std::shared_ptr _cache_disk_io_stats; + std::shared_ptr _io_stats; std::shared_ptr _posting_list_cache; bool _posting_list_cache_enabled; bool _bitvector_cache_enabled; diff --git a/searchlib/src/vespa/searchlib/util/CMakeLists.txt b/searchlib/src/vespa/searchlib/util/CMakeLists.txt index 9d192abad82d..7c0fde6b517d 100644 --- a/searchlib/src/vespa/searchlib/util/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/util/CMakeLists.txt @@ -2,13 +2,13 @@ vespa_add_library(searchlib_util OBJECT SOURCES bufferwriter.cpp - cache_disk_io_stats.cpp comprbuffer.cpp comprfile.cpp data_buffer_writer.cpp disk_io_stats.cpp dirtraverse.cpp drainingbufferwriter.cpp + field_index_io_stats.cpp field_index_stats.cpp file_with_header.cpp filealign.cpp diff --git a/searchlib/src/vespa/searchlib/util/cache_disk_io_stats.cpp b/searchlib/src/vespa/searchlib/util/field_index_io_stats.cpp similarity index 69% rename from searchlib/src/vespa/searchlib/util/cache_disk_io_stats.cpp rename to searchlib/src/vespa/searchlib/util/field_index_io_stats.cpp index 8ec55719f3ad..64f111c08d5e 100644 --- a/searchlib/src/vespa/searchlib/util/cache_disk_io_stats.cpp +++ b/searchlib/src/vespa/searchlib/util/field_index_io_stats.cpp @@ -1,11 +1,11 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#include "cache_disk_io_stats.h" +#include "field_index_io_stats.h" #include namespace search { -std::ostream& operator<<(std::ostream& os, const CacheDiskIoStats& stats) { +std::ostream& operator<<(std::ostream& os, const FieldIndexIoStats& stats) { os << "{read: " << stats.read() << ", cached_read: " << stats.cached_read() << "}"; return os; } diff --git a/searchlib/src/vespa/searchlib/util/cache_disk_io_stats.h b/searchlib/src/vespa/searchlib/util/field_index_io_stats.h similarity index 65% rename from searchlib/src/vespa/searchlib/util/cache_disk_io_stats.h rename to searchlib/src/vespa/searchlib/util/field_index_io_stats.h index 11d1dffa4829..5b65d3c163e8 100644 --- a/searchlib/src/vespa/searchlib/util/cache_disk_io_stats.h +++ b/searchlib/src/vespa/searchlib/util/field_index_io_stats.h @@ -6,33 +6,33 @@ namespace search { /* - * Class tracking disk io when using a cache. + * Class tracking disk io for a single field. */ -class CacheDiskIoStats { +class FieldIndexIoStats { DiskIoStats _read; // cache miss DiskIoStats _cached_read; // cache hit public: - CacheDiskIoStats() noexcept + FieldIndexIoStats() noexcept : _read(), _cached_read() { } - CacheDiskIoStats& read(const DiskIoStats& value) { _read = value; return *this; } - CacheDiskIoStats& cached_read(DiskIoStats& value) { _cached_read = value; return *this; } + FieldIndexIoStats& read(const DiskIoStats& value) { _read = value; return *this; } + FieldIndexIoStats& cached_read(DiskIoStats& value) { _cached_read = value; return *this; } const DiskIoStats& read() const noexcept { return _read; } const DiskIoStats& cached_read() const noexcept { return _cached_read; } - void merge(const CacheDiskIoStats& rhs) noexcept { + void merge(const FieldIndexIoStats& rhs) noexcept { _read.merge(rhs.read()); _cached_read.merge(rhs.cached_read()); } - bool operator==(const CacheDiskIoStats &rhs) const noexcept { + bool operator==(const FieldIndexIoStats &rhs) const noexcept { return _read == rhs.read() && _cached_read == rhs.cached_read(); } - CacheDiskIoStats read_and_maybe_clear(bool clear_disk_io_stats) noexcept { + FieldIndexIoStats read_and_maybe_clear(bool clear_disk_io_stats) noexcept { auto result = *this; if (clear_disk_io_stats) { clear(); @@ -47,6 +47,6 @@ class CacheDiskIoStats { void add_cached_read_operation(uint64_t bytes) noexcept { _cached_read.add_read_operation(bytes); } }; -std::ostream& operator<<(std::ostream& os, const CacheDiskIoStats& stats); +std::ostream& operator<<(std::ostream& os, const FieldIndexIoStats& stats); } diff --git a/searchlib/src/vespa/searchlib/util/field_index_stats.cpp b/searchlib/src/vespa/searchlib/util/field_index_stats.cpp index 96f57bae2b30..c28363ce7585 100644 --- a/searchlib/src/vespa/searchlib/util/field_index_stats.cpp +++ b/searchlib/src/vespa/searchlib/util/field_index_stats.cpp @@ -7,7 +7,7 @@ namespace search { std::ostream& operator<<(std::ostream& os, const FieldIndexStats& stats) { os << "{memory: " << stats.memory_usage() << ", disk: " << stats.size_on_disk() << - ", diskio: " << stats.cache_disk_io_stats() << "}"; + ", diskio: " << stats.io_stats() << "}"; return os; } diff --git a/searchlib/src/vespa/searchlib/util/field_index_stats.h b/searchlib/src/vespa/searchlib/util/field_index_stats.h index 9c153bcf4ca1..8b14de204514 100644 --- a/searchlib/src/vespa/searchlib/util/field_index_stats.h +++ b/searchlib/src/vespa/searchlib/util/field_index_stats.h @@ -1,7 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#pragma once -#include "cache_disk_io_stats.h" +#include "field_index_io_stats.h" #include #include @@ -15,13 +15,13 @@ class FieldIndexStats private: vespalib::MemoryUsage _memory_usage; size_t _size_on_disk; // in bytes - CacheDiskIoStats _cache_disk_io_stats; + FieldIndexIoStats _io_stats; public: FieldIndexStats() noexcept : _memory_usage(), _size_on_disk(0), - _cache_disk_io_stats() + _io_stats() {} FieldIndexStats &memory_usage(const vespalib::MemoryUsage &usage) noexcept { _memory_usage = usage; @@ -34,19 +34,19 @@ class FieldIndexStats } size_t size_on_disk() const noexcept { return _size_on_disk; } - FieldIndexStats& cache_disk_io_stats(const CacheDiskIoStats& stats) { _cache_disk_io_stats = stats; return *this; } - const CacheDiskIoStats& cache_disk_io_stats() const noexcept { return _cache_disk_io_stats; } + FieldIndexStats& io_stats(const FieldIndexIoStats& stats) { _io_stats = stats; return *this; } + const FieldIndexIoStats& io_stats() const noexcept { return _io_stats; } void merge(const FieldIndexStats &rhs) noexcept { _memory_usage.merge(rhs._memory_usage); _size_on_disk += rhs._size_on_disk; - _cache_disk_io_stats.merge(rhs._cache_disk_io_stats); + _io_stats.merge(rhs._io_stats); } bool operator==(const FieldIndexStats& rhs) const noexcept { return _memory_usage == rhs._memory_usage && - _size_on_disk == rhs._size_on_disk && - _cache_disk_io_stats == rhs._cache_disk_io_stats; + _size_on_disk == rhs._size_on_disk && + _io_stats == rhs._io_stats; } }; From 50284e553bdde001b23576ff6fca9e356fddb2e3 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Thu, 21 Nov 2024 13:19:15 +0100 Subject: [PATCH 109/126] Rename SearchableStats to IndexStats. --- .../tests/proton/documentdb/feedview_test.cpp | 2 +- .../proton/index/diskindexwrapper.h | 2 +- .../searchcore/proton/index/indexmanager.h | 4 ++-- .../proton/index/memoryindexwrapper.h | 2 +- .../server/documentdb_metrics_updater.cpp | 6 ++--- .../searchcore/proton/server/idocumentsubdb.h | 4 ++-- .../proton/server/searchabledocsubdb.cpp | 8 +++---- .../proton/server/searchabledocsubdb.h | 2 +- .../proton/server/storeonlydocsubdb.cpp | 4 ++-- .../proton/server/storeonlydocsubdb.h | 2 +- .../proton/test/dummy_document_sub_db.h | 2 +- .../proton/test/mock_index_manager.h | 4 ++-- .../searchcorespi/index/fakeindexsearchable.h | 4 ++-- .../vespa/searchcorespi/index/iindexmanager.h | 4 ++-- .../index/index_manager_explorer.cpp | 6 ++--- .../index/index_searchable_stats.cpp | 4 ++-- .../index/index_searchable_stats.h | 8 +++---- .../searchcorespi/index/indexcollection.cpp | 8 +++---- .../searchcorespi/index/indexcollection.h | 4 ++-- .../searchcorespi/index/indexmaintainer.cpp | 14 +++++------ .../searchcorespi/index/indexmaintainer.h | 4 ++-- .../searchcorespi/index/indexsearchable.h | 6 ++--- .../index/warmupindexcollection.cpp | 6 ++--- .../index/warmupindexcollection.h | 2 +- searchlib/CMakeLists.txt | 2 +- .../memory_index/memory_index_test.cpp | 6 ++--- .../.gitignore | 0 .../src/tests/util/index_stats/CMakeLists.txt | 9 +++++++ .../index_stats_test.cpp} | 23 ++++++++---------- .../util/searchable_stats/CMakeLists.txt | 9 ------- .../vespa/searchlib/diskindex/diskindex.cpp | 4 ++-- .../src/vespa/searchlib/diskindex/diskindex.h | 4 ++-- .../memoryindex/field_index_collection.cpp | 4 ++-- .../memoryindex/field_index_collection.h | 4 ++-- .../searchlib/memoryindex/memory_index.cpp | 2 +- .../searchlib/memoryindex/memory_index.h | 4 ++-- .../src/vespa/searchlib/util/CMakeLists.txt | 2 +- .../{searchable_stats.cpp => index_stats.cpp} | 18 
+++++++------- .../{searchable_stats.h => index_stats.h} | 24 +++++++++---------- 39 files changed, 112 insertions(+), 115 deletions(-) rename searchlib/src/tests/util/{searchable_stats => index_stats}/.gitignore (100%) create mode 100644 searchlib/src/tests/util/index_stats/CMakeLists.txt rename searchlib/src/tests/util/{searchable_stats/searchable_stats_test.cpp => index_stats/index_stats_test.cpp} (87%) delete mode 100644 searchlib/src/tests/util/searchable_stats/CMakeLists.txt rename searchlib/src/vespa/searchlib/util/{searchable_stats.cpp => index_stats.cpp} (75%) rename searchlib/src/vespa/searchlib/util/{searchable_stats.h => index_stats.h} (62%) diff --git a/searchcore/src/tests/proton/documentdb/feedview_test.cpp b/searchcore/src/tests/proton/documentdb/feedview_test.cpp index c4a60fd59bc0..38094600329c 100644 --- a/searchcore/src/tests/proton/documentdb/feedview_test.cpp +++ b/searchcore/src/tests/proton/documentdb/feedview_test.cpp @@ -50,7 +50,7 @@ using search::DocumentMetaData; using vespalib::IDestructorCallback; using vespalib::Gate; using vespalib::GateCallback; -using search::SearchableStats; +using search::IndexStats; using search::test::DocBuilder; using search::test::SchemaBuilder; using searchcorespi::IndexSearchable; diff --git a/searchcore/src/vespa/searchcore/proton/index/diskindexwrapper.h b/searchcore/src/vespa/searchcore/proton/index/diskindexwrapper.h index b7e7b82ceb49..95081f08b9cf 100644 --- a/searchcore/src/vespa/searchcore/proton/index/diskindexwrapper.h +++ b/searchcore/src/vespa/searchcore/proton/index/diskindexwrapper.h @@ -31,7 +31,7 @@ class DiskIndexWrapper : public searchcorespi::index::IDiskIndex { createBlueprint(const IRequestContext & requestContext, const FieldSpecList &fields, const Node &term) override { return _index.createBlueprint(requestContext, fields, term); } - search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const override { + search::IndexStats get_index_stats(bool clear_disk_io_stats) const override { return _index.get_stats(clear_disk_io_stats); } diff --git a/searchcore/src/vespa/searchcore/proton/index/indexmanager.h b/searchcore/src/vespa/searchcore/proton/index/indexmanager.h index 37524491a08a..08828d8dc0ac 100644 --- a/searchcore/src/vespa/searchcore/proton/index/indexmanager.h +++ b/searchcore/src/vespa/searchcore/proton/index/indexmanager.h @@ -120,8 +120,8 @@ class IndexManager : public searchcorespi::IIndexManager return _maintainer.getSearchable(); } - search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const override { - return _maintainer.getSearchableStats(clear_disk_io_stats); + search::IndexStats get_index_stats(bool clear_disk_io_stats) const override { + return _maintainer.get_index_stats(clear_disk_io_stats); } searchcorespi::IFlushTarget::List getFlushTargets() override { diff --git a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h index a3ce0f5603e4..9b7f781edc8b 100644 --- a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h +++ b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h @@ -49,7 +49,7 @@ class MemoryIndexWrapper : public searchcorespi::index::IMemoryIndex { { return _index.createBlueprint(requestContext, fields, term); } - search::SearchableStats getSearchableStats(bool) const override { + search::IndexStats get_index_stats(bool) const override { return _index.get_stats(); } diff --git 
a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp index cbe3157ba6ac..1c5b859bc830 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include @@ -73,7 +73,7 @@ updateDiskUsageMetric(metrics::LongValueMetric &metric, uint64_t diskUsage, Tota } void -updateIndexMetrics(DocumentDBTaggedMetrics &metrics, const search::SearchableStats &stats, TotalStats &totalStats) +updateIndexMetrics(DocumentDBTaggedMetrics &metrics, const search::IndexStats &stats, TotalStats &totalStats) { DocumentDBTaggedMetrics::IndexMetrics &indexMetrics = metrics.index; updateDiskUsageMetric(indexMetrics.diskUsage, stats.sizeOnDisk(), totalStats); @@ -303,7 +303,7 @@ DocumentDBMetricsUpdater::updateMetrics(const metrics::MetricLockGuard & guard, { TotalStats totalStats; ExecutorThreadingServiceStats threadingServiceStats = _writeService.getStats(); - updateIndexMetrics(metrics, _subDBs.getReadySubDB()->getSearchableStats(true), totalStats); + updateIndexMetrics(metrics, _subDBs.getReadySubDB()->get_index_stats(true), totalStats); updateAttributeMetrics(metrics, _subDBs, totalStats); updateMatchingMetrics(guard, metrics, *_subDBs.getReadySubDB()); updateDocumentsMetrics(metrics, _subDBs); diff --git a/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h b/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h index 05d0d34fa84d..f02f3495bce1 100644 --- a/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -122,7 +122,7 @@ class IDocumentSubDB virtual SerialNum getNewestFlushedSerial() = 0; virtual void pruneRemovedFields(SerialNum serialNum) = 0; virtual void setIndexSchema(std::shared_ptr schema, SerialNum serialNum) = 0; - virtual search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const = 0; + virtual search::IndexStats get_index_stats(bool clear_disk_io_stats) const = 0; virtual std::shared_ptr getDocumentRetriever() = 0; virtual matching::MatchingStats getMatcherStats(const std::string &rankProfile) const = 0; diff --git a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp index ae53eb8372e6..4ac1ad837dea 100644 --- a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp @@ -318,10 +318,10 @@ SearchableDocSubDB::getNumActiveDocs() const return (metaStoreCtx) ? metaStoreCtx->getReadGuard()->get().getNumActiveLids() : 0; } -search::SearchableStats -SearchableDocSubDB::getSearchableStats(bool clear_disk_io_stats) const +search::IndexStats +SearchableDocSubDB::get_index_stats(bool clear_disk_io_stats) const { - return _indexMgr ? _indexMgr->getSearchableStats(clear_disk_io_stats) : search::SearchableStats(); + return _indexMgr ? 
_indexMgr->get_index_stats(clear_disk_io_stats) : search::IndexStats(); } std::shared_ptr @@ -375,7 +375,7 @@ SearchableDocSubDB::get_transient_resource_usage() const auto result = FastAccessDocSubDB::get_transient_resource_usage(); // Transient disk usage is measured as the total disk usage of all current fusion indexes. // Transient memory usage is measured as the total memory usage of all memory indexes. - auto stats = getSearchableStats(false); + auto stats = get_index_stats(false); result.merge({stats.fusion_size_on_disk(), stats.memoryUsage().allocatedBytes()}); return result; } diff --git a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h index a34a11861423..5cae45e9f5f9 100644 --- a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h @@ -134,7 +134,7 @@ SearchableDocSubDB : public FastAccessDocSubDB, SerialNum getNewestFlushedSerial() override; void setIndexSchema(std::shared_ptr schema, SerialNum serialNum) override; size_t getNumActiveDocs() const override; - search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const override ; + search::IndexStats get_index_stats(bool clear_disk_io_stats) const override ; std::shared_ptr getDocumentRetriever() override; matching::MatchingStats getMatcherStats(const std::string &rankProfile) const override; void close() override; diff --git a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp index 3c6dc91b8860..6cdc3de2b6c9 100644 --- a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp @@ -543,8 +543,8 @@ StoreOnlyDocSubDB::setIndexSchema(std::shared_ptr, SerialNum) assert(_writeService.master().isCurrentThread()); } -search::SearchableStats -StoreOnlyDocSubDB::getSearchableStats(bool) const +search::IndexStats +StoreOnlyDocSubDB::get_index_stats(bool) const { return {}; } diff --git a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h index 0d22ac4598a5..d96c0a6a1cf3 100644 --- a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h @@ -231,7 +231,7 @@ class StoreOnlyDocSubDB : public DocSubDB void pruneRemovedFields(SerialNum serialNum) override; void setIndexSchema(std::shared_ptr schema, SerialNum serialNum) override; - search::SearchableStats getSearchableStats(bool) const override; + search::IndexStats get_index_stats(bool) const override; std::shared_ptr getDocumentRetriever() override; matching::MatchingStats getMatcherStats(const std::string &rankProfile) const override; void close() override; diff --git a/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h b/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h index 70f592667557..9d5d56b003a1 100644 --- a/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h +++ b/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h @@ -74,7 +74,7 @@ struct DummyDocumentSubDb : public IDocumentSubDB SerialNum getNewestFlushedSerial() override { return 0; } void pruneRemovedFields(SerialNum) override { } void setIndexSchema(std::shared_ptr, SerialNum) override { } - search::SearchableStats getSearchableStats(bool) 
const override { + search::IndexStats get_index_stats(bool) const override { return {}; } std::shared_ptr getDocumentRetriever() override { diff --git a/searchcore/src/vespa/searchcore/proton/test/mock_index_manager.h b/searchcore/src/vespa/searchcore/proton/test/mock_index_manager.h index f69c11971e47..38f14c4e5df9 100644 --- a/searchcore/src/vespa/searchcore/proton/test/mock_index_manager.h +++ b/searchcore/src/vespa/searchcore/proton/test/mock_index_manager.h @@ -19,8 +19,8 @@ struct MockIndexManager : public searchcorespi::IIndexManager searchcorespi::IndexSearchable::SP getSearchable() const override { return searchcorespi::IndexSearchable::SP(); } - search::SearchableStats getSearchableStats(bool) const override { - return search::SearchableStats(); + search::IndexStats get_index_stats(bool) const override { + return search::IndexStats(); } searchcorespi::IFlushTarget::List getFlushTargets() override { return searchcorespi::IFlushTarget::List(); diff --git a/searchcore/src/vespa/searchcorespi/index/fakeindexsearchable.h b/searchcore/src/vespa/searchcorespi/index/fakeindexsearchable.h index 2696fa8176fd..9379e1da7226 100644 --- a/searchcore/src/vespa/searchcorespi/index/fakeindexsearchable.h +++ b/searchcore/src/vespa/searchcorespi/index/fakeindexsearchable.h @@ -28,8 +28,8 @@ class FakeIndexSearchable : public IndexSearchable { return _fake.createBlueprint(requestContext, field, term); } - search::SearchableStats getSearchableStats(bool) const override { - return search::SearchableStats(); + search::IndexStats get_index_stats(bool) const override { + return search::IndexStats(); } search::SerialNum getSerialNum() const override { return 0; } diff --git a/searchcore/src/vespa/searchcorespi/index/iindexmanager.h b/searchcore/src/vespa/searchcorespi/index/iindexmanager.h index 4accba57d0e5..b441ce0715c5 100644 --- a/searchcore/src/vespa/searchcorespi/index/iindexmanager.h +++ b/searchcore/src/vespa/searchcorespi/index/iindexmanager.h @@ -174,11 +174,11 @@ class IIndexManager { virtual IndexSearchable::SP getSearchable() const = 0; /** - * Returns searchable stats for this index manager. + * Returns index stats for this index manager. * * @return statistics gathered about underlying memory and disk indexes. */ - virtual search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const = 0; + virtual search::IndexStats get_index_stats(bool clear_disk_io_stats) const = 0; /** * Returns the list of all flush targets contained in this index manager. 
diff --git a/searchcore/src/vespa/searchcorespi/index/index_manager_explorer.cpp b/searchcore/src/vespa/searchcorespi/index/index_manager_explorer.cpp index 051175be9afb..e16df0a0bb6e 100644 --- a/searchcore/src/vespa/searchcorespi/index/index_manager_explorer.cpp +++ b/searchcore/src/vespa/searchcorespi/index/index_manager_explorer.cpp @@ -8,7 +8,7 @@ using vespalib::slime::Cursor; using vespalib::slime::Inserter; -using search::SearchableStats; +using search::IndexStats; using searchcorespi::index::DiskIndexStats; using searchcorespi::index::MemoryIndexStats; @@ -20,7 +20,7 @@ void insertDiskIndex(Cursor &arrayCursor, const DiskIndexStats &diskIndex) { Cursor &diskIndexCursor = arrayCursor.addObject(); - const SearchableStats &sstats = diskIndex.getSearchableStats(); + const IndexStats &sstats = diskIndex.get_index_stats(); diskIndexCursor.setLong("serialNum", diskIndex.getSerialNum()); diskIndexCursor.setString("indexDir", diskIndex.getIndexdir()); diskIndexCursor.setLong("sizeOnDisk", sstats.sizeOnDisk()); @@ -40,7 +40,7 @@ void insertMemoryIndex(Cursor &arrayCursor, const MemoryIndexStats &memoryIndex) { Cursor &memoryIndexCursor = arrayCursor.addObject(); - const SearchableStats &sstats = memoryIndex.getSearchableStats(); + const IndexStats &sstats = memoryIndex.get_index_stats(); memoryIndexCursor.setLong("serialNum", memoryIndex.getSerialNum()); memoryIndexCursor.setLong("docsInMemory", sstats.docsInMemory()); insertMemoryUsage(memoryIndexCursor, sstats.memoryUsage()); diff --git a/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.cpp b/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.cpp index 37c30ad9ddd7..fb916058934d 100644 --- a/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.cpp +++ b/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.cpp @@ -8,13 +8,13 @@ namespace searchcorespi::index { IndexSearchableStats::IndexSearchableStats() : _serialNum(0), - _searchableStats() + _index_stats() { } IndexSearchableStats::IndexSearchableStats(const IndexSearchable &index) : _serialNum(index.getSerialNum()), - _searchableStats(index.getSearchableStats(false)) + _index_stats(index.get_index_stats(false)) { } diff --git a/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.h b/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.h index 023649524706..02f68cf8735c 100644 --- a/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.h +++ b/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.h @@ -3,7 +3,7 @@ #pragma once #include -#include +#include namespace searchcorespi { class IndexSearchable; } @@ -15,15 +15,15 @@ namespace searchcorespi::index { class IndexSearchableStats { using SerialNum = search::SerialNum; - using SearchableStats = search::SearchableStats; + using IndexStats = search::IndexStats; SerialNum _serialNum; - SearchableStats _searchableStats; + IndexStats _index_stats; public: IndexSearchableStats(); IndexSearchableStats(const IndexSearchable &index); bool operator<(const IndexSearchableStats &rhs) const; SerialNum getSerialNum() const { return _serialNum; } - const SearchableStats &getSearchableStats() const { return _searchableStats; } + const IndexStats &get_index_stats() const { return _index_stats; } }; } diff --git a/searchcore/src/vespa/searchcorespi/index/indexcollection.cpp b/searchcore/src/vespa/searchcorespi/index/indexcollection.cpp index 0647e366a975..b6183e34ee13 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexcollection.cpp +++ 
b/searchcore/src/vespa/searchcorespi/index/indexcollection.cpp @@ -108,12 +108,12 @@ IndexCollection::getSourceId(uint32_t i) const return _sources[i].id; } -search::SearchableStats -IndexCollection::getSearchableStats(bool clear_disk_io_stats) const +search::IndexStats +IndexCollection::get_index_stats(bool clear_disk_io_stats) const { - search::SearchableStats stats; + search::IndexStats stats; for (size_t i = 0; i < _sources.size(); ++i) { - stats.merge(_sources[i].source_wrapper->getSearchableStats(clear_disk_io_stats)); + stats.merge(_sources[i].source_wrapper->get_index_stats(clear_disk_io_stats)); } return stats; } diff --git a/searchcore/src/vespa/searchcorespi/index/indexcollection.h b/searchcore/src/vespa/searchcorespi/index/indexcollection.h index 1ad57bc6eb87..455f41637f2b 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexcollection.h +++ b/searchcore/src/vespa/searchcorespi/index/indexcollection.h @@ -3,7 +3,7 @@ #pragma once #include "isearchableindexcollection.h" -#include +#include namespace searchcorespi { @@ -50,7 +50,7 @@ class IndexCollection : public ISearchableIndexCollection createBlueprint(const IRequestContext & requestContext, const FieldSpec &field, const Node &term) override; std::unique_ptr createBlueprint(const IRequestContext & requestContext, const FieldSpecList &fields, const Node &term) override; - search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const override; + search::IndexStats get_index_stats(bool clear_disk_io_stats) const override; search::SerialNum getSerialNum() const override; void accept(IndexSearchableVisitor &visitor) const override; diff --git a/searchcore/src/vespa/searchcorespi/index/indexmaintainer.cpp b/searchcore/src/vespa/searchcorespi/index/indexmaintainer.cpp index c7960be2568b..147b7eeef69b 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexmaintainer.cpp +++ b/searchcore/src/vespa/searchcorespi/index/indexmaintainer.cpp @@ -135,7 +135,7 @@ class DiskIndexWithDestructorCallback : public IDiskIndex { { return _index->createBlueprint(requestContext, fields, term); } - search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const override; + search::IndexStats get_index_stats(bool clear_disk_io_stats) const override; search::SerialNum getSerialNum() const override { return _index->getSerialNum(); } @@ -160,10 +160,10 @@ class DiskIndexWithDestructorCallback : public IDiskIndex { DiskIndexWithDestructorCallback::~DiskIndexWithDestructorCallback() = default; -search::SearchableStats -DiskIndexWithDestructorCallback::getSearchableStats(bool clear_disk_io_stats) const +search::IndexStats +DiskIndexWithDestructorCallback::get_index_stats(bool clear_disk_io_stats) const { - auto stats = _index->getSearchableStats(clear_disk_io_stats); + auto stats = _index->get_index_stats(clear_disk_io_stats); uint64_t transient_size = _disk_indexes.get_transient_size(_layout, _index_disk_dir); stats.fusion_size_on_disk(transient_size); return stats; @@ -315,7 +315,7 @@ IndexMaintainer::loadDiskIndex(const string &indexDir) } vespalib::Timer timer; auto index = _operations.loadDiskIndex(indexDir); - auto stats = index->getSearchableStats(false); + auto stats = index->get_index_stats(false); _disk_indexes->setActive(indexDir, stats.sizeOnDisk()); auto retval = std::make_shared( std::move(index), @@ -338,7 +338,7 @@ IndexMaintainer::reloadDiskIndex(const IDiskIndex &oldIndex) vespalib::Timer timer; const IDiskIndex &wrappedDiskIndex = (dynamic_cast(oldIndex)).getWrapped(); auto index = 
_operations.reloadDiskIndex(wrappedDiskIndex); - auto stats = index->getSearchableStats(false); + auto stats = index->get_index_stats(false); _disk_indexes->setActive(indexDir, stats.sizeOnDisk()); auto retval = std::make_shared( std::move(index), @@ -1184,7 +1184,7 @@ IndexMaintainer::getFusionStats() const source_list = _source_list; stats.maxFlushed = _maxFlushed; } - stats.diskUsage = source_list->getSearchableStats(false).sizeOnDisk(); + stats.diskUsage = source_list->get_index_stats(false).sizeOnDisk(); { LockGuard guard(_fusion_lock); stats.numUnfused = _fusion_spec.flush_ids.size() + ((_fusion_spec.last_fusion_id != 0) ? 1 : 0); diff --git a/searchcore/src/vespa/searchcorespi/index/indexmaintainer.h b/searchcore/src/vespa/searchcorespi/index/indexmaintainer.h index 0e429aeee833..a713755d64ec 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexmaintainer.h +++ b/searchcore/src/vespa/searchcorespi/index/indexmaintainer.h @@ -361,9 +361,9 @@ class IndexMaintainer : public IIndexManager, return _source_list; } - search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const override { + search::IndexStats get_index_stats(bool clear_disk_io_stats) const override { LockGuard lock(_new_search_lock); - return _source_list->getSearchableStats(clear_disk_io_stats); + return _source_list->get_index_stats(clear_disk_io_stats); } IFlushTarget::List getFlushTargets() override; diff --git a/searchcore/src/vespa/searchcorespi/index/indexsearchable.h b/searchcore/src/vespa/searchcorespi/index/indexsearchable.h index d157a8c341dd..5811d0f812c4 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexsearchable.h +++ b/searchcore/src/vespa/searchcorespi/index/indexsearchable.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include namespace searchcorespi { @@ -38,9 +38,9 @@ class IndexSearchable : public search::queryeval::Searchable, using SP = std::shared_ptr; /** - * Returns the searchable stats for this index searchable. + * Returns the index stats for this index searchable. */ - virtual search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const = 0; + virtual search::IndexStats get_index_stats(bool clear_disk_io_stats) const = 0; /** * Returns the serial number for this index searchable. 
diff --git a/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.cpp b/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.cpp index cdd6f2bc892d..0da8518f8219 100644 --- a/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.cpp +++ b/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.cpp @@ -225,10 +225,10 @@ WarmupIndexCollection::createBlueprint(const IRequestContext & requestContext, return _prev->createBlueprint(requestContext, fields, term); } -search::SearchableStats -WarmupIndexCollection::getSearchableStats(bool clear_disk_io_stats) const +search::IndexStats +WarmupIndexCollection::get_index_stats(bool clear_disk_io_stats) const { - return _prev->getSearchableStats(clear_disk_io_stats); + return _prev->get_index_stats(clear_disk_io_stats); } diff --git a/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.h b/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.h index a0936a26581f..28c5c3bcf4a0 100644 --- a/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.h +++ b/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.h @@ -48,7 +48,7 @@ class WarmupIndexCollection : public ISearchableIndexCollection, createBlueprint(const IRequestContext & requestContext, const FieldSpec &field, const Node &term) override; std::unique_ptr createBlueprint(const IRequestContext & requestContext, const FieldSpecList &fields, const Node &term) override; - search::SearchableStats getSearchableStats(bool clear_disk_io_stats) const override; + search::IndexStats get_index_stats(bool clear_disk_io_stats) const override; search::SerialNum getSerialNum() const override; void accept(IndexSearchableVisitor &visitor) const override; diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 851e480d7832..d060fa489a7e 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -253,7 +253,7 @@ vespa_define_module( src/tests/util src/tests/util/bufferwriter src/tests/util/folded_string_compare - src/tests/util/searchable_stats + src/tests/util/index_stats src/tests/util/slime_output_raw_buf_adapter src/tests/util/token_extractor src/tests/vespa-fileheader-inspect diff --git a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp index 8f88b108e977..2354f5bdfb1c 100644 --- a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp +++ b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -36,7 +36,7 @@ using document::DataType; using document::Document; using document::FieldValue; using search::FieldIndexStats; -using search::SearchableStats; +using search::IndexStats; using search::ScheduleTaskCallback; using search::index::FieldLengthInfo; using search::index::IFieldLengthInspector; @@ -471,7 +471,7 @@ TEST(MemoryIndexTest, require_that_num_docs_and_doc_id_limit_is_returned) namespace { -FieldIndexStats get_field_stats(const SearchableStats &stats, const std::string& field_name) +FieldIndexStats get_field_stats(const IndexStats &stats, const std::string& field_name) { auto itr = stats.get_field_stats().find(field_name); return itr == stats.get_field_stats().end() ? 
FieldIndexStats() : itr->second; diff --git a/searchlib/src/tests/util/searchable_stats/.gitignore b/searchlib/src/tests/util/index_stats/.gitignore similarity index 100% rename from searchlib/src/tests/util/searchable_stats/.gitignore rename to searchlib/src/tests/util/index_stats/.gitignore diff --git a/searchlib/src/tests/util/index_stats/CMakeLists.txt b/searchlib/src/tests/util/index_stats/CMakeLists.txt new file mode 100644 index 000000000000..9bc9f792f240 --- /dev/null +++ b/searchlib/src/tests/util/index_stats/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_index_stats_test_app TEST + SOURCES + index_stats_test.cpp + DEPENDS + vespa_searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_index_stats_test_app COMMAND searchlib_index_stats_test_app) diff --git a/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp b/searchlib/src/tests/util/index_stats/index_stats_test.cpp similarity index 87% rename from searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp rename to searchlib/src/tests/util/index_stats/index_stats_test.cpp index 6de060bb070c..b84dbdbfd4f3 100644 --- a/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp +++ b/searchlib/src/tests/util/index_stats/index_stats_test.cpp @@ -1,21 +1,18 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include +#include #include -#include -LOG_SETUP("searchable_stats_test"); - using namespace search; -TEST(SearchableStatsTest, stats_can_be_merged) +TEST(IndexStatsTest, stats_can_be_merged) { - SearchableStats stats; + IndexStats stats; EXPECT_EQ(0u, stats.memoryUsage().allocatedBytes()); EXPECT_EQ(0u, stats.docsInMemory()); EXPECT_EQ(0u, stats.sizeOnDisk()); EXPECT_EQ(0u, stats.fusion_size_on_disk()); { - SearchableStats rhs; + IndexStats rhs; EXPECT_EQ(&rhs.memoryUsage(vespalib::MemoryUsage(100,0,0,0)), &rhs); EXPECT_EQ(&rhs.docsInMemory(10), &rhs); EXPECT_EQ(&rhs.sizeOnDisk(1000), &rhs); @@ -27,7 +24,7 @@ TEST(SearchableStatsTest, stats_can_be_merged) EXPECT_EQ(1000u, stats.sizeOnDisk()); EXPECT_EQ(500u, stats.fusion_size_on_disk()); - stats.merge(SearchableStats() + stats.merge(IndexStats() .memoryUsage(vespalib::MemoryUsage(150,0,0,0)) .docsInMemory(15) .sizeOnDisk(1500) @@ -38,9 +35,9 @@ TEST(SearchableStatsTest, stats_can_be_merged) EXPECT_EQ(1300u, stats.fusion_size_on_disk()); } -TEST(SearchableStatsTest, field_stats_can_be_merged) +TEST(IndexStatsTest, field_stats_can_be_merged) { - SearchableStats base_stats; + IndexStats base_stats; auto read_2_once_stats = DiskIoStats().read_operations(1).read_bytes_total(2).read_bytes_min(2).read_bytes_max(2); auto read_1000_once_stats = DiskIoStats().read_operations(1).read_bytes_total(1000).read_bytes_min(1000). read_bytes_max(1000); @@ -59,11 +56,11 @@ TEST(SearchableStatsTest, field_stats_can_be_merged) auto f3_stats = FieldIndexStats().memory_usage({110, 50, 20, 12}).size_on_disk(500). io_stats(FieldIndexIoStats().read(read_1000_once_stats)); base_stats.add_field_stats("f1", f1_stats).add_field_stats("f2", f2_stats1); - SearchableStats added_stats; + IndexStats added_stats; added_stats.add_field_stats("f2", f2_stats2).add_field_stats("f3", f3_stats); - SearchableStats act_stats = base_stats; + IndexStats act_stats = base_stats; act_stats.merge(added_stats); - SearchableStats exp_stats; + IndexStats exp_stats; exp_stats.add_field_stats("f1", f1_stats). 
add_field_stats("f2", f2_stats3). add_field_stats("f3", f3_stats); diff --git a/searchlib/src/tests/util/searchable_stats/CMakeLists.txt b/searchlib/src/tests/util/searchable_stats/CMakeLists.txt deleted file mode 100644 index a091f32ef0f5..000000000000 --- a/searchlib/src/tests/util/searchable_stats/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_searchable_stats_test_app TEST - SOURCES - searchable_stats_test.cpp - DEPENDS - vespa_searchlib - GTest::GTest -) -vespa_add_test(NAME searchlib_searchable_stats_test_app COMMAND searchlib_searchable_stats_test_app) diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp index 77432163ac6f..9c620d6f932c 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp @@ -398,10 +398,10 @@ DiskIndex::get_field_length_info(const std::string& field_name) const } } -SearchableStats +IndexStats DiskIndex::get_stats(bool clear_disk_io_stats) const { - SearchableStats stats; + IndexStats stats; uint64_t size_on_disk = _nonfield_size_on_disk; uint32_t field_id = 0; for (auto& field_index : _field_indexes) { diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.h b/searchlib/src/vespa/searchlib/diskindex/diskindex.h index 226515e7fd0b..132666b9558e 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.h +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.h @@ -4,7 +4,7 @@ #include "field_index.h" #include -#include +#include #include #include #include @@ -110,7 +110,7 @@ class DiskIndex : public queryeval::Searchable { /** * Get stats for this index. 
*/ - SearchableStats get_stats(bool clear_disk_io_stats) const; + IndexStats get_stats(bool clear_disk_io_stats) const; const index::Schema &getSchema() const { return _schema; } const std::string &getIndexDir() const { return _indexDir; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp index d78eaac8eb5f..84d5db25f40f 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp @@ -59,10 +59,10 @@ FieldIndexCollection::getMemoryUsage() const return usage; } -SearchableStats +IndexStats FieldIndexCollection::get_stats(const index::Schema& schema) const { - SearchableStats stats; + IndexStats stats; vespalib::MemoryUsage memory_usage; for (uint32_t field_id = 0; field_id < _numFields; ++field_id) { auto &field_index = _fieldIndexes[field_id]; diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h index 5c8ae2603396..dd7faac4d437 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h @@ -4,7 +4,7 @@ #include "i_field_index_collection.h" #include "i_field_index.h" -#include +#include #include #include @@ -47,7 +47,7 @@ class FieldIndexCollection : public IFieldIndexCollection { void dump(search::index::IndexBuilder & indexBuilder); vespalib::MemoryUsage getMemoryUsage() const; - SearchableStats get_stats(const index::Schema& schema) const; + IndexStats get_stats(const index::Schema& schema) const; IFieldIndex *getFieldIndex(uint32_t fieldId) const { return _fieldIndexes[fieldId].get(); diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp index d550e068c8a5..16e7c43f2508 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp @@ -213,7 +213,7 @@ MemoryIndex::getMemoryUsage() const return usage; } -SearchableStats +IndexStats MemoryIndex::get_stats() const { auto stats = _fieldIndexes->get_stats(_schema); diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h index 45bd037e41d7..c95870d631d2 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -170,7 +170,7 @@ class MemoryIndex : public queryeval::Searchable { */ vespalib::MemoryUsage getMemoryUsage() const; - SearchableStats get_stats() const; + IndexStats get_stats() const; uint64_t getStaticMemoryFootprint() const { return _staticMemoryFootprint; } diff --git a/searchlib/src/vespa/searchlib/util/CMakeLists.txt b/searchlib/src/vespa/searchlib/util/CMakeLists.txt index 7c0fde6b517d..8916565bd492 100644 --- a/searchlib/src/vespa/searchlib/util/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/util/CMakeLists.txt @@ -17,10 +17,10 @@ vespa_add_library(searchlib_util OBJECT filesizecalculator.cpp fileutil.cpp foldedstringcompare.cpp + index_stats.cpp linguisticsannotation.cpp logutil.cpp rawbuf.cpp - searchable_stats.cpp slime_output_raw_buf_adapter.cpp state_explorer_utils.cpp token_extractor.cpp diff --git 
a/searchlib/src/vespa/searchlib/util/searchable_stats.cpp b/searchlib/src/vespa/searchlib/util/index_stats.cpp similarity index 75% rename from searchlib/src/vespa/searchlib/util/searchable_stats.cpp rename to searchlib/src/vespa/searchlib/util/index_stats.cpp index d82e1ccf3e3f..2fbc11dfa10c 100644 --- a/searchlib/src/vespa/searchlib/util/searchable_stats.cpp +++ b/searchlib/src/vespa/searchlib/util/index_stats.cpp @@ -1,11 +1,11 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "searchable_stats.h" +#include "index_stats.h" #include namespace search { -SearchableStats::SearchableStats() +IndexStats::IndexStats() : _memoryUsage(), _docsInMemory(0), _sizeOnDisk(0), @@ -14,10 +14,10 @@ SearchableStats::SearchableStats() { } -SearchableStats::~SearchableStats() = default; +IndexStats::~IndexStats() = default; -SearchableStats& -SearchableStats::merge(const SearchableStats &rhs) { +IndexStats& +IndexStats::merge(const IndexStats &rhs) { _memoryUsage.merge(rhs._memoryUsage); _docsInMemory += rhs._docsInMemory; _sizeOnDisk += rhs._sizeOnDisk; @@ -29,7 +29,7 @@ SearchableStats::merge(const SearchableStats &rhs) { } bool -SearchableStats::operator==(const SearchableStats& rhs) const noexcept +IndexStats::operator==(const IndexStats& rhs) const noexcept { return _memoryUsage == rhs._memoryUsage && _docsInMemory == rhs._docsInMemory && @@ -38,14 +38,14 @@ SearchableStats::operator==(const SearchableStats& rhs) const noexcept _field_stats == rhs._field_stats; } -SearchableStats& -SearchableStats::add_field_stats(const std::string& name, const FieldIndexStats& stats) +IndexStats& +IndexStats::add_field_stats(const std::string& name, const FieldIndexStats& stats) { _field_stats[name].merge(stats); return *this; } -std::ostream& operator<<(std::ostream& os, const SearchableStats& stats) { +std::ostream& operator<<(std::ostream& os, const IndexStats& stats) { os << "{memory: " << stats.memoryUsage() << ", docsInMemory: " << stats.docsInMemory() << ", disk: " << stats.sizeOnDisk() << ", fusion_size_on_disk: " << stats.fusion_size_on_disk() << ", "; os << "fields: {"; diff --git a/searchlib/src/vespa/searchlib/util/searchable_stats.h b/searchlib/src/vespa/searchlib/util/index_stats.h similarity index 62% rename from searchlib/src/vespa/searchlib/util/searchable_stats.h rename to searchlib/src/vespa/searchlib/util/index_stats.h index 1cbd7645bbdd..b8ee7786efc6 100644 --- a/searchlib/src/vespa/searchlib/util/searchable_stats.h +++ b/searchlib/src/vespa/searchlib/util/index_stats.h @@ -7,11 +7,11 @@ namespace search { /** - * Simple statistics for a single Searchable component or multiple components that are merged together. + * Simple statistics for a single index or for multiple indexes (merged stats). * * E.g. used for internal aggregation before inserting numbers into the metrics framework. 
**/ -class SearchableStats +class IndexStats { private: vespalib::MemoryUsage _memoryUsage; @@ -21,35 +21,35 @@ class SearchableStats std::map _field_stats; public: - SearchableStats(); - ~SearchableStats(); - SearchableStats &memoryUsage(const vespalib::MemoryUsage &usage) { + IndexStats(); + ~IndexStats(); + IndexStats &memoryUsage(const vespalib::MemoryUsage &usage) { _memoryUsage = usage; return *this; } const vespalib::MemoryUsage &memoryUsage() const { return _memoryUsage; } - SearchableStats &docsInMemory(size_t value) { + IndexStats &docsInMemory(size_t value) { _docsInMemory = value; return *this; } size_t docsInMemory() const { return _docsInMemory; } - SearchableStats &sizeOnDisk(size_t value) { + IndexStats &sizeOnDisk(size_t value) { _sizeOnDisk = value; return *this; } size_t sizeOnDisk() const { return _sizeOnDisk; } - SearchableStats& fusion_size_on_disk(size_t value) { + IndexStats& fusion_size_on_disk(size_t value) { _fusion_size_on_disk = value; return *this; } size_t fusion_size_on_disk() const { return _fusion_size_on_disk; } - SearchableStats& merge(const SearchableStats& rhs); - bool operator==(const SearchableStats& rhs) const noexcept; - SearchableStats& add_field_stats(const std::string& name, const FieldIndexStats& stats); + IndexStats& merge(const IndexStats& rhs); + bool operator==(const IndexStats& rhs) const noexcept; + IndexStats& add_field_stats(const std::string& name, const FieldIndexStats& stats); const std::map& get_field_stats() const noexcept { return _field_stats; } }; -std::ostream& operator<<(std::ostream& os, const SearchableStats& stats); +std::ostream& operator<<(std::ostream& os, const IndexStats& stats); } From 306a1aed6fd1cf68ca8dc787925cf793bb5ce5e5 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Thu, 21 Nov 2024 14:07:07 +0100 Subject: [PATCH 110/126] Move last cache stats to cache metrics instance. 
--- .../proton/metrics/cache_metrics.cpp | 16 +++++++++------- .../searchcore/proton/metrics/cache_metrics.h | 6 +++--- .../server/documentdb_metrics_updater.cpp | 15 ++++++--------- .../server/documentdb_metrics_updater.h | 12 ------------ .../vespa/searchcore/proton/server/proton.cpp | 19 +++---------------- .../vespa/searchcore/proton/server/proton.h | 2 -- 6 files changed, 21 insertions(+), 49 deletions(-) diff --git a/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.cpp b/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.cpp index 5c16c668fe56..a512274b80a1 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.cpp +++ b/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.cpp @@ -18,7 +18,8 @@ CacheMetrics::CacheMetrics(MetricSet *parent, const std::string& name, const std hitRate("hit_rate", {}, "Rate of hits in the cache compared to number of lookups", this), lookups("lookups", {}, "Number of lookups in the cache (hits + misses)", this), invalidations("invalidations", {}, "Number of invalidations (erased elements) in the cache.", this), - _cache_name(cache_name) + _cache_name(cache_name), + _last_stats() { } @@ -51,13 +52,14 @@ CacheMetrics::update_count_metric(uint64_t currVal, uint64_t lastVal, metrics::L } void -CacheMetrics::update_metrics(const CacheStats& current, const CacheStats& last) +CacheMetrics::update_metrics(const CacheStats& stats) { - memoryUsage.set(current.memory_used); - elements.set(current.elements); - update_hit_rate(current, last); - update_count_metric(current.lookups(), last.lookups(), lookups); - update_count_metric(current.invalidations, last.invalidations, invalidations); + memoryUsage.set(stats.memory_used); + elements.set(stats.elements); + update_hit_rate(stats, _last_stats); + update_count_metric(stats.lookups(), _last_stats.lookups(), lookups); + update_count_metric(stats.invalidations, _last_stats.invalidations, invalidations); + _last_stats = stats; } } diff --git a/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.h b/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.h index 1f55ad45093d..b9066a4a6288 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.h +++ b/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.h @@ -4,10 +4,9 @@ #include #include #include +#include #include -namespace vespalib { struct CacheStats; } - namespace proton { /** @@ -20,6 +19,7 @@ class CacheMetrics : public metrics::MetricSet { metrics::LongCountMetric lookups; metrics::LongCountMetric invalidations; std::string _cache_name; + vespalib::CacheStats _last_stats; void update_hit_rate(const vespalib::CacheStats ¤t, const vespalib::CacheStats &last); static void update_count_metric(uint64_t currVal, uint64_t lastVal, metrics::LongCountMetric &metric); @@ -27,7 +27,7 @@ class CacheMetrics : public metrics::MetricSet { CacheMetrics(metrics::MetricSet* parent, const std::string& name, const std::string& description, const std::string& cache_name); ~CacheMetrics() override; - void update_metrics(const vespalib::CacheStats& current, const vespalib::CacheStats& last); + void update_metrics(const vespalib::CacheStats& stats); }; } diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp index 1c5b859bc830..ea7c9829534c 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp +++ 
b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp @@ -43,7 +43,6 @@ DocumentDBMetricsUpdater::DocumentDBMetricsUpdater(const DocumentSubDBCollection _jobTrackers(jobTrackers), _writeFilter(writeFilter), _feed_handler(feed_handler), - _lastDocStoreCacheStats(), _last_feed_handler_stats() { } @@ -236,7 +235,6 @@ updateDocumentsMetrics(DocumentDBTaggedMetrics &metrics, const DocumentSubDBColl void updateDocumentStoreMetrics(DocumentDBTaggedMetrics::SubDBMetrics::DocumentStoreMetrics &metrics, const IDocumentSubDB *subDb, - CacheStats &lastCacheStats, TotalStats &totalStats) { const ISummaryManager::SP &summaryMgr = subDb->getSummaryManager(); @@ -249,17 +247,16 @@ updateDocumentStoreMetrics(DocumentDBTaggedMetrics::SubDBMetrics::DocumentStoreM vespalib::CacheStats cacheStats = backingStore.getCacheStats(); totalStats.memoryUsage.incAllocatedBytes(cacheStats.memory_used); - metrics.cache.update_metrics(cacheStats, lastCacheStats); - lastCacheStats = cacheStats; + metrics.cache.update_metrics(cacheStats); } void updateDocumentStoreMetrics(DocumentDBTaggedMetrics &metrics, const DocumentSubDBCollection &subDBs, - DocumentDBMetricsUpdater::DocumentStoreCacheStats &lastDocStoreCacheStats, TotalStats &totalStats) + TotalStats &totalStats) { - updateDocumentStoreMetrics(metrics.ready.documentStore, subDBs.getReadySubDB(), lastDocStoreCacheStats.readySubDb, totalStats); - updateDocumentStoreMetrics(metrics.removed.documentStore, subDBs.getRemSubDB(), lastDocStoreCacheStats.removedSubDb, totalStats); - updateDocumentStoreMetrics(metrics.notReady.documentStore, subDBs.getNotReadySubDB(), lastDocStoreCacheStats.notReadySubDb, totalStats); + updateDocumentStoreMetrics(metrics.ready.documentStore, subDBs.getReadySubDB(), totalStats); + updateDocumentStoreMetrics(metrics.removed.documentStore, subDBs.getRemSubDB(), totalStats); + updateDocumentStoreMetrics(metrics.notReady.documentStore, subDBs.getNotReadySubDB(), totalStats); } template @@ -307,7 +304,7 @@ DocumentDBMetricsUpdater::updateMetrics(const metrics::MetricLockGuard & guard, updateAttributeMetrics(metrics, _subDBs, totalStats); updateMatchingMetrics(guard, metrics, *_subDBs.getReadySubDB()); updateDocumentsMetrics(metrics, _subDBs); - updateDocumentStoreMetrics(metrics, _subDBs, _lastDocStoreCacheStats, totalStats); + updateDocumentStoreMetrics(metrics, _subDBs, totalStats); updateMiscMetrics(metrics, threadingServiceStats); metrics.totalMemoryUsage.update(totalStats.memoryUsage); diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.h b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.h index 3573d391b379..da734d9efe16 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.h +++ b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.h @@ -20,23 +20,11 @@ class FeedHandler; * Class used to update metrics for a document db. */ class DocumentDBMetricsUpdater { -public: - - struct DocumentStoreCacheStats { - vespalib::CacheStats readySubDb; - vespalib::CacheStats notReadySubDb; - vespalib::CacheStats removedSubDb; - DocumentStoreCacheStats() : readySubDb(), notReadySubDb(), removedSubDb() {} - }; - -private: const DocumentSubDBCollection &_subDBs; ExecutorThreadingService &_writeService; DocumentDBJobTrackers &_jobTrackers; const AttributeUsageFilter &_writeFilter; FeedHandler &_feed_handler; - // Last updated document store cache statistics. Necessary due to metrics implementation is upside down. 
- DocumentStoreCacheStats _lastDocStoreCacheStats; std::optional _last_feed_handler_stats; void updateMiscMetrics(DocumentDBTaggedMetrics &metrics, const ExecutorThreadingServiceStats &threadingServiceStats); diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.cpp b/searchcore/src/vespa/searchcore/proton/server/proton.cpp index 68f89734e8fc..8153248cfd76 100644 --- a/searchcore/src/vespa/searchcore/proton/server/proton.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/proton.cpp @@ -282,9 +282,7 @@ Proton::Proton(FNET_Transport & transport, const config::ConfigUri & configUri, _documentDBReferenceRegistry(std::make_shared()), _nodeUpLock(), _nodeUp(), - _posting_list_cache(), - _last_posting_list_cache_stats(), - _last_bitvector_cache_stats() + _posting_list_cache() { } BootstrapConfig::SP @@ -807,13 +805,6 @@ updateSessionCacheMetrics(ContentProtonMetrics &metrics, proton::matching::Sessi metrics.sessionCache.grouping.update(groupingStats); } -void -update_cache_stats(CacheMetrics& metrics, const vespalib::CacheStats& stats, vespalib::CacheStats& last_stats) -{ - metrics.update_metrics(stats, last_stats); - last_stats = stats; -} - } void @@ -881,12 +872,8 @@ Proton::updateMetrics(const metrics::MetricLockGuard &) } } if (_posting_list_cache) { - update_cache_stats(_metricsEngine->root().index.cache.postinglist, - _posting_list_cache->get_stats(), - _last_posting_list_cache_stats); - update_cache_stats(_metricsEngine->root().index.cache.bitvector, - _posting_list_cache->get_bitvector_stats(), - _last_bitvector_cache_stats); + _metricsEngine->root().index.cache.postinglist.update_metrics(_posting_list_cache->get_stats()); + _metricsEngine->root().index.cache.bitvector.update_metrics(_posting_list_cache->get_bitvector_stats()); } } diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.h b/searchcore/src/vespa/searchcore/proton/server/proton.h index 198037f7bad2..120dc11be225 100644 --- a/searchcore/src/vespa/searchcore/proton/server/proton.h +++ b/searchcore/src/vespa/searchcore/proton/server/proton.h @@ -132,8 +132,6 @@ class Proton : public IProtonConfigurerOwner, std::mutex _nodeUpLock; std::set _nodeUp; // bucketspaces where node is up std::shared_ptr _posting_list_cache; - vespalib::CacheStats _last_posting_list_cache_stats; - vespalib::CacheStats _last_bitvector_cache_stats; std::shared_ptr addDocumentDB(const DocTypeName & docTypeName, BucketSpace bucketSpace, const std::string & configid, From 2ec5f4e41a49af51b0afd54e3b23ed5ce70a09d2 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Thu, 21 Nov 2024 11:21:47 +0000 Subject: [PATCH 111/126] add TermType.WEAK_AND --- container-search/abi-spec.json | 1 + .../src/main/java/com/yahoo/prelude/query/TermType.java | 2 ++ 2 files changed, 3 insertions(+) diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index 5c70d3902c8e..6505bade5006 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -1793,6 +1793,7 @@ "public static final com.yahoo.prelude.query.TermType NOT", "public static final com.yahoo.prelude.query.TermType PHRASE", "public static final com.yahoo.prelude.query.TermType EQUIV", + "public static final com.yahoo.prelude.query.TermType WEAK_AND", "public static final com.yahoo.prelude.query.TermType DEFAULT", "public final java.lang.String name" ] diff --git a/container-search/src/main/java/com/yahoo/prelude/query/TermType.java b/container-search/src/main/java/com/yahoo/prelude/query/TermType.java index 0ce51ee6fc91..4e93617d3b65 100644 --- 
a/container-search/src/main/java/com/yahoo/prelude/query/TermType.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/TermType.java @@ -22,6 +22,8 @@ public class TermType { public static final TermType EQUIV = new TermType("equiv", Item.ItemType.EQUIV, EquivItem.class, null, ""); + public static final TermType WEAK_AND = new TermType("wand", Item.ItemType.WAND, WeakAndItem.class, null, "~"); + public static final TermType DEFAULT = new TermType("", Item.ItemType.AND, CompositeItem.class, AndItem.class, ""); public final String name; From 2fe6878703ba011ebb6f579f81fe289e50f34c39 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Thu, 21 Nov 2024 13:06:20 +0000 Subject: [PATCH 112/126] handle Query.Type.WEAKAND for TermType.DEFAULT --- .../com/yahoo/prelude/semantics/engine/Evaluation.java | 2 ++ .../prelude/semantics/test/ConfigurationTestCase.java | 8 ++++---- .../yahoo/prelude/semantics/test/InheritanceTestCase.java | 8 ++++---- .../prelude/semantics/test/SemanticSearcherTestCase.java | 3 +++ .../com/yahoo/prelude/semantics/test/rulebases/rules.sr | 2 +- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java index ab46c934455c..b2c5b10d9973 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java @@ -444,6 +444,8 @@ private CompositeItem createType(TermType termType) { if (termType == TermType.DEFAULT) { if (query.getModel().getType() == Query.Type.ANY) return new OrItem(); + else if (query.getModel().getType() == Query.Type.WEAKAND) + return new WeakAndItem(); else return new AndItem(); } diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConfigurationTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConfigurationTestCase.java index 657911742b6c..fa7e52e12ebe 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConfigurationTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConfigurationTestCase.java @@ -55,7 +55,7 @@ void testReadingConfigurationRuleBase() { void testParent() { assertSemantics("WEAKAND(100) vehiclebrand:audi", "audi cars", "parent"); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", "parent"); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", "parent.sr"); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", "parent.sr"); assertSemantics("WEAKAND(100) vw car", "vw cars", "parent"); assertSemantics("WEAKAND(100) skoda car", "skoda cars", "parent.sr"); } @@ -64,7 +64,7 @@ void testParent() { void testChild1() { assertSemantics("WEAKAND(100) vehiclebrand:skoda", "audi cars", "child1.sr"); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", "child1"); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", "child1"); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", "child1"); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "vw cars", "child1"); assertSemantics("WEAKAND(100) skoda car", "skoda cars", "child1"); } @@ -73,7 +73,7 @@ void testChild1() { void testChild2() { assertSemantics("WEAKAND(100) vehiclebrand:audi", "audi cars", "child2"); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", 
"child2.sr"); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", "child2.sr"); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", "child2.sr"); assertSemantics("WEAKAND(100) vw car", "vw cars", "child2"); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "skoda cars", "child2"); } @@ -82,7 +82,7 @@ void testChild2() { void testGrandchild() { assertSemantics("WEAKAND(100) vehiclebrand:skoda", "audi cars", "grandchild.sr"); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", "grandchild"); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", "grandchild"); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", "grandchild"); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "vw cars", "grandchild"); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "skoda cars", "grandchild"); } diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/InheritanceTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/InheritanceTestCase.java index d9ef73c9ddd4..4d54866fe0c0 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/InheritanceTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/InheritanceTestCase.java @@ -88,7 +88,7 @@ void testInclusionOrderAndContentDump() { void testParent() { assertSemantics("WEAKAND(100) vehiclebrand:audi", "audi cars", parent); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", parent); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", parent); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", parent); assertSemantics("WEAKAND(100) vw car", "vw cars", parent); assertSemantics("WEAKAND(100) skoda car", "skoda cars", parent); } @@ -97,7 +97,7 @@ void testParent() { void testChild1() { assertSemantics("WEAKAND(100) vehiclebrand:skoda", "audi cars", child1); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", child1); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", child1); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", child1); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "vw cars", child1); assertSemantics("WEAKAND(100) skoda car", "skoda cars", child1); } @@ -106,7 +106,7 @@ void testChild1() { void testChild2() { assertSemantics("WEAKAND(100) vehiclebrand:audi", "audi cars", child2); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", child2); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", child2); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", child2); assertSemantics("WEAKAND(100) vw car", "vw cars", child2); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "skoda cars", child2); } @@ -115,7 +115,7 @@ void testChild2() { void testGrandchild() { assertSemantics("WEAKAND(100) vehiclebrand:skoda", "audi cars", grandchild); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", grandchild); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", grandchild); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", grandchild); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "vw cars", grandchild); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "skoda cars", grandchild); } diff --git 
a/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java index 39f7dbe6918f..0f86549552e4 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java @@ -61,6 +61,9 @@ void testLocal() { @Test void testLiteralReplacing() { assertSemantics("AND lord of rings", "lotr"); + assertSemantics("AND foo1 lord of rings bar2", "foo1 lotr bar2"); + assertSemantics("WEAKAND(100) lord of rings", "lotr", 0, Query.Type.WEAKAND); + assertSemantics("WEAKAND(100) foo1 lord of rings bar2", "foo1 lotr bar2", 0, Query.Type.WEAKAND); } @Test diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr index 9807b226d941..4aabd1e2f3e6 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr @@ -49,7 +49,7 @@ java +> -coffee; # Adding an or term something +> ?somethingelse; -# Add two or terms: +# Replace with two or terms: somethingmore -> ?more ?evenmore; # Adding another negative From f38f2999af6b9158c17cdfad2a8a42b75534e0a5 Mon Sep 17 00:00:00 2001 From: Valerij Fredriksen Date: Thu, 21 Nov 2024 14:34:33 +0100 Subject: [PATCH 113/126] Update factory hostname --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f3d834d808c8..66968648deba 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -13,7 +13,7 @@ This document tells you what you need to know to contribute. All work on Vespa happens directly on GitHub, using the [GitHub flow model](https://docs.github.com/en/get-started/using-github/github-flow). We release the master branch four times a week, and you should expect it to always work. -The continuous build of Vespa is at [https://factory.vespa.oath.cloud](https://factory.vespa.oath.cloud). +The continuous build of Vespa is at [https://factory.vespa.ai](https://factory.vespa.ai). You can follow the fate of each commit there. All pull requests must be approved by a From 1ce157764716d45c74ad152acb9cff5d68e32e82 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Thu, 21 Nov 2024 13:25:25 +0000 Subject: [PATCH 114/126] newParent() should do the same as createType(); make it so. Also add unit test triggering the case where it matters. --- .../yahoo/prelude/semantics/engine/Evaluation.java | 2 +- .../semantics/test/SemanticSearcherTestCase.java | 11 +++++++++++ .../yahoo/prelude/semantics/test/rulebases/rules.sr | 5 +++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java index b2c5b10d9973..5119ec1a1e77 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java @@ -378,7 +378,7 @@ else if (newParent.acceptsItemsOfType(current.getItemType())) { // insert new pa } private CompositeItem newParent(TermType desiredParentType) { - return desiredParentType == TermType.DEFAULT ? 
new AndItem() : (CompositeItem)desiredParentType.createItemClass(); + return createType(desiredParentType); } private Item combineItems(Item first, Item second, TermType termType) { diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java index 0f86549552e4..0dc3e3a8ed9e 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java @@ -176,6 +176,17 @@ void testNullQuery() { assertEquals(NullItem.class, query.getModel().getQueryTree().getRoot().getClass()); // Still a NullItem } + @Test + void testPhraseReplacementCornerCase() { + assertSemantics("brand:smashtogether", "\"smash together\""); + assertSemantics("brand:smashtogether", "smash-together"); + assertSemantics("AND foo1 brand:smashtogether bar2", "foo1 \"smash together\" bar2"); + assertSemantics("AND brand:smashtogether \"foo1 bar2\"", "\"foo1 smash together bar2\""); + assertSemantics("OR brand:smashtogether \"foo1 bar2\"", "\"foo1 smash together bar2\"", 0, Query.Type.ANY); + // the difference in ordering here is because the parsed query already has a WEAKAND root (with 1 child): + assertSemantics("WEAKAND(100) \"foo1 bar2\" brand:smashtogether", "\"foo1 smash together bar2\"", 0, Query.Type.WEAKAND); + } + private Result doSearch(Searcher searcher, Query query, int offset, int hits) { query.setOffset(offset); query.setHits(hits); diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr index 4aabd1e2f3e6..bd70faa8ef95 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr @@ -72,3 +72,8 @@ the -> ; [typechange] -> $default:[typechange] ; [typechange] :- typechange; + +# Replacing a phrase and changing index +[myphrase] -> brand:smashtogether ; + +[myphrase] :- smash together; From caa7d8c7dee9cdff3801cb912d6c7a5026af9b2d Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Thu, 21 Nov 2024 15:13:22 +0100 Subject: [PATCH 115/126] Extend disk index explorer with disk usage per field. Extend attribute vector explorer with disk usage. 
--- .../proton/attribute/attribute_vector_explorer.cpp | 5 ++++- .../vespa/searchcorespi/index/index_manager_explorer.cpp | 8 +++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp index af10655b7a1c..a38d16d6792a 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp +++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp @@ -162,7 +162,9 @@ AttributeVectorExplorer::get_state_helper(const AttributeVector& attr, const ves Cursor &object = inserter.insertObject(); if (full) { convert_config_to_slime(attr.getConfig(), full, object.setObject("config")); - StateExplorerUtils::status_to_slime(status, object.setObject("status")); + auto& slime_status = object.setObject("status"); + StateExplorerUtils::status_to_slime(status, slime_status); + slime_status.setLong("disk_usage", attr.size_on_disk()); convertGenerationToSlime(attr, object.setObject("generation")); convertAddressSpaceUsageToSlime(attr.getAddressSpaceUsage(), object.setObject("addressSpaceUsage")); // TODO: Consider making enum store, multivalue mapping, posting list attribute and tensor attribute @@ -191,6 +193,7 @@ AttributeVectorExplorer::get_state_helper(const AttributeVector& attr, const ves } else { convert_config_to_slime(attr.getConfig(), full, object); object.setLong("allocated_bytes", status.getAllocated()); + object.setLong("disk_usage", attr.size_on_disk()); } } diff --git a/searchcore/src/vespa/searchcorespi/index/index_manager_explorer.cpp b/searchcore/src/vespa/searchcorespi/index/index_manager_explorer.cpp index e16df0a0bb6e..d35eb33f7af0 100644 --- a/searchcore/src/vespa/searchcorespi/index/index_manager_explorer.cpp +++ b/searchcore/src/vespa/searchcorespi/index/index_manager_explorer.cpp @@ -23,7 +23,13 @@ insertDiskIndex(Cursor &arrayCursor, const DiskIndexStats &diskIndex) const IndexStats &sstats = diskIndex.get_index_stats(); diskIndexCursor.setLong("serialNum", diskIndex.getSerialNum()); diskIndexCursor.setString("indexDir", diskIndex.getIndexdir()); - diskIndexCursor.setLong("sizeOnDisk", sstats.sizeOnDisk()); + diskIndexCursor.setLong("disk_usage", sstats.sizeOnDisk()); + auto& fields = diskIndexCursor.setArray("fields"); + for (auto& field_stats : sstats.get_field_stats()) { + auto& field = fields.addObject(); + field.setString("name", field_stats.first); + field.setLong("disk_usage", field_stats.second.size_on_disk()); + } } void From f211dafcb48b58b36c984a293645e1aab608f91e Mon Sep 17 00:00:00 2001 From: Morten Tokle Date: Thu, 21 Nov 2024 15:51:27 +0100 Subject: [PATCH 116/126] Support rollout og service using version --- .../com/yahoo/vespa/config/server/deploy/ModelContextImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java index ef4aefff52fa..030121627aed 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java @@ -416,7 +416,7 @@ public Properties(ApplicationId applicationId, this.endpointConnectionTtl = Duration.ofSeconds(PermanentFlags.ENDPOINT_CONNECTION_TTL.bindTo(flagSource).with(applicationId).value()); this.dataplaneTokens = 
dataplaneTokens; this.requestPrefixForLoggingContent = PermanentFlags.LOG_REQUEST_CONTENT.bindTo(flagSource).with(applicationId).value(); - this.launchApplicationAthenzService = Flags.LAUNCH_APPLICATION_ATHENZ_SERVICE.bindTo(flagSource).with(applicationId).value(); + this.launchApplicationAthenzService = Flags.LAUNCH_APPLICATION_ATHENZ_SERVICE.bindTo(flagSource).with(applicationId).with(modelVersion).value(); } @Override public ModelContext.FeatureFlags featureFlags() { return featureFlags; } From a47a6ff68bb0b5ec7a8550e15bc80664eb78e0d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Meland?= Date: Thu, 21 Nov 2024 16:07:56 +0100 Subject: [PATCH 117/126] typo --- flags/src/main/java/com/yahoo/vespa/flags/Flags.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index bd3d7b73bf5f..13d0a73b2701 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -399,7 +399,7 @@ public class Flags { "Takes effect immediately"); public static UnboundBooleanFlag ATLASSIAN_SYNC_TENANTS = defineFeatureFlag( - "atlassianb-sync-tenants", false, + "atlassian-sync-tenants", false, List.of("bjormel"), "2024-11-11", "2025-01-01", "Whether to sync tenants to Atlassian", "Takes effect immediately"); From 4fc48f322f7e749a805d2a5f29308a6f07818546 Mon Sep 17 00:00:00 2001 From: Theodor Kvalsvik Lauritzen Date: Fri, 22 Nov 2024 10:41:28 +0100 Subject: [PATCH 118/126] chore: more secure way to execute vespa queries --- .../common/command/commandtypes/RunVespaQuery.java | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/common/command/commandtypes/RunVespaQuery.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/common/command/commandtypes/RunVespaQuery.java index b66d446da789..b626bb546b24 100644 --- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/common/command/commandtypes/RunVespaQuery.java +++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/common/command/commandtypes/RunVespaQuery.java @@ -56,7 +56,7 @@ public Object execute(EventExecuteCommandContext context) { runVespaQuery(queryCommand, context.logger).thenAccept(result -> { if (!result.success()) { - if (result.result().toLowerCase().contains("command not found")) { + if (result.result().toLowerCase().contains("cannot run program")) { context.messageHandler.sendMessage(MessageType.Error, "Could not find vespa CLI. Make sure vespa CLI is installed and added to path. 
Download vespa CLI here: https://docs.vespa.ai/en/vespa-cli.html"); return; } @@ -107,13 +107,10 @@ private CompletableFuture runVespaQuery(String query, ClientLogger ProcessBuilder builder = new ProcessBuilder(); - String queryEscaped = query.replace("\"", "\\\""); - String vespaCommand = String.format("vespa query \"%s\"", queryEscaped); - if (isWindows) { - builder.command("cmd.exe", "/c", vespaCommand); // TODO: Test this on windows + builder.command("cmd.exe", "/c", "vespa", "query", query); // TODO: Test this on windows } else { - builder.command("/bin/sh", "-c", vespaCommand); + builder.command("vespa", "query", query); } return CompletableFuture.supplyAsync(() -> { @@ -146,8 +143,7 @@ private CompletableFuture runVespaQuery(String query, ClientLogger } catch (InterruptedException e) { return new QueryResult(false, "Program interrupted"); } catch (IOException e) { - logger.error(e.getMessage()); - return new QueryResult(false, "IOException occurred."); + return new QueryResult(false, e.getMessage()); } }); } From 7e05a29df7358f9816fd39edd0866e59c5345fbf Mon Sep 17 00:00:00 2001 From: Theodor Kvalsvik Lauritzen Date: Fri, 22 Nov 2024 10:42:22 +0100 Subject: [PATCH 119/126] feat: Add YQL support to VSCode --- .../src/main/resources/META-INF/plugin.xml | 3 ++ .../clients/vscode/README.md | 7 +++++ .../clients/vscode/package.json | 16 +++++++++-- .../clients/vscode/src/extension.ts | 28 +++++++------------ 4 files changed, 34 insertions(+), 20 deletions(-) diff --git a/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml b/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml index 92b5b8c7d9dd..389eded23328 100644 --- a/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml +++ b/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml @@ -38,5 +38,8 @@ In addition, the plugin will be available for community editions as well. + diff --git a/integration/schema-language-server/clients/vscode/README.md b/integration/schema-language-server/clients/vscode/README.md index 47983f3ea6e6..b2109fc9e8fe 100644 --- a/integration/schema-language-server/clients/vscode/README.md +++ b/integration/schema-language-server/clients/vscode/README.md @@ -14,6 +14,11 @@ Features: - Renaming/refactoring - List document symbols +YQL Features: +- Error highlighting +- Semantic token highlighting +- Running Queries directly from `.yql` files + ## Requirements The extension requires Java 17 or greater. Upon activation, the extension will look in the following locations in this order for a Java executable: @@ -23,6 +28,8 @@ The extension requires Java 17 or greater. Upon activation, the extension will l - JDK_HOME environment variable - JAVA_HOME environment variable +The extension also requires [Vespa CLI](https://docs.vespa.ai/en/vespa-cli.html) to run Vespa Queries from `.yql` files. + ## XML support This extension bundles with an extension to the [LemMinX XML Language server](https://github.com/eclipse/lemminx). This is to provide additional support when editing the services.xml file in Vespa applications. 
diff --git a/integration/schema-language-server/clients/vscode/package.json b/integration/schema-language-server/clients/vscode/package.json index 14f4265435e0..7e06f3c5d532 100644 --- a/integration/schema-language-server/clients/vscode/package.json +++ b/integration/schema-language-server/clients/vscode/package.json @@ -15,7 +15,8 @@ ], "keywords": [ "Vespa", - "Schema" + "Schema", + "YQL" ], "repository": { "type": "git", @@ -24,7 +25,8 @@ "icon": "images/icon.png", "activationEvents": [ "onLanguage:xml", - "onLanguage:vespaSchema" + "onLanguage:vespaSchema", + "onLanguage:vespaYQL" ], "main": "./dist/extension.js", "contributes": { @@ -39,6 +41,16 @@ ".profile" ], "configuration": "./language-configuration.json" + }, + { + "id": "vespaYQL", + "aliases": [ + "Vespa YQL" + ], + "extensions": [ + ".yql" + ], + "configuration": "./language-configuration.json" } ], "xml.javaExtensions": [ diff --git a/integration/schema-language-server/clients/vscode/src/extension.ts b/integration/schema-language-server/clients/vscode/src/extension.ts index a23b760c89fd..d70bb9dbebf2 100644 --- a/integration/schema-language-server/clients/vscode/src/extension.ts +++ b/integration/schema-language-server/clients/vscode/src/extension.ts @@ -72,24 +72,16 @@ function createAndStartClient(serverPath: string): LanguageClient | null { let clientOptions: LanguageClientOptions = { // Register the server for plain text documents - documentSelector: [{ - scheme: 'file', - language: 'vespaSchema', - }], - middleware: { - provideCompletionItem: async (document, position, context, token, next) => { - const r = await next(document, position, context, token); - return r; - }, - provideDocumentHighlights: async (document, position, token, next) => { - const r = await next(document, position, token); - return r; - }, - provideDocumentSemanticTokens: async (document, token, next) => { - const r = await next(document, token); - return r; - }, - }, + documentSelector: [ + { + scheme: 'file', + language: 'vespaSchema', + }, + { + scheme: 'file', + language: 'vespaYQL' + } + ], synchronize: { fileEvents: vscode.workspace.createFileSystemWatcher("**/*{.sd,.profile}") } From cdfcb040f6efed522498e60d02ac35ac2651f39b Mon Sep 17 00:00:00 2001 From: Theodor Kvalsvik Lauritzen Date: Fri, 22 Nov 2024 11:10:23 +0100 Subject: [PATCH 120/126] chore: update README for the IntelliJ plugin --- .../intellij/src/main/resources/META-INF/plugin.xml | 12 +++++++++--- .../schema-language-server/clients/vscode/README.md | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml b/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml index 389eded23328..6f2f61a80515 100644 --- a/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml +++ b/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml @@ -21,11 +21,17 @@
  • Renaming

    YQL Features

    • Error highlighting
    • Syntax highlighting
    • Running queries directly from .yql files

    Simple support for YQL

    +The plugin now supports syntax highlighting of .yql files, in addition to run the queries directly from the editor. ]]>
    com.intellij.modules.platform com.redhat.devtools.lsp4ij diff --git a/integration/schema-language-server/clients/vscode/README.md b/integration/schema-language-server/clients/vscode/README.md index b2109fc9e8fe..bf8b58329137 100644 --- a/integration/schema-language-server/clients/vscode/README.md +++ b/integration/schema-language-server/clients/vscode/README.md @@ -17,7 +17,7 @@ Features: YQL Features: - Error highlighting - Semantic token highlighting -- Running Queries directly from `.yql` files +- Running queries directly from `.yql` files ## Requirements The extension requires Java 17 or greater. Upon activation, the extension will look in the following locations in this order for a Java executable: From 97fed776bcc24b590e3f4c185c117b4f210bb243 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Fri, 22 Nov 2024 11:24:26 +0100 Subject: [PATCH 121/126] Use created time for local session if remote session is missing To be able to delete expired local sessions when remote sessions are missing, we need to use created time for session from file system --- .../server/session/SessionRepository.java | 59 ++++++++++++------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java index fd85ba4064f8..291fe7b37a0a 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java @@ -624,31 +624,44 @@ public void deleteExpiredRemoteAndLocalSessions(Predicate sessionIsActi for (Long sessionId : sessions) { try { Session session = remoteSessionCache.get(sessionId); - if (session == null) - session = new RemoteSession(tenantName, sessionId, createSessionZooKeeperClient(sessionId)); + Instant createTime; + Optional localSessionCreateTime = Optional.empty(); + boolean deleteRemoteSession = true; + if (session == null) { + // If remote session is missing (deleted from zookeeper) it will only be present in file system, + // so use local session and its creation time from file system + var localSession = getOptionalSessionFromFileSystem(sessionId); + if (localSession.isEmpty()) continue; + + session = localSession.get(); + createTime = localSessionCreated((LocalSession) session); + localSessionCreateTime= Optional.of(createTime); + deleteRemoteSession = false; + } else { + createTime = session.getCreateTime(); + } Optional applicationId = session.getOptionalApplicationId(); try (var ignored = lockApplication(applicationId)) { Session.Status status = session.getStatus(); boolean activeForApplication = sessionIsActiveForApplication.test(session); - log.log(Level.FINE, () -> "local session " + sessionId + - ", status " + status + (status == UNKNOWN ? "" : ", activeForApplication " + activeForApplication)); if (status == ACTIVATE && activeForApplication) continue; - Instant createTime = session.getCreateTime(); boolean hasExpired = hasExpired(createTime); - log.log(Level.FINE, () -> "local session " + sessionId + - ", status " + status + (status == UNKNOWN ? "" : ", created " + createTime + - ", has expired: " + hasExpired)); + log.log(Level.FINE, "Session " + sessionId + ", status " + status + ", has expired: " + hasExpired); if (! 
hasExpired) continue; - log.log(Level.FINE, () -> "Remote session " + sessionId + " for " + tenantName + " has expired, deleting it"); - deleteRemoteSessionFromZooKeeper(session); - deletedRemoteSessions++; + log.log(Level.FINE, "session " + sessionId + ", status " + status + + ", remote session created " + createTime + + ", local session created " + localSessionCreateTime); + if (deleteRemoteSession) { + log.log(Level.FINE, () -> "Remote session " + sessionId + " for " + tenantName + " has expired, deleting it"); + deleteRemoteSessionFromZooKeeper(session); + deletedRemoteSessions++; + } - var localSessionCanBeDeleted = canBeDeleted(sessionId, status, createTime, activeForApplication); - if (localSessionCanBeDeleted) { - log.log(Level.FINE, () -> "Expired local session " + sessionId + " can be deleted"); + if (localSessionCanBeDeleted(status, createTime, activeForApplication)) { + log.log(Level.FINE, () -> "Local session " + sessionId + " for " + tenantName + " has expired, deleting it"); deleteLocalSession(sessionId); deletedLocalSessions++; } @@ -698,21 +711,25 @@ private boolean hasExpired(Instant created) { private long sessionLifeTimeInSeconds() { return configserverConfig.sessionLifetime(); } - private boolean canBeDeleted(long sessionId, Session.Status status, Instant createTime, boolean activeForApplication) { - // Delete Sessions with state other than UNKNOWN or ACTIVATE or old sessions in UNKNOWN state - if ( ! List.of(UNKNOWN, ACTIVATE).contains(status) || oldSessionDirWithUnknownStatus(sessionId, status)) + private boolean localSessionCanBeDeleted(Session.Status status, Instant createTime, boolean activeForApplication) { + // Delete sessions with state other than UNKNOWN or ACTIVATE or old sessions in UNKNOWN state + if ( ! List.of(UNKNOWN, ACTIVATE).contains(status) || oldSessionDirWithUnknownStatus(createTime, status)) return true; // This might happen if remote session is gone, but local session is not return isOldAndCanBeDeleted(createTime) && !activeForApplication; } - private boolean oldSessionDirWithUnknownStatus(long sessionId, Session.Status status) { + private boolean oldSessionDirWithUnknownStatus(Instant created, Session.Status status) { Duration expiryTime = Duration.ofHours(configserverConfig.keepSessionsWithUnknownStatusHours()); - File sessionDir = tenantFileSystemDirs.getUserApplicationDir(sessionId); - return sessionDir.exists() + return created != Instant.EPOCH // We don't know anything about creation time for this session && status == UNKNOWN - && created(sessionDir).plus(expiryTime).isBefore(clock.instant()); + && created.plus(expiryTime).isBefore(clock.instant()); + } + + private Instant localSessionCreated(LocalSession session) { + File sessionDir = tenantFileSystemDirs.getUserApplicationDir(session.getSessionId()); + return sessionDir.exists() ? 
created(sessionDir) : Instant.EPOCH; } private Set findNewSessionsInFileSystem() { From ba234f99ef801020096169cf8ed62dd6756d8a9f Mon Sep 17 00:00:00 2001 From: Theodor Kvalsvik Lauritzen Date: Fri, 22 Nov 2024 11:31:54 +0100 Subject: [PATCH 122/126] chore: add VespaCLI requirement note in the IntelliJ plugin description --- .../clients/intellij/src/main/resources/META-INF/plugin.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml b/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml index 6f2f61a80515..e0880db8d2d6 100644 --- a/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml +++ b/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml @@ -28,6 +28,9 @@
  • Running queries directly from .yql files
+

    Requirements

    +The plugin requires Vespa CLI to be installed to be able to run Vespa Queries from .yql files. + ]]> Simple support for YQL From dbe4d4dadcd99790a12f22c78da0dcae8ed5aed5 Mon Sep 17 00:00:00 2001 From: Theodor Kvalsvik Lauritzen Date: Fri, 22 Nov 2024 11:51:09 +0100 Subject: [PATCH 123/126] fix: Continuation parsing bug --- .../schemals/schemadocument/YQLDocument.java | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java index e6dffbbe2287..54d924f201b0 100644 --- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java +++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java @@ -21,7 +21,6 @@ import ai.vespa.schemals.tree.Node; import ai.vespa.schemals.tree.SchemaNode; import ai.vespa.schemals.tree.YQLNode; -import ai.vespa.schemals.tree.YQL.YQLUtils; public class YQLDocument implements DocumentManager { @@ -149,7 +148,16 @@ private static int findContinuationLength(String inputString) { return continuationEnd; } - private static ParseResult parseContinuation(String inputString, Position offset) { + private static boolean detectContinuation(String inputString) { + for (int i = 0; i < inputString.length(); i++) { + if (inputString.charAt(i) != ' ') { + return inputString.charAt(i) == '{'; + } + } + return false; + } + + private static YQLPartParseResult parseContinuation(String inputString, Position offset) { YQLPlusParser parser = new YQLPlusParser(inputString); @@ -162,7 +170,9 @@ private static ParseResult parseContinuation(String inputString, Position offset var node = parser.rootNode(); YQLNode retNode = new YQLNode(node, offset); - return new ParseResult(List.of(), Optional.of(retNode)); + int charsRead = parser.getToken(0).getEndOffset(); + + return new YQLPartParseResult(List.of(), Optional.of(retNode), charsRead); } private static YQLPartParseResult parseYQLQuery(ParseContext context, String queryString, Position offset) { @@ -193,19 +203,20 @@ private static YQLPartParseResult parseYQLQuery(ParseContext context, String que charsRead++; // Look for continuation - int continuationLength = findContinuationLength(groupingString); - if (continuationLength != 0) { - String continuationString = groupingString.substring(0, continuationLength); - ParseResult continuationResults = parseContinuation(continuationString, groupOffset); + boolean continuationDetected = detectContinuation(groupingString); + if (continuationDetected) { + YQLPartParseResult continuationResults = parseContinuation(groupingString, groupOffset); diagnostics.addAll(continuationResults.diagnostics()); if (continuationResults.CST().isPresent()) { ret.addChild(continuationResults.CST().get()); } - charsRead += continuationLength; - groupingString = groupingString.substring(continuationLength); + charsRead += continuationResults.charsRead(); + String continuationString = groupingString.substring(0, continuationResults.charsRead()); Position continuationPosition = StringUtils.getStringPosition(continuationString); + + groupingString = groupingString.substring(continuationResults.charsRead()); groupOffset = CSTUtils.addPositions(groupOffset, continuationPosition); } From 533cb8ec189b6e902ccd724d76533f5f665e0fef Mon Sep 17 00:00:00 2001 
From: Theodor Kvalsvik Lauritzen Date: Fri, 22 Nov 2024 11:53:53 +0100 Subject: [PATCH 124/126] chore: remove unsued function --- .../schemals/schemadocument/YQLDocument.java | 35 ------------------- 1 file changed, 35 deletions(-) diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java index 54d924f201b0..e2793e5df3d8 100644 --- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java +++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java @@ -113,41 +113,6 @@ private static YQLPartParseResult parseYQLPart(CharSequence content, ClientLogge return new YQLPartParseResult(List.of(), Optional.of(retNode), charsRead); } - private static int findContinuationLength(String inputString) { - - // BUG: This never check if the curly bracket are in a string or something else - - char[] charArr = inputString.toCharArray(); - int continuationStart = -1; - for (int i = 0; i < charArr.length; i++) { - if (!Character.isWhitespace(charArr[i])) { - if (charArr[i] != '{') { - return 0; - } - - continuationStart = i; - break; - - } - } - if (continuationStart == -1) return 0; - - - int level = 0; - int continuationEnd = charArr.length; - for (int i = continuationStart; i < charArr.length; i++) { - if (charArr[i] == '{') level++; - if (charArr[i] == '}') level--; - - if (level == 0) { - continuationEnd = i + 1; - break; - }; - } - - return continuationEnd; - } - private static boolean detectContinuation(String inputString) { for (int i = 0; i < inputString.length(); i++) { if (inputString.charAt(i) != ' ') { From bd4f4ed50c0fa1ebb1e4263cf2c2b4c8877eb79d Mon Sep 17 00:00:00 2001 From: Theodor Kvalsvik Lauritzen Date: Fri, 22 Nov 2024 12:29:08 +0100 Subject: [PATCH 125/126] fix: Infinite loop in YQL parser at special chars --- .../java/ai/vespa/schemals/schemadocument/YQLDocument.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java index e2793e5df3d8..b38171041783 100644 --- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java +++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java @@ -9,6 +9,8 @@ import org.eclipse.lsp4j.Range; import org.eclipse.lsp4j.VersionedTextDocumentIdentifier; +import com.google.protobuf.Option; + import ai.vespa.schemals.SchemaDiagnosticsHandler; import ai.vespa.schemals.common.ClientLogger; import ai.vespa.schemals.common.StringUtils; @@ -106,6 +108,8 @@ private static YQLPartParseResult parseYQLPart(CharSequence content, ClientLogge int charsRead = parser.getToken(0).getEndOffset(); + if (charsRead == 0) return new YQLPartParseResult(List.of(), Optional.empty(), charsRead); + ai.vespa.schemals.parser.yqlplus.Node node = parser.rootNode(); YQLNode retNode = new YQLNode(node, offset); // YQLUtils.printTree(logger, node); @@ -228,6 +232,8 @@ public static ParseResult parseContent(ParseContext context) { if (result.CST().isPresent()) { ret.addChild(result.CST().get()); } + + if (result.charsRead() == 0) 
result.charsRead++; int newOffset = content.indexOf('\n', charsRead + result.charsRead()); if (newOffset == -1) { From 788cb05896437fe637e5b6a4eb52b14874b948df Mon Sep 17 00:00:00 2001 From: Theodor Kvalsvik Lauritzen Date: Fri, 22 Nov 2024 12:30:08 +0100 Subject: [PATCH 126/126] chore: remove unused library --- .../main/java/ai/vespa/schemals/schemadocument/YQLDocument.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java index b38171041783..3633a5bd378c 100644 --- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java +++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java @@ -9,8 +9,6 @@ import org.eclipse.lsp4j.Range; import org.eclipse.lsp4j.VersionedTextDocumentIdentifier; -import com.google.protobuf.Option; - import ai.vespa.schemals.SchemaDiagnosticsHandler; import ai.vespa.schemals.common.ClientLogger; import ai.vespa.schemals.common.StringUtils;
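
The two parser fixes above (patches 123 and 125) guard a chunk-by-chunk parse loop against stalling: a continuation block is detected by its leading '{', and a parse attempt that consumes zero characters is forced to advance by one character so the loop cannot spin forever on an unexpected special character. The following minimal Java sketch illustrates only that zero-progress guard; the names (ChunkParserLoop, ChunkResult, parseChunk) are hypothetical and not part of the plugin's actual API.

import java.util.ArrayList;
import java.util.List;

public class ChunkParserLoop {

    // Minimal stand-in for a per-chunk parse result: only how many chars were consumed.
    record ChunkResult(int charsRead) {}

    // Hypothetical parser hook; a real implementation would build a CST here.
    static ChunkResult parseChunk(String chunk) {
        // Pretend the parser gives up immediately on anything that does not start with a letter.
        boolean rejected = chunk.isEmpty() || !Character.isLetter(chunk.charAt(0));
        return new ChunkResult(rejected ? 0 : chunk.length());
    }

    static List<ChunkResult> parseAll(String content) {
        List<ChunkResult> results = new ArrayList<>();
        int offset = 0;
        while (offset < content.length()) {
            ChunkResult result = parseChunk(content.substring(offset));
            results.add(result);
            int consumed = result.charsRead();
            // Zero-progress guard: if the parser read nothing, skip one character
            // so the loop always advances instead of retrying the same input forever.
            if (consumed == 0) consumed = 1;
            offset += consumed;
        }
        return results;
    }

    public static void main(String[] args) {
        // A leading special character would previously have stalled the loop; now it terminates.
        System.out.println(parseAll("?select * from sources where true").size()); // prints 2
    }
}

With this guard, an input that starts with a character the parser rejects (for example '?') still terminates: the first attempt reads nothing, the loop skips one character, and parsing resumes from the next position.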