diff --git a/.buildkite/factory-command-new-factory.sh b/.buildkite/factory-command-new-factory.sh deleted file mode 100755 index 5d49121db313..000000000000 --- a/.buildkite/factory-command-new-factory.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/bash -# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -set -eo pipefail - -if (( $# < 1 )); then - echo "Usage: $0 [options]" - exit 1 -fi - -COMMAND=$1 -FACTORY_API="https://api.factory.vespa.ai/factory/v1" - -CURL="curl -sL --key /workspace/identity/key --cert /workspace/identity/cert" -TOKEN=$(curl -sL --key /workspace/identity/key --cert /workspace/identity/cert -X POST -H "Content-Type: application/x-www-form-urlencoded" -d"grant_type=client_credentials&scope=vespa.factory%3Adomain" "https://zts.athenz.vespa-cloud.com:4443/zts/v1/oauth2/token" | jq -re '.access_token') - -shift -case $COMMAND in - get-version) - VERSION=$1 - if [[ -z $VERSION ]]; then echo "Usage: $0 $COMMAND "; exit 1; fi - $CURL -H "Authorization: Bearer $TOKEN" "$FACTORY_API/versions/$VERSION" - ;; - create-build) - FACTORY_PIPELINE_ID=$1 - FACTORY_PLATFORM=$2 - if [[ -z $FACTORY_PIPELINE_ID ]]; then echo "Usage: $0 $COMMAND [factory platform]"; exit 1; fi - if [[ -z $FACTORY_PLATFORM ]]; then FACTORY_PLATFORM="opensource_centos7"; fi - $CURL -H "Authorization: Bearer $TOKEN" -d "{ - \"startSeconds\": $(date +%s), - \"sdApiUrl\": \"https://api.buildkite.com/\", - \"pipelineId\": $FACTORY_PIPELINE_ID, - \"jobId\": 0, - \"buildId\": $BUILDKITE_BUILD_NUMBER, - \"platform\": \"$FACTORY_PLATFORM\" - }" \ - "$FACTORY_API/builds" - ;; - create-release) - $CURL -H "Authorization: Bearer $TOKEN" -d "{ - \"startSeconds\": $(date +%s), - \"systemName\": \"opensource\" - }" \ - "$FACTORY_API/releases" - ;; - update-build-status) - FACTORY_PIPELINE_ID=$1 - STATUS=$2 - DESCRIPTION=$3 - FACTORY_BUILD_NUMBER=$(( FACTORY_PIPELINE_ID << 32 | BUILDKITE_BUILD_NUMBER & 0xFFFFFF )) - if [[ -z $FACTORY_PIPELINE_ID 
]] || [[ -z $STATUS ]] || [[ -z $DESCRIPTION ]]; then - echo "Usage: $0 $COMMAND " - exit 1 - fi - $CURL -H "Authorization: Bearer $TOKEN" -d "{ - \"updatedSeconds\": $(date +%s), - \"sdApiUrl\": \"https://api.buildkite.com/\", - \"pipelineId\": $FACTORY_PIPELINE_ID, - \"jobId\": 0, - \"buildId\": $FACTORY_BUILD_NUMBER, - \"status\": \"$STATUS\", - \"description\": \"$DESCRIPTION\" - }" \ - "$FACTORY_API/builds/$FACTORY_BUILD_NUMBER/status" - ;; - update-released-time) - VERSION=$1 - if [[ -z $VERSION ]]; then echo "Usage: $0 $COMMAND "; exit 1; fi - $CURL -H "Authorization: Bearer $TOKEN" -d "{ - \"releasedSeconds\": $(date +%s), - \"systemName\": \"opensource\" - }" \ - "$FACTORY_API/releases/$VERSION" - ;; - *) - echo "Unknown command $COMMAND" - exit 1 - ;; -esac diff --git a/.buildkite/factory-command.sh b/.buildkite/factory-command.sh index ce9ecc70ef58..5d49121db313 100755 --- a/.buildkite/factory-command.sh +++ b/.buildkite/factory-command.sh @@ -8,47 +8,24 @@ if (( $# < 1 )); then fi COMMAND=$1 -FACTORY_API="https://factory.vespa.aws-us-east-1a.vespa.oath.cloud/api/factory/v1" -COOKIEJAR=$(pwd)/jar.txt -# shellcheck disable=2064 -trap "rm -f $COOKIEJAR" EXIT +FACTORY_API="https://api.factory.vespa.ai/factory/v1" -SESSION_TOKEN=null -WAIT_UNTIL=$(( $(date +%s) + 120 )) -set +e -while [[ $SESSION_TOKEN == null ]]; do - SESSION_TOKEN=$(curl -s -H 'Content-Type: application/json' -H 'Accept: application/json' -d "{ \"username\": \"svc-okta-vespa-factory\", \"password\": \"$SVC_OKTA_VESPA_FACTORY_TOKEN\" }" https://ouryahoo.okta.com/api/v1/authn | jq -re '.sessionToken') - - if [[ $SESSION_TOKEN == null ]]; then - if [[ $(date +%s) -ge $WAIT_UNTIL ]]; then - echo "Could not fetch session token from Okta: SESSION_TOKEN=$SESSION_TOKEN" - exit 1 - else - echo "Invalid SESSION_TOKEN=$SESSION_TOKEN . Trying again ..." 
>&2 - sleep 3 - fi - fi -done -set -e - -LOCATION=$(curl -s -i -c "$COOKIEJAR" "https://factory.vespa.aws-us-east-1a.vespa.oath.cloud/login" | grep location | awk '{print $2}' | tr -d '\r') -curl -sL -b "$COOKIEJAR" -c "$COOKIEJAR" "$LOCATION&sessionToken=$SESSION_TOKEN" &> /dev/null - -CURL="curl -sL -b $COOKIEJAR" +CURL="curl -sL --key /workspace/identity/key --cert /workspace/identity/cert" +TOKEN=$(curl -sL --key /workspace/identity/key --cert /workspace/identity/cert -X POST -H "Content-Type: application/x-www-form-urlencoded" -d"grant_type=client_credentials&scope=vespa.factory%3Adomain" "https://zts.athenz.vespa-cloud.com:4443/zts/v1/oauth2/token" | jq -re '.access_token') shift case $COMMAND in get-version) VERSION=$1 if [[ -z $VERSION ]]; then echo "Usage: $0 $COMMAND "; exit 1; fi - $CURL "$FACTORY_API/versions/$VERSION" + $CURL -H "Authorization: Bearer $TOKEN" "$FACTORY_API/versions/$VERSION" ;; create-build) FACTORY_PIPELINE_ID=$1 FACTORY_PLATFORM=$2 if [[ -z $FACTORY_PIPELINE_ID ]]; then echo "Usage: $0 $COMMAND [factory platform]"; exit 1; fi if [[ -z $FACTORY_PLATFORM ]]; then FACTORY_PLATFORM="opensource_centos7"; fi - $CURL -d "{ + $CURL -H "Authorization: Bearer $TOKEN" -d "{ \"startSeconds\": $(date +%s), \"sdApiUrl\": \"https://api.buildkite.com/\", \"pipelineId\": $FACTORY_PIPELINE_ID, @@ -59,7 +36,7 @@ case $COMMAND in "$FACTORY_API/builds" ;; create-release) - $CURL -d "{ + $CURL -H "Authorization: Bearer $TOKEN" -d "{ \"startSeconds\": $(date +%s), \"systemName\": \"opensource\" }" \ @@ -74,7 +51,7 @@ case $COMMAND in echo "Usage: $0 $COMMAND " exit 1 fi - $CURL -d "{ + $CURL -H "Authorization: Bearer $TOKEN" -d "{ \"updatedSeconds\": $(date +%s), \"sdApiUrl\": \"https://api.buildkite.com/\", \"pipelineId\": $FACTORY_PIPELINE_ID, @@ -88,7 +65,7 @@ case $COMMAND in update-released-time) VERSION=$1 if [[ -z $VERSION ]]; then echo "Usage: $0 $COMMAND "; exit 1; fi - $CURL -d "{ + $CURL -H "Authorization: Bearer $TOKEN" -d "{ 
\"releasedSeconds\": $(date +%s), \"systemName\": \"opensource\" }" \ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f3d834d808c8..66968648deba 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -13,7 +13,7 @@ This document tells you what you need to know to contribute. All work on Vespa happens directly on GitHub, using the [GitHub flow model](https://docs.github.com/en/get-started/using-github/github-flow). We release the master branch four times a week, and you should expect it to always work. -The continuous build of Vespa is at [https://factory.vespa.oath.cloud](https://factory.vespa.oath.cloud). +The continuous build of Vespa is at [https://factory.vespa.ai](https://factory.vespa.ai). You can follow the fate of each commit there. All pull requests must be approved by a diff --git a/README.md b/README.md index a91e0cbd30c1..ba3fa66f27d9 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ A new release of Vespa is made from this repository's master branch every mornin - Home page: [https://vespa.ai](https://vespa.ai) - Documentation: [https://docs.vespa.ai](https://docs.vespa.ai) -- Continuous build: [https://factory.vespa.oath.cloud](https://factory.vespa.oath.cloud) +- Continuous build: [https://factory.vespa.ai](https://factory.vespa.ai) - Run applications in the cloud for free: [https://cloud.vespa.ai](https://cloud.vespa.ai) ## Table of contents diff --git a/client/go/internal/vespa/deploy.go b/client/go/internal/vespa/deploy.go index 2c96b8b09352..0ca070f73a68 100644 --- a/client/go/internal/vespa/deploy.go +++ b/client/go/internal/vespa/deploy.go @@ -203,7 +203,30 @@ func fetchFromConfigServer(deployment DeploymentOptions, path string) error { if err := zipDir(dir, zipFile, &ignore.List{}); err != nil { return err } - return os.Rename(zipFile, path) + if err = renameOrCopyTmpFile(zipFile, path); err != nil { + return fmt.Errorf("Could neither rename nor copy %s to %s: %w", zipFile, path, err) + } + return err +} + +func 
renameOrCopyTmpFile(srcPath, dstPath string) error { + if err := os.Rename(srcPath, dstPath); err == nil { + return err + } + src, err := os.Open(srcPath) + if err != nil { + return err + } + stat, err := os.Stat(srcPath) + if err != nil { + return err + } + dst, err := os.OpenFile(dstPath, os.O_CREATE|os.O_WRONLY, stat.Mode()) + if err != nil { + return err + } + _, err = io.Copy(dst, src) + return err } func fetchFilesFromConfigServer(deployment DeploymentOptions, contentURL *url.URL, path string) error { diff --git a/config-model-api/abi-spec.json b/config-model-api/abi-spec.json index fc534be94dab..e61c2a196ba8 100644 --- a/config-model-api/abi-spec.json +++ b/config-model-api/abi-spec.json @@ -1390,6 +1390,7 @@ "public java.util.Optional endpointCertificateSecrets()", "public java.util.Optional athenzDomain()", "public com.yahoo.config.model.api.Quota quota()", + "public java.util.List tenantVaults()", "public java.util.List tenantSecretStores()", "public java.lang.String jvmGCOptions()", "public abstract java.lang.String jvmGCOptions(java.util.Optional)", @@ -1798,6 +1799,44 @@ ], "fields" : [ ] }, + "com.yahoo.config.model.api.TenantVault$Secret" : { + "superClass" : "java.lang.Record", + "interfaces" : [ ], + "attributes" : [ + "public", + "final", + "record" + ], + "methods" : [ + "public void (java.lang.String, java.lang.String)", + "public final java.lang.String toString()", + "public final int hashCode()", + "public final boolean equals(java.lang.Object)", + "public java.lang.String name()", + "public java.lang.String id()" + ], + "fields" : [ ] + }, + "com.yahoo.config.model.api.TenantVault" : { + "superClass" : "java.lang.Record", + "interfaces" : [ ], + "attributes" : [ + "public", + "final", + "record" + ], + "methods" : [ + "public void (java.lang.String, java.lang.String, java.lang.String, java.util.List)", + "public final java.lang.String toString()", + "public final int hashCode()", + "public final boolean equals(java.lang.Object)", + "public 
java.lang.String id()", + "public java.lang.String name()", + "public java.lang.String externalId()", + "public java.util.List secrets()" + ], + "fields" : [ ] + }, "com.yahoo.config.model.api.ValidationParameters$CheckRouting" : { "superClass" : "java.lang.Enum", "interfaces" : [ ], diff --git a/config-model-api/src/main/java/com/yahoo/config/application/api/xml/DeploymentSpecXmlReader.java b/config-model-api/src/main/java/com/yahoo/config/application/api/xml/DeploymentSpecXmlReader.java index 786484f4fe22..99211c1d39bf 100644 --- a/config-model-api/src/main/java/com/yahoo/config/application/api/xml/DeploymentSpecXmlReader.java +++ b/config-model-api/src/main/java/com/yahoo/config/application/api/xml/DeploymentSpecXmlReader.java @@ -91,7 +91,6 @@ public class DeploymentSpecXmlReader { private static final String idAttribute = "id"; private static final String athenzServiceAttribute = "athenz-service"; private static final String athenzDomainAttribute = "athenz-domain"; - private static final String testerFlavorAttribute = "tester-flavor"; private static final String testerTag = "tester"; private static final String nodesTag = "nodes"; private static final String majorVersionAttribute = "major-version"; diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java index ecb1212b4b4b..32e830d3e792 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java @@ -150,6 +150,8 @@ interface Properties { default Quota quota() { return Quota.unlimited(); } + default List tenantVaults() { return List.of(); } + default List tenantSecretStores() { return List.of(); } // Default setting for the gc-options attribute if not specified explicit by application diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/TenantVault.java 
b/config-model-api/src/main/java/com/yahoo/config/model/api/TenantVault.java new file mode 100644 index 000000000000..99a87f3936a5 --- /dev/null +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/TenantVault.java @@ -0,0 +1,13 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.config.model.api; + +import java.util.List; + +/** + * @author gjoranv + */ +public record TenantVault(String id, String name, String externalId, List secrets) { + + public record Secret(String id, String name) { } + +} diff --git a/config-model-api/src/test/java/com/yahoo/config/application/api/DeploymentSpecTest.java b/config-model-api/src/test/java/com/yahoo/config/application/api/DeploymentSpecTest.java index 7fc1a3962577..debe41bf76d5 100644 --- a/config-model-api/src/test/java/com/yahoo/config/application/api/DeploymentSpecTest.java +++ b/config-model-api/src/test/java/com/yahoo/config/application/api/DeploymentSpecTest.java @@ -1607,7 +1607,7 @@ public void testDeployableHash() { - + diff --git a/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java b/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java index 4ef591cda9f7..88335e5cc394 100644 --- a/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java +++ b/config-model/src/main/java/com/yahoo/config/model/admin/AdminModel.java @@ -78,7 +78,9 @@ public BuilderV2() { @Override public void doBuild(AdminModel model, Element adminElement, ConfigModelContext modelContext) { - if (modelContext.getDeployState().isHosted()) { // admin v4 is used on hosted: Build a default V4 instead + // admin v4 is used on hosted: Build a default V4 instead. We want to allow version 2.0 so + // that self-hosted apps deploy without changes. 
TODO: Warn if tags from version 2.0 are used (and ignored) + if (modelContext.getDeployState().isHosted()) { new BuilderV4().doBuild(model, adminElement, modelContext); return; } diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java index 8f219b214830..aa4b3b89fa54 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java @@ -7,6 +7,7 @@ import com.yahoo.config.model.api.ModelContext; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; import com.yahoo.config.provision.CloudAccount; @@ -54,6 +55,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea private double feedConcurrency = 0.5; private double feedNiceness = 0.0; private int maxActivationInhibitedOutOfSyncGroups = 0; + private List tenantVaults = List.of(); private List tenantSecretStores = List.of(); private boolean allowDisableMtls = true; private List operatorCertificates = List.of(); @@ -114,6 +116,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea @Override public double feedConcurrency() { return feedConcurrency; } @Override public double feedNiceness() { return feedNiceness; } @Override public int maxActivationInhibitedOutOfSyncGroups() { return maxActivationInhibitedOutOfSyncGroups; } + @Override public List tenantVaults() { return tenantVaults; } @Override public List tenantSecretStores() { return tenantSecretStores; } @Override public boolean allowDisableMtls() { return allowDisableMtls; } @Override public List operatorCertificates() { return operatorCertificates; } @@ -278,6 +281,11 @@ public TestProperties 
maxActivationInhibitedOutOfSyncGroups(int nGroups) { return this; } + public TestProperties setTenantVaults(List tenantVaults) { + this.tenantVaults = List.copyOf(tenantVaults); + return this; + } + public TestProperties setTenantSecretStores(List secretStores) { this.tenantSecretStores = List.copyOf(secretStores); return this; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java b/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java index 468cf8dd9610..52ed06b66860 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java @@ -151,7 +151,6 @@ public static boolean isModelIntegrationClass(String className) { com.yahoo.search.searchchain.ForkingSearcher.class.getName(), com.yahoo.search.searchers.CacheControlSearcher.class.getName(), com.yahoo.search.searchers.RateLimitingSearcher.class.getName(), - com.yahoo.vespa.streamingvisitors.MetricsSearcher.class.getName(), com.yahoo.vespa.streamingvisitors.StreamingBackend.class.getName(), ai.vespa.search.llm.LLMSearcher.class.getName(), ai.vespa.search.llm.RAGSearcher.class.getName() diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java index 6819548bea18..9bcf942d159e 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudAsmSecrets.java @@ -2,6 +2,8 @@ package com.yahoo.vespa.model.container.xml; import ai.vespa.secret.config.aws.AsmSecretConfig; +import ai.vespa.secret.config.aws.AsmTenantSecretConfig; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.AthenzDomain; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.TenantName; @@ -10,35 
+12,58 @@ import com.yahoo.vespa.model.container.component.SimpleComponent; import java.net.URI; +import java.util.List; /** * @author lesters */ -public class CloudAsmSecrets extends SimpleComponent implements AsmSecretConfig.Producer { +public class CloudAsmSecrets extends SimpleComponent implements + AsmSecretConfig.Producer, + AsmTenantSecretConfig.Producer { - private static final String CLASS = "ai.vespa.secret.aws.AsmTenantSecretReader"; + static final String CLASS = "ai.vespa.secret.aws.AsmTenantSecretReader"; private static final String BUNDLE = "jdisc-cloud-aws"; private final URI ztsUri; private final AthenzDomain athenzDomain; private final SystemName system; private final TenantName tenant; + private final List tenantVaults; public CloudAsmSecrets(URI ztsUri, AthenzDomain athenzDomain, - SystemName system, TenantName tenant) { + SystemName system, TenantName tenant, + List tenantVaults) { super(new ComponentModel(BundleInstantiationSpecification.fromStrings(CLASS, CLASS, BUNDLE))); this.ztsUri = ztsUri; this.athenzDomain = athenzDomain; this.system = system; this.tenant = tenant; + this.tenantVaults = tenantVaults; } @Override public void getConfig(AsmSecretConfig.Builder builder) { builder.ztsUri(ztsUri.toString()) .athenzDomain(athenzDomain.value()) - .system(system.value()) + .refreshInterval(1); // 1 minute + } + + @Override + public void getConfig(AsmTenantSecretConfig.Builder builder) { + builder.system(system.value()) .tenant(tenant.value()); + + tenantVaults.forEach(vault -> builder.vaults( + vaultBuilder -> { vaultBuilder + .id(vault.id()) + .name(vault.name()) + .externalId(vault.externalId()); + + vault.secrets().forEach(secret -> vaultBuilder.secrets( + secretBuilder -> secretBuilder + .id(secret.id()) + .name(secret.name()))); + })); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudSecrets.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudSecrets.java index 
50f499766795..b3332c475838 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudSecrets.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudSecrets.java @@ -14,7 +14,7 @@ */ public class CloudSecrets extends SimpleComponent implements SecretsConfig.Producer { - private static final String CLASS = "ai.vespa.secret.aws.SecretsImpl"; + static final String CLASS = "ai.vespa.secret.aws.SecretsImpl"; private static final String BUNDLE = "jdisc-cloud-aws"; private final List secrets = new ArrayList<>(); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 72ac906b8e00..d88a131d9a40 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -318,7 +318,8 @@ private void addSecrets(ApplicationContainerCluster cluster, Element spec, Deplo cluster.addComponent(new CloudAsmSecrets(deployState.getProperties().ztsUrl(), deployState.getProperties().tenantSecretDomain(), deployState.zone().system(), - deployState.getProperties().applicationId().tenant())); + deployState.getProperties().applicationId().tenant(), + deployState.getProperties().tenantVaults())); } } diff --git a/config-model/src/test/java/com/yahoo/schema/SchemaTestCase.java b/config-model/src/test/java/com/yahoo/schema/SchemaTestCase.java index d77722bdd31b..aad915cead9f 100644 --- a/config-model/src/test/java/com/yahoo/schema/SchemaTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/SchemaTestCase.java @@ -3,12 +3,16 @@ import com.yahoo.config.model.deploy.DeployState; import com.yahoo.document.Document; +import com.yahoo.document.DocumentTypeManager; +import com.yahoo.document.config.DocumentmanagerConfig; import com.yahoo.schema.derived.DerivedConfiguration; 
import com.yahoo.schema.derived.SchemaInfo; import com.yahoo.schema.document.Stemming; import com.yahoo.schema.parser.ParseException; import com.yahoo.schema.processing.ImportedFieldsResolver; import com.yahoo.schema.processing.OnnxModelTypeResolver; +import com.yahoo.vespa.configdefinition.IlscriptsConfig; +import com.yahoo.vespa.configmodel.producers.DocumentManager; import com.yahoo.vespa.documentmodel.DocumentSummary; import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression; import com.yahoo.vespa.indexinglanguage.expressions.Expression; @@ -18,6 +22,8 @@ import com.yahoo.vespa.model.test.utils.DeployLoggerStub; import org.junit.jupiter.api.Test; +import java.util.List; + import static com.yahoo.config.model.test.TestUtil.joinLines; import static org.junit.jupiter.api.Assertions.*; @@ -486,19 +492,31 @@ void testInheritingMultipleRankProfilesWithOverlappingConstructsIsDisallowed2() void testDeriving() throws Exception { String schema = """ - schema test { - field my_hash type long { - indexing: input my_string | hash | attribute + schema page { + + field domain_hash type long { + indexing: input domain | hash | attribute } - document test { - field my_string type string { + + document page { + + field domain type string { + indexing: index | summary + match: word + rank: filter } } }"""; ApplicationBuilder builder = new ApplicationBuilder(new DeployLoggerStub()); builder.addSchema(schema); var application = builder.build(false); // validate=false to test config deriving without validation - new DerivedConfiguration(application.schemas().get("test"), application.rankProfileRegistry()); + var derived = new DerivedConfiguration(application.schemas().get("page"), application.rankProfileRegistry()); + var ilConfig = new IlscriptsConfig.Builder(); + derived.getIndexingScript().getConfig(ilConfig); + + var documentModel = new DocumentModelBuilder(); + var documentManager = documentModel.build(List.of(application.schemas().get("page"))); + var 
documentConfig = new DocumentManager().produce(documentManager, new DocumentmanagerConfig.Builder()); } private void assertInheritedFromParent(Schema schema, RankProfileRegistry rankProfileRegistry) { diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/SecretsTest.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/SecretsTest.java index c6d68be069e5..488d9209db8e 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/SecretsTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/SecretsTest.java @@ -2,7 +2,9 @@ package com.yahoo.vespa.model.container.xml; import ai.vespa.secret.config.SecretsConfig; +import ai.vespa.secret.config.aws.AsmTenantSecretConfig; import com.yahoo.component.ComponentId; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.model.builder.xml.test.DomBuilderTest; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.model.deploy.TestProperties; @@ -15,6 +17,8 @@ import org.junit.jupiter.api.Test; import org.w3c.dom.Element; +import java.util.List; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; @@ -23,45 +27,96 @@ */ public class SecretsTest extends ContainerModelBuilderTestBase { - private static final String IMPL_ID = "ai.vespa.secret.aws.SecretsImpl"; + private static final String SECRETS_IMPL_ID = CloudSecrets.CLASS; + @Test void testCloudSecretsNeedHosted() { - Element clusterElem = DomBuilderTest.parse( - "", - " ", - " ", - " ", - ""); - createModel(root, clusterElem); + createModel(root, containerXml()); ApplicationContainerCluster container = getContainerCluster("container"); - Component component = container.getComponentsMap().get(ComponentId.fromString(IMPL_ID)); + Component component = container.getComponentsMap().get(ComponentId.fromString(SECRETS_IMPL_ID)); assertNull(component); } @Test void testSecretsCanBeSetUp() { - Element 
clusterElem = DomBuilderTest.parse( - "", - " ", - " ", - " ", - ""); DeployState state = new DeployState.Builder() .properties(new TestProperties().setHostedVespa(true)) .zone(new Zone(SystemName.Public, Environment.prod, RegionName.defaultName())) .build(); - createModel(root, state, null, clusterElem); + createModel(root, state, null, containerXml()); + ApplicationContainerCluster container = getContainerCluster("container"); + assertComponentConfigured(container, SECRETS_IMPL_ID); + var secretsConfig = getSecretsConfig(container); + + assertEquals(1, secretsConfig.secret().size()); + assertEquals("openai-apikey", secretsConfig.secret("openAiApiKey").name()); + } + + @Test + void tenant_vaults_are_propagated_in_config() { + var tenantVaults = List.of( + new TenantVault("id1", "name1", "externalId1", List.of()), + new TenantVault("id2", "name2", "externalId2", + List.of(new TenantVault.Secret("sId1", "sName1")))); + + var deployState = new DeployState.Builder() + .properties(new TestProperties() + .setHostedVespa(true) + .setTenantVaults(tenantVaults)) + .zone(new Zone(SystemName.Public, Environment.prod, RegionName.defaultName())) + .build(); + + createModel(root, deployState, null, containerXml()); ApplicationContainerCluster container = getContainerCluster("container"); - assertComponentConfigured(container, IMPL_ID); - CloudSecrets secrets = (CloudSecrets) container.getComponentsMap().get(ComponentId.fromString(IMPL_ID)); + + var config = getAsmTenantSecretConfig(container); + assertEquals(SystemName.Public.value(), config.system()); + assertEquals("default", config.tenant()); + + var vaults = config.vaults(); + assertEquals(2, vaults.size()); + + var vault1 = vaults.get(0); + assertEquals("id1", vault1.id()); + assertEquals("name1", vault1.name()); + assertEquals("externalId1", vault1.externalId()); + assertEquals(0, vault1.secrets().size()); + + var vault2 = vaults.get(1); + assertEquals("id2", vault2.id()); + assertEquals("name2", vault2.name()); + 
assertEquals("externalId2", vault2.externalId()); + assertEquals(1, vault2.secrets().size()); + + var secret = vault2.secrets().get(0); + assertEquals("sId1", secret.id()); + assertEquals("sName1", secret.name()); + } + + private static AsmTenantSecretConfig getAsmTenantSecretConfig(ApplicationContainerCluster container) { + var secrets = (CloudAsmSecrets) container.getComponentsMap().get(ComponentId.fromString(CloudAsmSecrets.CLASS)); + + AsmTenantSecretConfig.Builder configBuilder = new AsmTenantSecretConfig.Builder(); + secrets.getConfig(configBuilder); + return configBuilder.build(); + } + + private static SecretsConfig getSecretsConfig(ApplicationContainerCluster container) { + var secrets = (CloudSecrets) container.getComponentsMap().get(ComponentId.fromString(SECRETS_IMPL_ID)); SecretsConfig.Builder configBuilder = new SecretsConfig.Builder(); secrets.getConfig(configBuilder); - SecretsConfig secretsConfig = configBuilder.build(); + return configBuilder.build(); + } - assertEquals(1, secretsConfig.secret().size()); - assertEquals("openai-apikey", secretsConfig.secret("openAiApiKey").name()); + private static Element containerXml() { + return DomBuilderTest.parse( + "", + " ", + " ", + " ", + ""); } } diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/ActivationContext.java b/config-provisioning/src/main/java/com/yahoo/config/provision/ActivationContext.java index 5e28f61d41fc..74c7a133f475 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/ActivationContext.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/ActivationContext.java @@ -9,12 +9,16 @@ public class ActivationContext { private final long generation; + private final boolean isBootstrap; - public ActivationContext(long generation) { + public ActivationContext(long generation, boolean isBootstrap) { this.generation = generation; + this.isBootstrap = isBootstrap; } /** Returns the application config generation we are activating */ 
public long generation() { return generation; } + /** Returns true if this deployment is done to bootstrap the config server */ + public boolean isBootstrap() { return isBootstrap; } } diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java b/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java index 3b2fa0df14ad..6c856baeb3e9 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java @@ -19,6 +19,11 @@ public class CapacityPolicies { public record Tuning(Architecture adminClusterArchitecture, double logserverMemoryGiB, double clusterControllerMemoryGiB) { + + public Tuning(Architecture adminClusterArchitecture, double logserverMemoryGiB) { + this(adminClusterArchitecture, logserverMemoryGiB, 0.0); + } + double logserverMem(double v) { double override = logserverMemoryGiB(); return (override > 0) ? 
override : v; diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java index 95ccd9593843..eca73af43d70 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java @@ -34,7 +34,6 @@ import com.yahoo.config.provision.Zone; import com.yahoo.config.provision.exception.ActivationConflictException; import com.yahoo.container.jdisc.HttpResponse; -import com.yahoo.container.jdisc.SecretStoreProvider; import com.yahoo.container.jdisc.secretstore.SecretStore; import com.yahoo.docproc.jdisc.metric.NullMetric; import com.yahoo.io.IOUtils; @@ -113,7 +112,15 @@ import java.time.Clock; import java.time.Duration; import java.time.Instant; -import java.util.*; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.OptionalLong; +import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.UnaryOperator; import java.util.logging.Level; @@ -873,7 +880,7 @@ public TesterSuspendedException(String message) { // ---------------- Session operations ---------------------------------------------------------------- - public Activation activate(Session session, ApplicationId applicationId, Tenant tenant, boolean force) { + public Activation activate(Session session, ApplicationId applicationId, Tenant tenant, boolean isBootstrap, boolean force) { NestedTransaction transaction = new NestedTransaction(); Optional applicationTransaction = hostProvisioner.map(provisioner -> provisioner.lock(applicationId)) .map(lock -> new ApplicationTransaction(lock, transaction)); @@ -885,7 +892,7 @@ public Activation activate(Session session, ApplicationId applicationId, 
Tenant transaction.add(deactivateCurrentActivateNew(activeSession, session, force)); if (applicationTransaction.isPresent()) { hostProvisioner.get().activate(session.getAllocatedHosts().getHosts(), - new ActivationContext(session.getSessionId()), + new ActivationContext(session.getSessionId(), isBootstrap), applicationTransaction.get()); applicationTransaction.get().nested().commit(); } else { @@ -954,9 +961,10 @@ public long createSession(ApplicationId applicationId, TimeoutBudget timeoutBudg return session.getSessionId(); } - public void deleteExpiredSessions() { + public void deleteExpiredSessions(int maxSessionsToDelete) { tenantRepository.getAllTenants() - .forEach(tenant -> tenant.getSessionRepository().deleteExpiredRemoteAndLocalSessions(session -> sessionIsActiveForItsApplication(tenant, session))); + .forEach(tenant -> tenant.getSessionRepository().deleteExpiredRemoteAndLocalSessions(session -> sessionIsActiveForItsApplication(tenant, session), + maxSessionsToDelete)); } private boolean sessionIsActiveForItsApplication(Tenant tenant, Session session) { diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java index aec6fd8f1fe5..2e87ee52d5a6 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java @@ -129,7 +129,7 @@ public long activate() { TimeoutBudget timeoutBudget = params.get().getTimeoutBudget(); timeoutBudget.assertNotTimedOut(() -> "Timeout exceeded when trying to activate '" + applicationId + "'"); - Activation activation = applicationRepository.activate(session, applicationId, tenant, params.get().force()); + Activation activation = applicationRepository.activate(session, applicationId, tenant, params.get().isBootstrap(), params.get().force()); waitForActivation(applicationId, timeoutBudget, activation); 
restartServicesIfNeeded(applicationId); storeReindexing(applicationId); @@ -249,12 +249,14 @@ private static Supplier createPrepareParams( PrepareParams.Builder params = new PrepareParams.Builder() .applicationId(session.getApplicationId()) .vespaVersion(session.getVespaVersion().toString()) + .vespaVersionToBuildFirst(session.getVersionToBuildFirst()) .timeoutBudget(timeoutBudget) .ignoreValidationErrors(ignoreValidationErrors) .isBootstrap(isBootstrap) .isInternalRedeployment(isInternalRedeployment) .force(force) .waitForResourcesInPrepare(waitForResourcesInPrepare) + .tenantVaults(session.getTenantVaults()) .tenantSecretStores(session.getTenantSecretStores()) .dataplaneTokens(session.getDataplaneTokens()); session.getDockerImageRepository().ifPresent(params::dockerImageRepository); @@ -269,7 +271,7 @@ private static void waitForResourcesOrTimeout(PrepareParams params, Session sess if (!params.waitForResourcesInPrepare() || provisioner.isEmpty()) return; Set preparedHosts = session.getAllocatedHosts().getHosts(); - ActivationContext context = new ActivationContext(session.getSessionId()); + ActivationContext context = new ActivationContext(session.getSessionId(), params.isBootstrap()); AtomicReference lastException = new AtomicReference<>(); while (true) { diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java index c81a3a4c4475..030121627aed 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java @@ -18,6 +18,7 @@ import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.Reindexing; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; import 
com.yahoo.config.provision.CloudAccount; @@ -350,6 +351,7 @@ public static class Properties implements ModelContext.Properties { private final Optional endpointCertificateSecrets; private final Optional athenzDomain; private final Quota quota; + private final List tenantVaults; private final List tenantSecretStores; private final SecretStore secretStore; private final StringFlag jvmGCOptionsFlag; @@ -376,6 +378,7 @@ public Properties(ApplicationId applicationId, Optional endpointCertificateSecrets, Optional athenzDomain, Optional maybeQuota, + List tenantVaults, List tenantSecretStores, SecretStore secretStore, List operatorCertificates, @@ -397,6 +400,7 @@ public Properties(ApplicationId applicationId, this.endpointCertificateSecrets = endpointCertificateSecrets; this.athenzDomain = athenzDomain; this.quota = maybeQuota.orElseGet(Quota::unlimited); + this.tenantVaults = tenantVaults; this.tenantSecretStores = tenantSecretStores; this.secretStore = secretStore; this.jvmGCOptionsFlag = PermanentFlags.JVM_GC_OPTIONS.bindTo(flagSource) @@ -412,7 +416,7 @@ public Properties(ApplicationId applicationId, this.endpointConnectionTtl = Duration.ofSeconds(PermanentFlags.ENDPOINT_CONNECTION_TTL.bindTo(flagSource).with(applicationId).value()); this.dataplaneTokens = dataplaneTokens; this.requestPrefixForLoggingContent = PermanentFlags.LOG_REQUEST_CONTENT.bindTo(flagSource).with(applicationId).value(); - this.launchApplicationAthenzService = Flags.LAUNCH_APPLICATION_ATHENZ_SERVICE.bindTo(flagSource).with(applicationId).value(); + this.launchApplicationAthenzService = Flags.LAUNCH_APPLICATION_ATHENZ_SERVICE.bindTo(flagSource).with(applicationId).with(modelVersion).value(); } @Override public ModelContext.FeatureFlags featureFlags() { return featureFlags; } @@ -469,6 +473,11 @@ public String athenzDnsSuffix() { @Override public Quota quota() { return quota; } + @Override + public List tenantVaults() { + return tenantVaults; + } + @Override public List tenantSecretStores() { 
return tenantSecretStores; diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java index b584efbd55d8..1430b9002ce8 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java @@ -13,14 +13,23 @@ */ public class SessionsMaintainer extends ConfigServerMaintainer { + private final int maxSessionsToDelete; + SessionsMaintainer(ApplicationRepository applicationRepository, Curator curator, Duration interval) { super(applicationRepository, curator, applicationRepository.flagSource(), applicationRepository.clock(), interval, true, true); + this.maxSessionsToDelete = 50; + } + + SessionsMaintainer(ApplicationRepository applicationRepository, Curator curator, Duration interval, int maxSessionsToDelete) { + super(applicationRepository, curator, applicationRepository.flagSource(), applicationRepository.clock(), + interval, true, true); + this.maxSessionsToDelete = maxSessionsToDelete; } @Override protected double maintain() { - applicationRepository.deleteExpiredSessions(); + applicationRepository.deleteExpiredSessions(maxSessionsToDelete); return 1.0; } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ActivatedModelsBuilder.java b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ActivatedModelsBuilder.java index 5017f25b2f83..6fc5a0881872 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ActivatedModelsBuilder.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ActivatedModelsBuilder.java @@ -14,7 +14,6 @@ import com.yahoo.config.model.api.Provisioned; import com.yahoo.config.model.application.provider.MockFileRegistry; import com.yahoo.config.provision.ApplicationId; -import 
com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.TenantName; import com.yahoo.config.provision.Zone; @@ -35,9 +34,6 @@ import com.yahoo.vespa.config.server.tenant.TenantRepository; import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.model.VespaModel; -import com.yahoo.vespa.model.container.ApplicationContainerCluster; -import com.yahoo.vespa.model.content.cluster.ContentCluster; import java.util.Comparator; import java.util.List; @@ -168,6 +164,7 @@ private ModelContext.Properties createModelContextProperties(ApplicationId appli .flatMap(new EndpointCertificateRetriever(endpointCertificateSecretStores)::readEndpointCertificateSecrets), zkClient.readAthenzDomain(), zkClient.readQuota(), + zkClient.readTenantVaults(), zkClient.readTenantSecretStores(), secretStore, zkClient.readOperatorCertificates(), diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java index 129e6c5f9c61..76a2405767de 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/ModelsBuilder.java @@ -86,42 +86,33 @@ public abstract class ModelsBuilder { public List buildModels(ApplicationId applicationId, Optional dockerImageRepository, Version wantedNodeVespaVersion, + Optional versionToBuildFirst, ApplicationPackage applicationPackage, AllocatedHostsFromAllModels allocatedHosts, Instant now) { Instant start = Instant.now(); log.log(Level.FINE, () -> "Will build models for " + applicationId); - Set versions = modelFactoryRegistry.allVersions(); - - // If the application specifies a major, skip models on a newer major - Optional requestedMajorVersion = applicationPackage.getMajorVersion(); - if 
(requestedMajorVersion.isPresent()) { - versions = keepUpToMajorVersion(requestedMajorVersion.get(), versions); - if (versions.isEmpty()) - throw new UnknownVespaVersionException("No Vespa versions on or before major version " + - requestedMajorVersion.get() + " are present"); - } + Set versions = findVersionsToBuild(applicationPackage); // Load models one major version at a time (in reverse order) as new major versions are allowed // to be non-loadable in the case where an existing application is incompatible with a new // major version (which is possible by the definition of major) - List majorVersions = versions.stream() - .map(Version::getMajor) - .distinct() - .sorted(Comparator.reverseOrder()) - .toList(); + List majorVersions = majorVersionsNewestFirst(versions); - List allApplicationModels = new ArrayList<>(); + List builtModels = new ArrayList<>(); // Build latest model for latest major only, if that fails build latest model for previous major boolean buildLatestModelForThisMajor = true; for (int i = 0; i < majorVersions.size(); i++) { int majorVersion = majorVersions.get(i); + log.log(Level.FINE, "Building major " + majorVersion + ", versionToBuildFirst=" + versionToBuildFirst); try { - allApplicationModels.addAll(buildModelVersions(keepMajorVersion(majorVersion, versions), - applicationId, dockerImageRepository, wantedNodeVespaVersion, - applicationPackage, allocatedHosts, now, - buildLatestModelForThisMajor, majorVersion)); + builtModels.addAll(buildModelVersions(keepMajorVersion(majorVersion, versions), + applicationId, dockerImageRepository, wantedNodeVespaVersion, + applicationPackage, allocatedHosts, now, + buildLatestModelForThisMajor, + versionToBuildFirst, majorVersion)); buildLatestModelForThisMajor = false; // We have successfully built latest model version, do it only for this major + versionToBuildFirst = Optional.empty(); // Set to empty, cannot build this first on another major } catch (NodeAllocationException | ApplicationLockException | 
TransientException | QuotaExceededException e) { // Don't wrap this exception, and don't try to load other model versions as this is (most likely) @@ -146,12 +137,34 @@ public List buildModels(ApplicationId applicationId, } } log.log(Level.FINE, () -> "Done building models for " + applicationId + ". Built models for versions " + - allApplicationModels.stream() - .map(result -> result.getModel().version()) - .map(Version::toFullString) - .collect(Collectors.toSet()) + + builtModels.stream() + .map(result -> result.getModel().version()) + .map(Version::toFullString) + .collect(Collectors.toSet()) + " in " + Duration.between(start, Instant.now())); - return allApplicationModels; + return builtModels; + } + + private Set findVersionsToBuild(ApplicationPackage applicationPackage) { + Set versions = modelFactoryRegistry.allVersions(); + + // If the application specifies a major, skip models on a newer major + Optional requestedMajorVersion = applicationPackage.getMajorVersion(); + if (requestedMajorVersion.isPresent()) { + versions = keepUpToMajorVersion(requestedMajorVersion.get(), versions); + if (versions.isEmpty()) + throw new UnknownVespaVersionException("No Vespa versions on or before major version " + + requestedMajorVersion.get() + " are present"); + } + return versions; + } + + private static List majorVersionsNewestFirst(Set versions) { + return versions.stream() + .map(Version::getMajor) + .distinct() + .sorted(Comparator.reverseOrder()) + .toList(); } private boolean shouldSkipCreatingMajorVersionOnError(List majorVersions, Integer majorVersion, Version wantedVersion, @@ -177,25 +190,24 @@ private List buildModelVersions(Set versions, AllocatedHostsFromAllModels allocatedHosts, Instant now, boolean buildLatestModelForThisMajor, + Optional versionToBuildFirst, int majorVersion) { - List builtModelVersions = new ArrayList<>(); - Optional latest = Optional.empty(); + List built = new ArrayList<>(); if (buildLatestModelForThisMajor) { - latest = 
Optional.of(findLatest(versions)); - // load latest application version - MODELRESULT latestModelVersion = buildModelVersion(modelFactoryRegistry.getFactory(latest.get()), - applicationPackage, - applicationId, - wantedDockerImageRepository, - wantedNodeVespaVersion); - allocatedHosts.add(latestModelVersion.getModel().allocatedHosts(), latest.get()); - builtModelVersions.add(latestModelVersion); + if (versionToBuildFirst.isEmpty()) + versionToBuildFirst = Optional.of(findLatest(versions)); + var builtFirst = buildModelVersion(modelFactoryRegistry.getFactory(versionToBuildFirst.get()), + applicationPackage, + applicationId, + wantedDockerImageRepository, + wantedNodeVespaVersion); + allocatedHosts.add(builtFirst.getModel().allocatedHosts(), versionToBuildFirst.get()); + built.add(builtFirst); } - // load old model versions versions = versionsToBuild(versions, wantedNodeVespaVersion, majorVersion, allocatedHosts); for (Version version : versions) { - if (latest.isPresent() && version.equals(latest.get())) continue; // already loaded + if (alreadyBuilt(version, built)) continue; try { MODELRESULT modelVersion = buildModelVersion(modelFactoryRegistry.getFactory(version), @@ -204,13 +216,10 @@ private List buildModelVersions(Set versions, wantedDockerImageRepository, wantedNodeVespaVersion); allocatedHosts.add(modelVersion.getModel().allocatedHosts(), version); - builtModelVersions.add(modelVersion); + built.add(modelVersion); } catch (RuntimeException e) { - // allow failure to create old config models if there is a validation override that allow skipping old - // config models, or we're manually deploying - if (builtModelVersions.size() > 0 && - ( builtModelVersions.get(0).getModel().skipOldConfigModels(now) || zone().environment().isManuallyDeployed())) - log.log(Level.WARNING, applicationId + ": Failed to build version " + version + + if (allowBuildToFail(now, built)) + log.log(Level.INFO, applicationId + ": Failed to build version " + version + ", but allow failure 
due to validation override or manual deployment:" + Exceptions.toMessageString(e)); else { @@ -219,7 +228,22 @@ private List buildModelVersions(Set versions, } } } - return builtModelVersions; + return built; + } + + /** + * Allow build of other config models to fail if there is a validation override that allow skipping old + * config models, or we're manually deploying + */ + private boolean allowBuildToFail(Instant now, List built) { + return ! built.isEmpty() && + (built.get(0).getModel().skipOldConfigModels(now) || zone().environment().isManuallyDeployed()); + } + + private static boolean alreadyBuilt(Version version, List built) { + return built.stream() + .map(modelresult -> modelresult.getModel().version()) + .anyMatch(version::equals); } private Set versionsToBuild(Set versions, Version wantedVersion, int majorVersion, diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/PreparedModelsBuilder.java b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/PreparedModelsBuilder.java index fd8728ac655d..d2d7b74bdabe 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/PreparedModelsBuilder.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/modelfactory/PreparedModelsBuilder.java @@ -19,7 +19,6 @@ import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.api.Provisioned; import com.yahoo.config.model.api.ValidationParameters; -import com.yahoo.config.model.api.ValidationParameters.IgnoreValidationErrors; import com.yahoo.config.model.application.provider.FilesApplicationPackage; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.provision.AllocatedHosts; @@ -49,6 +48,8 @@ import java.util.logging.Level; import java.util.logging.Logger; +import static com.yahoo.config.model.api.ValidationParameters.IgnoreValidationErrors.FALSE; +import static com.yahoo.config.model.api.ValidationParameters.IgnoreValidationErrors.TRUE; import 
static com.yahoo.yolean.Exceptions.toMessageString; import static java.util.logging.Level.FINE; @@ -142,9 +143,8 @@ private ModelCreateResult createAndValidateModel(ModelFactory modelFactory, ModelContext modelContext) { log.log(FINE, () -> "Create and validate model " + modelVersion + " for " + applicationId + ", previous model " + (modelOf(modelVersion).isPresent() ? " exists" : "does not exist")); - ValidationParameters validationParameters = - new ValidationParameters(params.ignoreValidationErrors() ? IgnoreValidationErrors.TRUE : IgnoreValidationErrors.FALSE); - ModelCreateResult result = modelFactory.createAndValidateModel(modelContext, validationParameters); + var validationParameters = new ValidationParameters(params.ignoreValidationErrors() ? TRUE : FALSE); + var result = modelFactory.createAndValidateModel(modelContext, validationParameters); validateModelHosts(hostValidator, applicationId, result.getModel()); log.log(FINE, () -> "Done building model " + modelVersion + " for " + applicationId); params.getTimeoutBudget().assertNotTimedOut(() -> "prepare timed out after building model " + modelVersion + @@ -222,6 +222,7 @@ private ModelContext.Properties createModelContextProperties(Version modelVersio endpointCertificateSecrets, params.athenzDomain(), params.quota(), + params.tenantVaults(), params.tenantSecretStores(), secretStore, params.operatorCertificates(), diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/PrepareParams.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/PrepareParams.java index ec24cc17284f..e7cbf5245366 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/PrepareParams.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/PrepareParams.java @@ -6,6 +6,7 @@ import com.yahoo.config.model.api.EndpointCertificateMetadata; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import 
com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; import com.yahoo.config.provision.CloudAccount; @@ -24,6 +25,7 @@ import com.yahoo.vespa.config.server.tenant.DataplaneTokenSerializer; import com.yahoo.vespa.config.server.tenant.EndpointCertificateMetadataSerializer; import com.yahoo.vespa.config.server.tenant.TenantSecretStoreSerializer; +import com.yahoo.vespa.config.server.tenant.TenantVaultSerializer; import java.security.cert.X509Certificate; import java.time.Clock; @@ -32,7 +34,6 @@ import java.util.Objects; import java.util.Optional; import java.util.function.Function; -import java.util.stream.Collectors; /** * Parameters for preparing an application. Immutable. @@ -47,11 +48,13 @@ public final class PrepareParams { static final String DRY_RUN_PARAM_NAME = "dryRun"; static final String VERBOSE_PARAM_NAME = "verbose"; static final String VESPA_VERSION_PARAM_NAME = "vespaVersion"; + static final String VESPA_VERSION_TO_BUILD_FIRST_PARAM_NAME = "vespaVersionToBuildFirst"; static final String CONTAINER_ENDPOINTS_PARAM_NAME = "containerEndpoints"; static final String ENDPOINT_CERTIFICATE_METADATA_PARAM_NAME = "endpointCertificateMetadata"; static final String DOCKER_IMAGE_REPOSITORY = "dockerImageRepository"; static final String ATHENZ_DOMAIN = "athenzDomain"; static final String QUOTA_PARAM_NAME = "quota"; + static final String TENANT_VAULTS_PARAM_NAME = "tenantVaults"; static final String TENANT_SECRET_STORES_PARAM_NAME = "tenantSecretStores"; static final String FORCE_PARAM_NAME = "force"; static final String WAIT_FOR_RESOURCES_IN_PREPARE = "waitForResourcesInPrepare"; @@ -69,11 +72,13 @@ public final class PrepareParams { private final boolean force; private final boolean waitForResourcesInPrepare; private final Optional vespaVersion; + private final Optional vespaVersionToBuildFirst; private final List containerEndpoints; private final Optional 
endpointCertificateMetadata; private final Optional dockerImageRepository; private final Optional athenzDomain; private final Optional quota; + private final List tenantVaults; private final List tenantSecretStores; private final List operatorCertificates; private final Optional cloudAccount; @@ -87,11 +92,13 @@ private PrepareParams(ApplicationId applicationId, boolean isBootstrap, boolean isInternalRedeployment, Optional vespaVersion, + Optional vespaVersionToBuildFirst, List containerEndpoints, Optional endpointCertificateMetadata, Optional dockerImageRepository, Optional athenzDomain, Optional quota, + List tenantVaults, List tenantSecretStores, boolean force, boolean waitForResourcesInPrepare, @@ -106,11 +113,13 @@ private PrepareParams(ApplicationId applicationId, this.isBootstrap = isBootstrap; this.isInternalRedeployment = isInternalRedeployment; this.vespaVersion = vespaVersion; + this.vespaVersionToBuildFirst = vespaVersionToBuildFirst; this.containerEndpoints = containerEndpoints; this.endpointCertificateMetadata = endpointCertificateMetadata; this.dockerImageRepository = dockerImageRepository; this.athenzDomain = athenzDomain; this.quota = quota; + this.tenantVaults = tenantVaults; this.tenantSecretStores = tenantSecretStores; this.force = force; this.waitForResourcesInPrepare = waitForResourcesInPrepare; @@ -131,11 +140,13 @@ public static class Builder { private ApplicationId applicationId = null; private TimeoutBudget timeoutBudget = new TimeoutBudget(Clock.systemUTC(), Duration.ofSeconds(60)); private Optional vespaVersion = Optional.empty(); + private Optional vespaVersionToBuildFirst = Optional.empty(); private List containerEndpoints = null; private Optional endpointCertificateMetadata = Optional.empty(); private Optional dockerImageRepository = Optional.empty(); private Optional athenzDomain = Optional.empty(); private Optional quota = Optional.empty(); + private List tenantVaults = List.of(); private List tenantSecretStores = List.of(); private 
List operatorCertificates = List.of(); private Optional cloudAccount = Optional.empty(); @@ -192,6 +203,19 @@ public Builder vespaVersion(Version vespaVersion) { return this; } + public Builder vespaVersionToBuildFirst(String version) { + Optional v = Optional.empty(); + if (version != null && !version.isEmpty()) { + v = Optional.of(Version.fromString(version)); + } + return vespaVersionToBuildFirst(v); + } + + public Builder vespaVersionToBuildFirst(Optional version) { + this.vespaVersionToBuildFirst = version; + return this; + } + public Builder containerEndpoints(String serialized) { this.containerEndpoints = (serialized == null) ? List.of() @@ -250,6 +274,18 @@ public Builder quota(String serialized) { return this; } + public Builder tenantVaults(String serialized) { + List vaults = (serialized == null) + ? List.of() + : TenantVaultSerializer.listFromSlime(SlimeUtils.jsonToSlime(serialized).get()); + return tenantVaults(vaults); + } + + public Builder tenantVaults(List tenantVaults) { + this.tenantVaults = tenantVaults; + return this; + } + public Builder tenantSecretStores(String serialized) { List secretStores = (serialized == null) ? 
List.of() @@ -296,11 +332,13 @@ public PrepareParams build() { isBootstrap, isInternalRedeployment, vespaVersion, + vespaVersionToBuildFirst, containerEndpoints, endpointCertificateMetadata, dockerImageRepository, athenzDomain, quota, + tenantVaults, tenantSecretStores, force, waitForResourcesInPrepare, @@ -318,11 +356,13 @@ public static PrepareParams fromHttpRequest(HttpRequest request, TenantName tena .timeoutBudget(SessionHandler.getTimeoutBudget(request, barrierTimeout)) .applicationId(createApplicationId(request, tenant)) .vespaVersion(request.getProperty(VESPA_VERSION_PARAM_NAME)) + .vespaVersionToBuildFirst(request.getProperty(VESPA_VERSION_TO_BUILD_FIRST_PARAM_NAME)) .containerEndpoints(request.getProperty(CONTAINER_ENDPOINTS_PARAM_NAME)) .endpointCertificateMetadata(request.getProperty(ENDPOINT_CERTIFICATE_METADATA_PARAM_NAME)) .dockerImageRepository(request.getProperty(DOCKER_IMAGE_REPOSITORY)) .athenzDomain(request.getProperty(ATHENZ_DOMAIN)) .quota(request.getProperty(QUOTA_PARAM_NAME)) + .tenantVaults(request.getProperty(TENANT_VAULTS_PARAM_NAME)) .tenantSecretStores(request.getProperty(TENANT_SECRET_STORES_PARAM_NAME)) .force(request.getBooleanProperty(FORCE_PARAM_NAME)) .waitForResourcesInPrepare(request.getBooleanProperty(WAIT_FOR_RESOURCES_IN_PREPARE)) @@ -340,11 +380,13 @@ public static PrepareParams fromJson(byte[] json, TenantName tenant, Duration ba .timeoutBudget(SessionHandler.getTimeoutBudget(getTimeout(params, barrierTimeout))) .applicationId(createApplicationId(params, tenant)) .vespaVersion(SlimeUtils.optionalString(params.field(VESPA_VERSION_PARAM_NAME)).orElse(null)) + .vespaVersionToBuildFirst(SlimeUtils.optionalString(params.field(VESPA_VERSION_TO_BUILD_FIRST_PARAM_NAME)).orElse(null)) .containerEndpointList(deserialize(params.field(CONTAINER_ENDPOINTS_PARAM_NAME), ContainerEndpointSerializer::endpointListFromSlime, List.of())) .endpointCertificateMetadata(deserialize(params.field(ENDPOINT_CERTIFICATE_METADATA_PARAM_NAME), 
EndpointCertificateMetadataSerializer::fromSlime)) .dockerImageRepository(SlimeUtils.optionalString(params.field(DOCKER_IMAGE_REPOSITORY)).orElse(null)) .athenzDomain(SlimeUtils.optionalString(params.field(ATHENZ_DOMAIN)).orElse(null)) .quota(deserialize(params.field(QUOTA_PARAM_NAME), Quota::fromSlime)) + .tenantVaults(deserialize(params.field(TENANT_VAULTS_PARAM_NAME), TenantVaultSerializer::listFromSlime, List.of())) .tenantSecretStores(deserialize(params.field(TENANT_SECRET_STORES_PARAM_NAME), TenantSecretStoreSerializer::listFromSlime, List.of())) .force(booleanValue(params, FORCE_PARAM_NAME)) .waitForResourcesInPrepare(booleanValue(params, WAIT_FOR_RESOURCES_IN_PREPARE)) @@ -417,6 +459,9 @@ public String getApplicationName() { /** Returns the Vespa version the nodes running the prepared system should have, or empty to use the system version */ public Optional vespaVersion() { return vespaVersion; } + /** Returns the Vespa version to build first when building several models. A pinned application will have this set */ + public Optional vespaVersionToBuildFirst() { return vespaVersionToBuildFirst; } + /** Returns the container endpoints that should be made available for this deployment. 
One per cluster */ public List containerEndpoints() { return containerEndpoints; @@ -458,6 +503,10 @@ public Optional quota() { return quota; } + public List tenantVaults() { + return tenantVaults; + } + public List tenantSecretStores() { return tenantSecretStores; } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/Session.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/Session.java index 39025aa8374a..9008c837a9f7 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/Session.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/Session.java @@ -8,6 +8,7 @@ import com.yahoo.config.application.api.ApplicationPackage; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.AllocatedHosts; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; @@ -19,6 +20,7 @@ import com.yahoo.transaction.Transaction; import com.yahoo.vespa.config.server.application.ApplicationVersions; import com.yahoo.vespa.config.server.tenant.TenantRepository; + import java.security.cert.X509Certificate; import java.time.Instant; import java.util.List; @@ -130,6 +132,8 @@ public Optional getOptionalApplicationId() { public Version getVespaVersion() { return sessionZooKeeperClient.readVespaVersion(); } + public Optional getVersionToBuildFirst() { return sessionZooKeeperClient.readVersionToBuildFirst(); } + public Optional getAthenzDomain() { return sessionZooKeeperClient.readAthenzDomain(); } public Optional getQuota() { return sessionZooKeeperClient.readQuota(); } @@ -142,6 +146,10 @@ public Transaction createDeactivateTransaction() { return createSetStatusTransaction(Status.DEACTIVATE); } + public List getTenantVaults() { + return sessionZooKeeperClient.readTenantVaults(); + } + public List getTenantSecretStores() { return 
sessionZooKeeperClient.readTenantSecretStores(); } @@ -188,7 +196,7 @@ public ApplicationFile getApplicationFile(Path relativePath, LocalSession.Mode m return getApplicationPackage().getFile(relativePath); } - Optional applicationVersions() { return Optional.empty(); } + public Optional applicationVersions() { return Optional.empty(); } private void markSessionEdited() { setStatus(Session.Status.NEW); diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionData.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionData.java index 1757998882e4..e22af1262825 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionData.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionData.java @@ -5,6 +5,7 @@ import com.yahoo.config.FileReference; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; import com.yahoo.config.provision.CloudAccount; @@ -16,6 +17,7 @@ import com.yahoo.vespa.config.server.tenant.DataplaneTokenSerializer; import com.yahoo.vespa.config.server.tenant.OperatorCertificateSerializer; import com.yahoo.vespa.config.server.tenant.TenantSecretStoreSerializer; +import com.yahoo.vespa.config.server.tenant.TenantVaultSerializer; import java.io.IOException; import java.security.cert.X509Certificate; @@ -34,10 +36,12 @@ public record SessionData(ApplicationId applicationId, Optional applicationPackageReference, Version version, + Optional versionToBuildFirst, Instant created, Optional dockerImageRepository, Optional athenzDomain, Optional quota, + List tenantVaults, List tenantSecretStores, List operatorCertificates, Optional cloudAccount, @@ -48,10 +52,12 @@ public record SessionData(ApplicationId applicationId, static final String APPLICATION_ID_PATH = "applicationId"; 
static final String APPLICATION_PACKAGE_REFERENCE_PATH = "applicationPackageReference"; static final String VERSION_PATH = "version"; + static final String VERSION_TO_BUILD_FIRST_PATH = "versionToBuildFirst"; static final String CREATE_TIME_PATH = "createTime"; static final String DOCKER_IMAGE_REPOSITORY_PATH = "dockerImageRepository"; static final String ATHENZ_DOMAIN = "athenzDomain"; static final String QUOTA_PATH = "quota"; + static final String TENANT_VAULTS_PATH = "tenantVaults"; static final String TENANT_SECRET_STORES_PATH = "tenantSecretStores"; static final String OPERATOR_CERTIFICATES_PATH = "operatorCertificates"; static final String CLOUD_ACCOUNT_PATH = "cloudAccount"; @@ -74,11 +80,15 @@ private void toSlime(Cursor object) { object.setString(APPLICATION_ID_PATH, applicationId.serializedForm()); applicationPackageReference.ifPresent(ref -> object.setString(APPLICATION_PACKAGE_REFERENCE_PATH, ref.value())); object.setString(VERSION_PATH, version.toString()); + versionToBuildFirst.ifPresent(v -> object.setString(VERSION_TO_BUILD_FIRST_PATH, v.toString())); object.setLong(CREATE_TIME_PATH, created.toEpochMilli()); dockerImageRepository.ifPresent(image -> object.setString(DOCKER_IMAGE_REPOSITORY_PATH, image.asString())); athenzDomain.ifPresent(domain -> object.setString(ATHENZ_DOMAIN, domain.value())); quota.ifPresent(q -> q.toSlime(object.setObject(QUOTA_PATH))); + Cursor tenantVaultArray = object.setArray(TENANT_VAULTS_PATH); + TenantVaultSerializer.toSlime(tenantVaults, tenantVaultArray); + Cursor tenantSecretStoresArray = object.setArray(TENANT_SECRET_STORES_PATH); TenantSecretStoreSerializer.toSlime(tenantSecretStores, tenantSecretStoresArray); @@ -98,12 +108,16 @@ static SessionData fromSlime(Slime slime) { return new SessionData(ApplicationId.fromSerializedForm(cursor.field(APPLICATION_ID_PATH).asString()), optionalString(cursor.field(APPLICATION_PACKAGE_REFERENCE_PATH)).map(FileReference::new), 
Version.fromString(cursor.field(VERSION_PATH).asString()), + SlimeUtils.isPresent(cursor.field(VERSION_TO_BUILD_FIRST_PATH)) + ? Optional.of(Version.fromString(cursor.field(VERSION_TO_BUILD_FIRST_PATH).asString())) + : Optional.empty(), Instant.ofEpochMilli(cursor.field(CREATE_TIME_PATH).asLong()), optionalString(cursor.field(DOCKER_IMAGE_REPOSITORY_PATH)).map(DockerImage::fromString), optionalString(cursor.field(ATHENZ_DOMAIN)).map(AthenzDomain::from), SlimeUtils.isPresent(cursor.field(QUOTA_PATH)) ? Optional.of(Quota.fromSlime(cursor.field(QUOTA_PATH))) : Optional.empty(), + TenantVaultSerializer.listFromSlime(cursor.field(TENANT_VAULTS_PATH)), TenantSecretStoreSerializer.listFromSlime(cursor.field(TENANT_SECRET_STORES_PATH)), OperatorCertificateSerializer.fromSlime(cursor.field(OPERATOR_CERTIFICATES_PATH)), optionalString(cursor.field(CLOUD_ACCOUNT_PATH)).map(CloudAccount::from), diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionPreparer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionPreparer.java index 7f1d8678ed04..4a3af28f1b42 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionPreparer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionPreparer.java @@ -21,11 +21,11 @@ import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.AllocatedHosts; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; import com.yahoo.config.provision.CloudAccount; -import com.yahoo.config.provision.CloudName; import com.yahoo.config.provision.DataplaneToken; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.InstanceName; @@ -178,6 +178,8 @@ private class Preparation { /** The version of Vespa the application to be 
prepared specifies for its nodes */ final Version vespaVersion; + /** The version of Vespa to build first when there are several config models, empty if latest version should be built first */ + final Optional vespaVersionToBuildFirst; final ContainerEndpointsCache containerEndpointsCache; final List containerEndpoints; @@ -206,6 +208,7 @@ private class Preparation { this.applicationId = params.getApplicationId(); this.dockerImageRepository = params.dockerImageRepository(); this.vespaVersion = params.vespaVersion().orElse(Vtag.currentVersion); + this.vespaVersionToBuildFirst = params.vespaVersionToBuildFirst(); this.containerEndpointsCache = new ContainerEndpointsCache(tenantPath, curator); this.endpointCertificateMetadataStore = new EndpointCertificateMetadataStore(curator, tenantPath); EndpointCertificateRetriever endpointCertificateRetriever = new EndpointCertificateRetriever(endpointCertificateSecretStores); @@ -337,7 +340,8 @@ void vespaPreprocess(File appDir, File inputXml, ApplicationMetaData metaData, T AllocatedHosts buildModels(Instant now) { var allocatedHosts = new AllocatedHostsFromAllModels(); this.modelResultList = preparedModelsBuilder.buildModels(applicationId, dockerImageRepository, vespaVersion, - preprocessedApplicationPackage, allocatedHosts, now); + vespaVersionToBuildFirst, preprocessedApplicationPackage, + allocatedHosts, now); checkTimeout("build models"); return allocatedHosts.toAllocatedHosts(); } @@ -357,11 +361,13 @@ void writeStateZK(FileReference filereference) { Optional.of(filereference), dockerImageRepository, vespaVersion, + vespaVersionToBuildFirst, logger, prepareResult.getFileRegistries(), prepareResult.allocatedHosts(), athenzDomain, params.quota(), + params.tenantVaults(), params.tenantSecretStores(), params.operatorCertificates(), params.cloudAccount(), @@ -401,11 +407,13 @@ private void writeStateToZooKeeper(SessionZooKeeperClient zooKeeperClient, Optional fileReference, Optional dockerImageRepository, Version vespaVersion, 
+ Optional versionToBuildFirst, DeployLogger deployLogger, Map fileRegistryMap, AllocatedHosts allocatedHosts, Optional athenzDomain, Optional quota, + List tenantVaults, List tenantSecretStores, List operatorCertificates, Optional cloudAccount, @@ -420,8 +428,10 @@ private void writeStateToZooKeeper(SessionZooKeeperClient zooKeeperClient, fileReference, dockerImageRepository, vespaVersion, + versionToBuildFirst, athenzDomain, quota, + tenantVaults, tenantSecretStores, operatorCertificates, cloudAccount, diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java index 4787b76e4060..291fe7b37a0a 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java @@ -14,7 +14,6 @@ import com.yahoo.config.model.application.provider.DeployData; import com.yahoo.config.model.application.provider.FilesApplicationPackage; import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.CloudName; import com.yahoo.config.provision.TenantName; import com.yahoo.config.provision.Zone; import com.yahoo.container.jdisc.secretstore.SecretStore; @@ -238,11 +237,11 @@ private LocalSession getSessionFromFile(long sessionId) { return new LocalSession(tenantName, sessionId, applicationPackage, sessionZKClient); } - public Set getLocalSessionsIdsFromFileSystem() { + public List getLocalSessionsIdsFromFileSystem() { File[] sessions = tenantFileSystemDirs.sessionsPath().listFiles(sessionApplicationsFilter); - if (sessions == null) return Set.of(); + if (sessions == null) return List.of(); - Set sessionIds = new HashSet<>(); + List sessionIds = new ArrayList<>(); for (File session : sessions) { long sessionId = Long.parseLong(session.getName()); sessionIds.add(sessionId); @@ -548,6 +547,7 @@ private 
ApplicationVersions loadApplication(Session session, Optional log.info("Existing tenant secret store:\n" + ss)); - } SessionSerializer sessionSerializer = new SessionSerializer(); sessionSerializer.write(session.getSessionZooKeeperClient(), applicationId, @@ -590,9 +585,11 @@ private void write(Session existingSession, LocalSession session, ApplicationId existingSession.getApplicationPackageReference(), existingSession.getDockerImageRepository(), existingSession.getVespaVersion(), + existingSession.getVersionToBuildFirst(), existingSession.getAthenzDomain(), existingSession.getQuota(), - tenantSecretStores, + existingSession.getTenantVaults(), + existingSession.getTenantSecretStores(), existingSession.getOperatorCertificates(), existingSession.getCloudAccount(), existingSession.getDataplaneTokens(), @@ -606,26 +603,43 @@ public SessionData read(Session session) { // ---------------- Common stuff ---------------------------------------------------------------- - public void deleteExpiredRemoteAndLocalSessions(Predicate sessionIsActiveForApplication) { + public void deleteExpiredRemoteAndLocalSessions(Predicate sessionIsActiveForApplication, int maxSessionsToDelete) { // All known sessions, both local (file) and remote (zookeeper) - Set sessions = getLocalSessionsIdsFromFileSystem(); + List sessions = getLocalSessionsIdsFromFileSystem(); sessions.addAll(getRemoteSessionsFromZooKeeper()); + if (sessions.isEmpty()) return; + log.log(Level.FINE, () -> "Sessions for tenant " + tenantName + ": " + sessions); // Skip sessions newly added (we might have a session in the file system, but not in ZooKeeper, // we will exclude these) Set newSessions = findNewSessionsInFileSystem(); sessions.removeAll(newSessions); + Collections.sort(sessions); // Avoid deleting too many in one run - int deleteMax = (int) Math.min(1000, Math.max(50, sessions.size() * 0.05)); + int deleteMax = (int) Math.min(1000, Math.max(maxSessionsToDelete, sessions.size() * 0.05)); int deletedRemoteSessions = 
0; int deletedLocalSessions = 0; for (Long sessionId : sessions) { try { Session session = remoteSessionCache.get(sessionId); - if (session == null) - session = new RemoteSession(tenantName, sessionId, createSessionZooKeeperClient(sessionId)); + Instant createTime; + Optional localSessionCreateTime = Optional.empty(); + boolean deleteRemoteSession = true; + if (session == null) { + // If remote session is missing (deleted from zookeeper) it will only be present in file system, + // so use local session and its creation time from file system + var localSession = getOptionalSessionFromFileSystem(sessionId); + if (localSession.isEmpty()) continue; + + session = localSession.get(); + createTime = localSessionCreated((LocalSession) session); + localSessionCreateTime= Optional.of(createTime); + deleteRemoteSession = false; + } else { + createTime = session.getCreateTime(); + } Optional applicationId = session.getOptionalApplicationId(); try (var ignored = lockApplication(applicationId)) { @@ -633,19 +647,21 @@ public void deleteExpiredRemoteAndLocalSessions(Predicate sessionIsActi boolean activeForApplication = sessionIsActiveForApplication.test(session); if (status == ACTIVATE && activeForApplication) continue; - Instant createTime = session.getCreateTime(); boolean hasExpired = hasExpired(createTime); + log.log(Level.FINE, "Session " + sessionId + ", status " + status + ", has expired: " + hasExpired); if (! 
hasExpired) continue; - log.log(Level.FINE, () -> "Remote session " + sessionId + " for " + tenantName + " has expired, deleting it"); - deleteRemoteSessionFromZooKeeper(session); - deletedRemoteSessions++; + log.log(Level.FINE, "session " + sessionId + ", status " + status + + ", remote session created " + createTime + + ", local session created " + localSessionCreateTime); + if (deleteRemoteSession) { + log.log(Level.FINE, () -> "Remote session " + sessionId + " for " + tenantName + " has expired, deleting it"); + deleteRemoteSessionFromZooKeeper(session); + deletedRemoteSessions++; + } - var localSessionCanBeDeleted = canBeDeleted(sessionId, status, createTime, activeForApplication); - log.log(Level.FINE, () -> "Expired local session " + sessionId + - ", status " + status + (status == UNKNOWN ? "" : ", created " + createTime) + - ", can be deleted: " + localSessionCanBeDeleted); - if (localSessionCanBeDeleted) { + if (localSessionCanBeDeleted(status, createTime, activeForApplication)) { + log.log(Level.FINE, () -> "Local session " + sessionId + " for " + tenantName + " has expired, deleting it"); deleteLocalSession(sessionId); deletedLocalSessions++; } @@ -657,7 +673,7 @@ public void deleteExpiredRemoteAndLocalSessions(Predicate sessionIsActi } } log.log(Level.FINE, "Deleted " + deletedRemoteSessions + " remote and " + deletedLocalSessions + - " local sessions that had expired"); + " local sessions for tenant " + tenantName + " that had expired"); } private record ApplicationLock(Optional lock) implements Closeable { @@ -695,21 +711,25 @@ private boolean hasExpired(Instant created) { private long sessionLifeTimeInSeconds() { return configserverConfig.sessionLifetime(); } - private boolean canBeDeleted(long sessionId, Session.Status status, Instant createTime, boolean activeForApplication) { - // Delete Sessions with state other than UNKNOWN or ACTIVATE or old sessions in UNKNOWN state - if ( ! 
List.of(UNKNOWN, ACTIVATE).contains(status) || oldSessionDirWithUnknownStatus(sessionId, status)) + private boolean localSessionCanBeDeleted(Session.Status status, Instant createTime, boolean activeForApplication) { + // Delete sessions with state other than UNKNOWN or ACTIVATE or old sessions in UNKNOWN state + if ( ! List.of(UNKNOWN, ACTIVATE).contains(status) || oldSessionDirWithUnknownStatus(createTime, status)) return true; // This might happen if remote session is gone, but local session is not return isOldAndCanBeDeleted(createTime) && !activeForApplication; } - private boolean oldSessionDirWithUnknownStatus(long sessionId, Session.Status status) { + private boolean oldSessionDirWithUnknownStatus(Instant created, Session.Status status) { Duration expiryTime = Duration.ofHours(configserverConfig.keepSessionsWithUnknownStatusHours()); - File sessionDir = tenantFileSystemDirs.getUserApplicationDir(sessionId); - return sessionDir.exists() + return created != Instant.EPOCH // We don't know anything about creation time for this session && status == UNKNOWN - && created(sessionDir).plus(expiryTime).isBefore(clock.instant()); + && created.plus(expiryTime).isBefore(clock.instant()); + } + + private Instant localSessionCreated(LocalSession session) { + File sessionDir = tenantFileSystemDirs.getUserApplicationDir(session.getSessionId()); + return sessionDir.exists() ? 
created(sessionDir) : Instant.EPOCH; } private Set findNewSessionsInFileSystem() { diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionSerializer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionSerializer.java index 438db91721f9..310f084b0080 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionSerializer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionSerializer.java @@ -5,6 +5,7 @@ import com.yahoo.config.FileReference; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; import com.yahoo.config.provision.CloudAccount; @@ -31,16 +32,20 @@ public class SessionSerializer { void write(SessionZooKeeperClient zooKeeperClient, ApplicationId applicationId, Instant created, Optional fileReference, Optional dockerImageRepository, - Version vespaVersion, Optional athenzDomain, Optional quota, - List tenantSecretStores, List operatorCertificates, - Optional cloudAccount, List dataplaneTokens, ActivationTriggers activationTriggers, + Version vespaVersion, Optional versionToBuildFirst, + Optional athenzDomain, Optional quota, + List tenantVaults, List tenantSecretStores, + List operatorCertificates, Optional cloudAccount, + List dataplaneTokens, ActivationTriggers activationTriggers, BooleanFlag writeSessionData) { zooKeeperClient.writeApplicationId(applicationId); zooKeeperClient.writeApplicationPackageReference(fileReference); zooKeeperClient.writeVespaVersion(vespaVersion); + zooKeeperClient.writeVersionToBuildFirst(versionToBuildFirst); zooKeeperClient.writeDockerImageRepository(dockerImageRepository); zooKeeperClient.writeAthenzDomain(athenzDomain); zooKeeperClient.writeQuota(quota); + zooKeeperClient.writeTenantVaults(tenantVaults); 
zooKeeperClient.writeTenantSecretStores(tenantSecretStores); zooKeeperClient.writeOperatorCertificates(operatorCertificates); zooKeeperClient.writeCloudAccount(cloudAccount); @@ -50,10 +55,12 @@ void write(SessionZooKeeperClient zooKeeperClient, ApplicationId applicationId, zooKeeperClient.writeSessionData(new SessionData(applicationId, fileReference, vespaVersion, + versionToBuildFirst, created, dockerImageRepository, athenzDomain, quota, + tenantVaults, tenantSecretStores, operatorCertificates, cloudAccount, @@ -77,10 +84,12 @@ private static SessionData readSessionDataFromLegacyPaths(SessionZooKeeperClient return new SessionData(zooKeeperClient.readApplicationId(), zooKeeperClient.readApplicationPackageReference(), zooKeeperClient.readVespaVersion(), + zooKeeperClient.readVersionToBuildFirst(), zooKeeperClient.readCreateTime(), zooKeeperClient.readDockerImageRepository(), zooKeeperClient.readAthenzDomain(), zooKeeperClient.readQuota(), + zooKeeperClient.readTenantVaults(), zooKeeperClient.readTenantSecretStores(), zooKeeperClient.readOperatorCertificates(), zooKeeperClient.readCloudAccount(), diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClient.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClient.java index 1f000bc5856f..5c834203cba7 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClient.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClient.java @@ -9,6 +9,7 @@ import com.yahoo.config.model.api.ConfigDefinitionRepo; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.AllocatedHosts; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.AthenzDomain; @@ -30,12 +31,14 @@ import 
com.yahoo.vespa.config.server.tenant.OperatorCertificateSerializer; import com.yahoo.vespa.config.server.tenant.TenantRepository; import com.yahoo.vespa.config.server.tenant.TenantSecretStoreSerializer; +import com.yahoo.vespa.config.server.tenant.TenantVaultSerializer; import com.yahoo.vespa.config.server.zookeeper.ZKApplication; import com.yahoo.vespa.config.server.zookeeper.ZKApplicationPackage; import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.curator.transaction.CuratorOperations; import com.yahoo.vespa.curator.transaction.CuratorTransaction; import org.apache.zookeeper.data.Stat; + import java.security.cert.X509Certificate; import java.time.Duration; import java.time.Instant; @@ -51,10 +54,12 @@ import static com.yahoo.vespa.config.server.session.SessionData.CREATE_TIME_PATH; import static com.yahoo.vespa.config.server.session.SessionData.DATAPLANE_TOKENS_PATH; import static com.yahoo.vespa.config.server.session.SessionData.DOCKER_IMAGE_REPOSITORY_PATH; +import static com.yahoo.vespa.config.server.session.SessionData.VERSION_TO_BUILD_FIRST_PATH; import static com.yahoo.vespa.config.server.session.SessionData.OPERATOR_CERTIFICATES_PATH; import static com.yahoo.vespa.config.server.session.SessionData.QUOTA_PATH; import static com.yahoo.vespa.config.server.session.SessionData.SESSION_DATA_PATH; import static com.yahoo.vespa.config.server.session.SessionData.TENANT_SECRET_STORES_PATH; +import static com.yahoo.vespa.config.server.session.SessionData.TENANT_VAULTS_PATH; import static com.yahoo.vespa.config.server.session.SessionData.VERSION_PATH; import static com.yahoo.vespa.config.server.zookeeper.ZKApplication.USER_DEFCONFIGS_ZK_SUBPATH; import static com.yahoo.vespa.curator.Curator.CompletionWaiter; @@ -187,9 +192,9 @@ private Path applicationPackageReferencePath() { return sessionPath.append(APPLICATION_PACKAGE_REFERENCE_PATH); } - private Path versionPath() { - return sessionPath.append(VERSION_PATH); - } + private Path versionPath() { return 
sessionPath.append(VERSION_PATH); } + + private Path versionToBuildFirstPath() { return sessionPath.append(VERSION_TO_BUILD_FIRST_PATH); } private Path dockerImageRepositoryPath() { return sessionPath.append(DOCKER_IMAGE_REPOSITORY_PATH); @@ -203,6 +208,10 @@ private Path quotaPath() { return sessionPath.append(QUOTA_PATH); } + private Path tenantVaultPath() { + return sessionPath.append(TENANT_VAULTS_PATH); + } + private Path tenantSecretStorePath() { return sessionPath.append(TENANT_SECRET_STORES_PATH); } @@ -223,6 +232,10 @@ public void writeVespaVersion(Version version) { curator.set(versionPath(), Utf8.toBytes(version.toString())); } + public void writeVersionToBuildFirst (Optional version) { + version.ifPresent(v -> curator.set(versionToBuildFirstPath(), Utf8.toBytes(v.toString()))); + } + public void writeSessionData(SessionData sessionData) { curator.set(sessionPath.append(SESSION_DATA_PATH), sessionData.toJson()); } @@ -243,6 +256,11 @@ public Version readVespaVersion() { }); } + public Optional readVersionToBuildFirst() { + Optional data = curator.getData(versionToBuildFirstPath()); + return data.map(d -> Version.fromString(Utf8.toString(d))); + } + public Optional readDockerImageRepository() { Optional dockerImageRepository = curator.getData(dockerImageRepositoryPath()); return dockerImageRepository.map(d -> DockerImage.fromString(Utf8.toString(d))); @@ -315,6 +333,20 @@ public Optional readQuota() { .map(slime -> Quota.fromSlime(slime.get())); } + public void writeTenantVaults(List tenantVaults) { + if (! 
tenantVaults.isEmpty()) { + var bytes = uncheck(() -> SlimeUtils.toJsonBytes(TenantVaultSerializer.toSlime(tenantVaults))); + curator.set(tenantVaultPath(), bytes); + } + } + + public List readTenantVaults() { + return curator.getData(tenantVaultPath()) + .map(SlimeUtils::jsonToSlime) + .map(slime -> TenantVaultSerializer.listFromSlime(slime.get())) + .orElse(List.of()); + } + public void writeTenantSecretStores(List tenantSecretStores) { if (!tenantSecretStores.isEmpty()) { var bytes = uncheck(() -> SlimeUtils.toJsonBytes(TenantSecretStoreSerializer.toSlime(tenantSecretStores))); diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/TenantVaultSerializer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/TenantVaultSerializer.java new file mode 100644 index 000000000000..c60b0bf05907 --- /dev/null +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/TenantVaultSerializer.java @@ -0,0 +1,84 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.config.server.tenant; + +import com.yahoo.config.model.api.TenantVault; +import com.yahoo.slime.ArrayTraverser; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.Inspector; +import com.yahoo.slime.Slime; +import com.yahoo.slime.Type; + +import java.util.ArrayList; +import java.util.List; + +/** + * @author gjoranv + */ +public class TenantVaultSerializer { + + // WARNING: Since there are multiple servers in a ZooKeeper cluster, and they upgrade one by one + // (and rewrite all nodes on startup), changes to the serialized format must be made + // such that what is serialized on version N+1 can be read by version N: + // - ADDING FIELDS: Always ok + // - REMOVING FIELDS: Stop reading the field first. Stop writing it on a later version. + // - CHANGING THE FORMAT OF A FIELD: Don't do it, bro. 
+ + private static final String idField = "id"; + private static final String nameField = "name"; + private static final String externalIdField = "externalId"; + private static final String secretsArray = "secrets"; + + public static Slime toSlime(List vaults) { + Slime slime = new Slime(); + Cursor cursor = slime.setArray(); + toSlime(vaults, cursor); + return slime; + } + + public static void toSlime(List vaults, Cursor cursor) { + vaults.forEach(tenantVault -> toSlime(tenantVault, cursor.addObject())); + } + + private static void toSlime(TenantVault vault, Cursor object) { + object.setString(idField, vault.id()); + object.setString(nameField, vault.name()); + object.setString(externalIdField, vault.externalId()); + Cursor secrets = object.setArray(secretsArray); + vault.secrets().forEach(secret -> toSlime(secret, secrets.addObject())); + } + + private static void toSlime(TenantVault.Secret secret, Cursor object) { + object.setString("name", secret.name()); + object.setString("id", secret.id()); + } + + public static TenantVault fromSlime(Inspector inspector) { + if (inspector.type() == Type.OBJECT) { + return new TenantVault( + inspector.field(idField).asString(), + inspector.field(nameField).asString(), + inspector.field(externalIdField).asString(), + secretsFromSlime(inspector.field(secretsArray))); + } + throw new IllegalArgumentException("Unknown format encountered for tenant vaults!"); + } + + private static List secretsFromSlime(Inspector inspector) { + List secrets = new ArrayList<>(); + inspector.traverse(((ArrayTraverser)(idx, secret) -> secrets.add(secretFromSlime(secret)))); + return secrets; + } + + private static TenantVault.Secret secretFromSlime(Inspector inspector) { + return new TenantVault.Secret( + inspector.field("id").asString(), inspector.field("name").asString() + ); + } + + public static List listFromSlime(Inspector inspector) { + List tenantVaults = new ArrayList<>(); + inspector.traverse(((ArrayTraverser)(idx, vault) -> 
tenantVaults.add(fromSlime(vault)))); + return tenantVaults; + } + +} diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java index 3289cc71357f..5541f8256fe4 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java @@ -74,6 +74,7 @@ public void testModelContextTest() { Optional.empty(), Optional.empty(), List.of(), + List.of(), new SecretStoreProvider().get(), List.of(), Optional.empty(), diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java index b10d449ddbe7..2553238e7513 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java @@ -23,6 +23,7 @@ import com.yahoo.config.provision.Zone; import com.yahoo.slime.SlimeUtils; import com.yahoo.test.ManualClock; +import com.yahoo.vespa.config.server.ApplicationRepository; import com.yahoo.vespa.config.server.MockConfigConvergenceChecker; import com.yahoo.vespa.config.server.application.ApplicationReindexing; import com.yahoo.vespa.config.server.application.ConfigConvergenceChecker; @@ -32,7 +33,9 @@ import com.yahoo.vespa.config.server.http.v2.PrepareResult; import com.yahoo.vespa.config.server.maintenance.PendingRestartsMaintainer; import com.yahoo.vespa.config.server.model.TestModelFactory; +import com.yahoo.vespa.config.server.session.LocalSession; import com.yahoo.vespa.config.server.session.PrepareParams; +import com.yahoo.vespa.config.server.session.RemoteSession; import com.yahoo.vespa.model.application.validation.change.VespaReindexAction; import 
com.yahoo.vespa.model.application.validation.change.VespaRestartAction; import org.junit.Rule; @@ -161,6 +164,32 @@ public void testDeployMultipleVersions() { assertEquals(9, tester.getAllocatedHostsOf(tester.applicationId()).getHosts().size()); } + @Test + public void testDeployMultipleVersionsSpecifyingWhichVersionToBuildFirst() { + List modelFactories = List.of(createHostedModelFactory(Version.fromString("8.1.0")), + createHostedModelFactory(Version.fromString("8.2.0")), + createHostedModelFactory(Version.fromString("8.3.0"))); + var tester = new DeployTester.Builder(temporaryFolder) + .hostedConfigserverConfig(Zone.defaultZone()) + .modelFactories(modelFactories) + .build(); + var appRepo = tester.applicationRepository(); + var applicationId = tester.applicationId(); + + // Deploy as usual, only wanted version is set => 8.2.0 and 8.3.0 (latest version) are built + tester.deployApp("src/test/apps/hosted/", new PrepareParams.Builder() + .vespaVersion("8.2.0")); + assertEquals("8.2.0", appRepo.getActiveSession(applicationId).get().getVespaVersion().toFullString()); + assertEquals(List.of("8.2.0", "8.3.0"), appRepo.getActiveApplicationVersions(applicationId).get().versions().stream().map(Version::toFullString).toList()); + + // Deploy with vespaVersionToBuildFirst=8.2.0 and wanted version set to 8.2.0 => only 8.2.0 version is built + tester.deployApp("src/test/apps/hosted/", new PrepareParams.Builder() + .vespaVersion("8.2.0") + .vespaVersionToBuildFirst("8.2.0")); + assertEquals("8.2.0", appRepo.getActiveSession(applicationId).get().getVespaVersion().toFullString()); + assertEquals(List.of("8.2.0"), appRepo.getActiveApplicationVersions(applicationId).get().versions().stream().map(Version::toFullString).toList()); + } + /** * Test that only the minimal set of models are created (model versions used on hosts, the wanted version * and the latest version for the latest major) diff --git 
a/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainerTest.java index 3178efa0677b..0404df0f3b19 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainerTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainerTest.java @@ -23,6 +23,7 @@ import java.nio.file.Files; import java.time.Duration; import java.util.ArrayList; +import java.util.List; import static com.yahoo.vespa.config.server.session.Session.Status.PREPARE; import static com.yahoo.vespa.config.server.session.Session.Status.UNKNOWN; @@ -179,19 +180,45 @@ public void testDeletingInactiveSessions3() throws IOException { assertFalse(applicationPath.toFile().exists()); // App has been deleted } + @Test + public void testDeletionOfOldestFirst() { + // Delete max 1 session + tester = createTester(new InMemoryFlagSource(), 1); + + // Deploy some sessions when time goes backwards, to be able to have another + // order of sessions than increasing with time. 
3 is the oldest session, 4 is active, + // so 2 should be deleted when maintainer runs + tester.deployApp(testApp, prepareParams()); // session 2 (numbering starts at 2) + clock.retreat(Duration.ofMinutes(10)); + tester.deployApp(testApp, prepareParams()); // session 3 + clock.retreat(Duration.ofMinutes(10)); + tester.deployApp(testApp, prepareParams()); // session 4 + + clock.advance(Duration.ofMinutes(60)); + maintainer.run(); + + var sessions = sessionRepository.getRemoteSessionsFromZooKeeper(); + assertEquals(2, sessions.size()); + assertEquals(List.of(3L, 4L), sessions); + } + private MaintainerTester createTester() { return createTester(flagSource); } private MaintainerTester createTester(FlagSource flagSource) { + return createTester(flagSource, 50); + } + + private MaintainerTester createTester(FlagSource flagSource, int maxSessionsToDelete) { var tester = uncheck(() -> new MaintainerTester(clock, temporaryFolder, flagSource)); - return setup(tester); + return setup(tester, maxSessionsToDelete); } - private MaintainerTester setup(MaintainerTester tester) { + private MaintainerTester setup(MaintainerTester tester, int maxSessionsToDelete) { applicationRepository = tester.applicationRepository(); applicationRepository.tenantRepository().addTenant(applicationId.tenant()); - maintainer = new SessionsMaintainer(applicationRepository, tester.curator(), Duration.ofMinutes(1)); + maintainer = new SessionsMaintainer(applicationRepository, tester.curator(), Duration.ofMinutes(1), maxSessionsToDelete); sessionRepository = applicationRepository.getTenant(applicationId).getSessionRepository(); var serverdb = new File(applicationRepository.configserverConfig().configServerDBDir()); diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java index 3bbe13837fc0..0efdda7ce8d5 100644 --- 
a/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java @@ -1,10 +1,12 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.config.server.session; +import com.yahoo.component.Version; import com.yahoo.config.model.api.ApplicationClusterEndpoint; import com.yahoo.config.model.api.ContainerEndpoint; import com.yahoo.config.model.api.EndpointCertificateMetadata; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.CloudAccount; import com.yahoo.config.provision.TenantName; @@ -18,6 +20,7 @@ import com.yahoo.vespa.config.server.tenant.ContainerEndpointSerializer; import com.yahoo.vespa.config.server.tenant.EndpointCertificateMetadataSerializer; import com.yahoo.vespa.config.server.tenant.TenantSecretStoreSerializer; +import com.yahoo.vespa.config.server.tenant.TenantVaultSerializer; import org.junit.Test; import java.io.IOException; @@ -64,6 +67,7 @@ public void testCorrectParsing() { assertFalse(prepareParams.isVerbose()); assertFalse(prepareParams.ignoreValidationErrors()); assertTrue(prepareParams.vespaVersion().isEmpty()); + assertTrue(prepareParams.vespaVersionToBuildFirst().isEmpty()); assertTrue(prepareParams.getTimeoutBudget().hasTimeLeft()); assertTrue(prepareParams.containerEndpoints().isEmpty()); assertTrue(prepareParams.cloudAccount().isEmpty()); @@ -170,6 +174,35 @@ public void testOperatorCertificates() throws IOException { assertEquals(certificate, prepareParams.operatorCertificates().get(0)); } + @Test + public void testTenantVaults() throws IOException { + List tenantVaults = List.of(new TenantVault( + "id", "name", "extId", + List.of(new TenantVault.Secret("sId", "sName")))); + + Slime tenantVaultSlime = 
TenantVaultSerializer.toSlime(tenantVaults); + String tenantVaultParam = new String(SlimeUtils.toJsonBytes(tenantVaultSlime), StandardCharsets.UTF_8); + + var prepareParams = createParams(request + "&" + PrepareParams.TENANT_VAULTS_PARAM_NAME + "=" + + URLEncoder.encode(tenantVaultParam, StandardCharsets.UTF_8), + TenantName.from("foo")); + + assertEquals(1, prepareParams.tenantVaults().size()); + TenantVault tenantVault = prepareParams.tenantVaults().get(0); + assertEquals("id", tenantVault.id()); + assertEquals("name", tenantVault.name()); + assertEquals("extId", tenantVault.externalId()); + assertEquals(1, tenantVault.secrets().size()); + assertEquals("sId", tenantVault.secrets().get(0).id()); + assertEquals("sName", tenantVault.secrets().get(0).name()); + + // Verify using json object + var root = SlimeUtils.jsonToSlime(json); + new Injector().inject(tenantVaultSlime.get(), new ObjectInserter(root.get(), PrepareParams.TENANT_VAULTS_PARAM_NAME)); + PrepareParams prepareParamsJson = PrepareParams.fromJson(SlimeUtils.toJsonBytes(root), TenantName.from("foo"), Duration.ofSeconds(60)); + assertPrepareParamsEqual(prepareParams, prepareParamsJson); + } + @Test public void testSecretStores() throws IOException { List secretStores = List.of(new TenantSecretStore("name", "awsId", "role", "extId")); @@ -198,6 +231,13 @@ public void testCloudAccount() { assertEquals(CloudAccount.from("012345678912"), params.cloudAccount().get()); } + @Test + public void testFirstVespaVersionToBuild() { + String json = "{\"vespaVersionToBuildFirst\": \"8.3.0\"}"; + PrepareParams params = PrepareParams.fromJson(json.getBytes(StandardCharsets.UTF_8), TenantName.defaultName(), Duration.ZERO); + assertEquals(Version.fromString("8.3.0"), params.vespaVersionToBuildFirst().get()); + } + private void assertPrepareParamsEqual(PrepareParams urlParams, PrepareParams jsonParams) { assertEquals(urlParams.ignoreValidationErrors(), jsonParams.ignoreValidationErrors()); assertEquals(urlParams.isDryRun(), 
jsonParams.isDryRun()); @@ -208,11 +248,13 @@ private void assertPrepareParamsEqual(PrepareParams urlParams, PrepareParams jso assertEquals(urlParams.getApplicationId(), jsonParams.getApplicationId()); assertEquals(urlParams.getTimeoutBudget().timeout(), jsonParams.getTimeoutBudget().timeout()); assertEquals(urlParams.vespaVersion(), jsonParams.vespaVersion()); + assertEquals(urlParams.vespaVersionToBuildFirst(), jsonParams.vespaVersionToBuildFirst()); assertEquals(urlParams.containerEndpoints(), jsonParams.containerEndpoints()); assertEquals(urlParams.endpointCertificateMetadata(), jsonParams.endpointCertificateMetadata()); assertEquals(urlParams.dockerImageRepository(), jsonParams.dockerImageRepository()); assertEquals(urlParams.athenzDomain(), jsonParams.athenzDomain()); assertEquals(urlParams.quota(), jsonParams.quota()); + assertEquals(urlParams.tenantVaults(), jsonParams.tenantVaults()); assertEquals(urlParams.tenantSecretStores(), jsonParams.tenantSecretStores()); } diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java index d6631cc42b6f..26dbb5724b33 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionZooKeeperClientTest.java @@ -6,6 +6,7 @@ import com.yahoo.config.FileReference; import com.yahoo.config.model.api.Quota; import com.yahoo.config.model.api.TenantSecretStore; +import com.yahoo.config.model.api.TenantVault; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.TenantName; import com.yahoo.path.Path; @@ -146,6 +147,20 @@ public void require_quota_written_and_parsed() { assertEquals(quota, zkc.readQuota()); } + @Test + public void tenant_vaults_are_written_and_parsed() { + var vaults = List.of( + new TenantVault("id1", "name1", 
"extId1", + List.of(new TenantVault.Secret("sId1", "sName1"))), + new TenantVault("id2", "name2", "extId2", + List.of(new TenantVault.Secret("sId2", "sName2")))); + + var zkc = createSessionZKClient(4); + zkc.writeTenantVaults(vaults); + List actual = zkc.readTenantVaults(); + assertEquals(vaults, actual); + } + @Test public void require_tenant_secret_stores_written_and_parsed() { var secretStores = List.of( @@ -165,12 +180,14 @@ public void require_that_session_data_is_written_to_zk() { zkc.writeSessionData(new SessionData(ApplicationId.defaultId(), Optional.of(new FileReference("foo")), Version.fromString("8.195.1"), + Optional.empty(), Instant.now(), Optional.empty(), Optional.empty(), Optional.empty(), List.of(), List.of(), + List.of(), Optional.empty(), List.of(), ActivationTriggers.empty())); @@ -178,7 +195,7 @@ public void require_that_session_data_is_written_to_zk() { assertTrue(curator.exists(path)); String data = Utf8.toString(curator.getData(path).get()); assertTrue(data.contains("{\"applicationId\":\"default:default:default\",\"applicationPackageReference\":\"foo\",\"version\":\"8.195.1\",\"createTime\":")); - assertTrue(data.contains(",\"tenantSecretStores\":[],\"operatorCertificates\":[],\"dataplaneTokens\":[]," + + assertTrue(data.contains(",\"tenantVaults\":[],\"tenantSecretStores\":[],\"operatorCertificates\":[],\"dataplaneTokens\":[]," + "\"activationTriggers\":{\"nodeRestarts\":[],\"reindexings\":[]}")); } diff --git a/container-disc/src/main/java/ai/vespa/secret/model/SecretValue.java b/container-disc/src/main/java/ai/vespa/secret/model/SecretValue.java index aec491382fc3..bc940f0fb689 100644 --- a/container-disc/src/main/java/ai/vespa/secret/model/SecretValue.java +++ b/container-disc/src/main/java/ai/vespa/secret/model/SecretValue.java @@ -5,10 +5,15 @@ */ public record SecretValue(String value) { + private static final int MAX_LENGTH = 64*1024; + public SecretValue { if (value == null || value.isBlank()) { throw new 
IllegalArgumentException("Secret value cannot be null or empty"); } + if (value.length() > MAX_LENGTH) { + throw new IllegalArgumentException("Secret value is too long"); + } } public static SecretValue of(String value) { diff --git a/container-disc/src/main/resources/configdefinitions/asm-secret.def b/container-disc/src/main/resources/configdefinitions/asm-secret.def index 268060131aea..f17ed686827a 100644 --- a/container-disc/src/main/resources/configdefinitions/asm-secret.def +++ b/container-disc/src/main/resources/configdefinitions/asm-secret.def @@ -4,8 +4,8 @@ package=ai.vespa.secret.config.aws ztsUri string athenzDomain string default="" +refreshInterval int default=30 # refresh interval in minutes -# TODO: move to a separaet config (and remove defaults). Only used by AsmTenantSecretReader -# Used to create the athenz role name when retrieving secrets on behalf of a tenant +# TODO: unused, remove after model version xxx.yyy has rolled out of hosted Vespa tenant string default="" system string default="" diff --git a/container-disc/src/main/resources/configdefinitions/asm-tenant-secret.def b/container-disc/src/main/resources/configdefinitions/asm-tenant-secret.def new file mode 100644 index 000000000000..3049c6429e23 --- /dev/null +++ b/container-disc/src/main/resources/configdefinitions/asm-tenant-secret.def @@ -0,0 +1,12 @@ +package=ai.vespa.secret.config.aws + +# Used to create the athenz role name when retrieving secrets on behalf of a tenant +system string +tenant string + +# Mapping used to create the AWS role name to assume for retrieving secrets +vaults[].id string +vaults[].name string +vaults[].externalId string +vaults[].secrets[].id string +vaults[].secrets[].name string diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index 5c70d3902c8e..6505bade5006 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -1793,6 +1793,7 @@ "public static final com.yahoo.prelude.query.TermType NOT", "public 
static final com.yahoo.prelude.query.TermType PHRASE", "public static final com.yahoo.prelude.query.TermType EQUIV", + "public static final com.yahoo.prelude.query.TermType WEAK_AND", "public static final com.yahoo.prelude.query.TermType DEFAULT", "public final java.lang.String name" ] diff --git a/container-search/src/main/java/com/yahoo/prelude/query/TermType.java b/container-search/src/main/java/com/yahoo/prelude/query/TermType.java index 0ce51ee6fc91..4e93617d3b65 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/TermType.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/TermType.java @@ -22,6 +22,8 @@ public class TermType { public static final TermType EQUIV = new TermType("equiv", Item.ItemType.EQUIV, EquivItem.class, null, ""); + public static final TermType WEAK_AND = new TermType("wand", Item.ItemType.WAND, WeakAndItem.class, null, "~"); + public static final TermType DEFAULT = new TermType("", Item.ItemType.AND, CompositeItem.class, AndItem.class, ""); public final String name; diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java index 0559bd808bc1..b8b5496cf6fb 100644 --- a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java @@ -19,7 +19,7 @@ import java.util.Map; /** - * A searcher which does parameterized collapsing. + * A searcher which removes hits which has an already seen value of a given field. 
* * @author Steinar Knutsen */ @@ -28,8 +28,9 @@ public class FieldCollapsingSearcher extends Searcher { private static final CompoundName collapse = CompoundName.from("collapse"); - private static final CompoundName collapsefield = CompoundName.from("collapsefield"); - private static final CompoundName collapsesize = CompoundName.from("collapsesize"); + // TODO: Use collapse.field and collapse.size and make these aliases + private static final CompoundName collapseField = CompoundName.from("collapsefield"); + private static final CompoundName collapseSize = CompoundName.from("collapsesize"); private static final CompoundName collapseSummaryName = CompoundName.from("collapse.summary"); /** Separator used for the fieldnames in collapsefield */ @@ -40,15 +41,13 @@ public class FieldCollapsingSearcher extends Searcher { /** * The max number of hits that will be preserved per unique - * value of the collapsing parameter, - * if no field-specific value is configured. + * value of the collapsing parameter, if no field-specific value is configured. */ private int defaultCollapseSize; /** * The factor by which to scale up the requested number of hits - * from the next searcher in the chain, because collapsing will - * likely delete many hits. + * from the next searcher in the chain, because collapsing will likely delete many hits. 
*/ private double extraFactor; @@ -60,10 +59,8 @@ public FieldCollapsingSearcher() { @Inject @SuppressWarnings("unused") public FieldCollapsingSearcher(QrSearchersConfig config) { - QrSearchersConfig.Com.Yahoo.Prelude.Searcher.FieldCollapsingSearcher - s = config.com().yahoo().prelude().searcher().FieldCollapsingSearcher(); - - init(s.collapsesize(), s.extrafactor()); + var searcherConfig = config.com().yahoo().prelude().searcher().FieldCollapsingSearcher(); + init(searcherConfig.collapsesize(), searcherConfig.extrafactor()); } /** @@ -82,6 +79,9 @@ public FieldCollapsingSearcher(int collapseSize, double extraFactor) { private void init(int collapseSize, double extraFactor) { this.defaultCollapseSize = collapseSize; this.extraFactor = extraFactor; + if (extraFactor < 1.0) { + throw new IllegalArgumentException("FieldCollapsingSearcher: extraFactor " + extraFactor + " should be >= 1.0"); + } } /** @@ -91,19 +91,17 @@ private void init(int collapseSize, double extraFactor) { */ @Override public Result search(com.yahoo.search.Query query, Execution execution) { - String collapseFieldParam = query.properties().getString(collapsefield); - + String collapseFieldParam = query.properties().getString(collapseField); if (collapseFieldParam == null) return execution.search(query); String[] collapseFields = collapseFieldParam.split(separator); - - int globalCollapseSize = query.properties().getInteger(collapsesize, defaultCollapseSize); - + int globalCollapseSize = query.properties().getInteger(collapseSize, defaultCollapseSize); query.properties().set(collapse, "0"); - int hitsToRequest = query.getHits() != 0 ? (int) Math.ceil((query.getOffset() + query.getHits() + 1) * extraFactor) : 0; + int wantedHits = query.getOffset() + query.getHits(); + int hitsToRequest = query.getHits() != 0 ? 
(int) Math.ceil((wantedHits + 1) * extraFactor) : 0; int nextOffset = 0; - int hitsAfterCollapse; + int hitsAfterCollapse = 0; boolean moreHitsAvailable = true; Map knownCollapses = new java.util.HashMap<>(); Result result = new Result(query); @@ -113,14 +111,15 @@ public Result search(com.yahoo.search.Query query, Execution execution) { String summaryClass = (collapseSummary == null) ? query.getPresentation().getSummary() : collapseSummary; query.trace("Collapsing by '" + Arrays.toString(collapseFields) + "' using summary '" + collapseSummary + "'", 2); - + boolean wantAnotherQuery; do { + if (performedQueries > 0) { + query.trace("Collapsing: retry " + performedQueries + ", only has " + hitsAfterCollapse + " hits, wanted " + wantedHits, 2); + } resultSource = search(query.clone(), execution, nextOffset, hitsToRequest); fill(resultSource, summaryClass, execution); - collapse(result, knownCollapses, resultSource, - collapseFields, query.properties(), globalCollapseSize - ); + collapse(result, knownCollapses, resultSource, collapseFields, query.properties(), globalCollapseSize); hitsAfterCollapse = result.getHitCount(); if (resultSource.getTotalHitCount() < (hitsToRequest + nextOffset)) { @@ -128,19 +127,21 @@ public Result search(com.yahoo.search.Query query, Execution execution) { moreHitsAvailable = false; } nextOffset += hitsToRequest; - if (hitsAfterCollapse < query.getOffset() + query.getHits()) { + if (hitsAfterCollapse < wantedHits) { hitsToRequest = (int) Math.ceil(hitsToRequest * extraFactor); } ++performedQueries; - - } while (hitsToRequest != 0 - && (hitsAfterCollapse < query.getOffset() + query.getHits()) - && moreHitsAvailable - && (performedQueries <= maxQueries)); - + wantAnotherQuery = (hitsToRequest != 0 + && (hitsAfterCollapse < wantedHits) + && moreHitsAvailable); + } while (wantAnotherQuery && (performedQueries <= maxQueries)); + // failure? 
+ if (wantAnotherQuery) { + query.trace("Collapsing: giving up after " + performedQueries + " performed queries, collapsing removed too many hits", 1); + } // Set correct meta information result.mergeWith(resultSource); - // Keep only (offset,.. offset+hits) hits + // Keep only (offset ... offset+hits) hits result.hits().trim(query.getOffset(), query.getHits()); // Mark query as query with collapsing query.properties().set(collapse, "1"); @@ -160,7 +161,6 @@ private Result search(Query query, Execution execution, int offset, int hits) { */ private void collapse(Result result, Map knownCollapses, Result resultSource, String[] collapseFields, Properties queryProperties, int globalCollapseSize) { - for (Hit unknownHit : resultSource.hits()) { if (!(unknownHit instanceof FastHit hit)) { result.hits().add(unknownHit); @@ -168,14 +168,10 @@ private void collapse(Result result, Map knownCollapses, Result } boolean addHit = true; - for (String collapseField : collapseFields) { - Object peek = hit.getField(collapseField); String collapseId = peek != null ? peek.toString() : null; - if (collapseId == null) { - continue; - } + if (collapseId == null) continue; // prepending the fieldname is necessary to distinguish between values in the different collapsefields // @ cannot occur in fieldnames @@ -199,19 +195,14 @@ private void collapse(Result result, Map knownCollapses, Result } } - if (addHit) { + if (addHit) result.hits().add(hit); - } } } private int getCollapseSize(Properties properties, String fieldName, int globalCollapseSize) { - Integer fieldCollapseSize = properties.getInteger(collapsesize.append(fieldName)); - - if (fieldCollapseSize != null) { - return fieldCollapseSize; - } - - return globalCollapseSize; + Integer fieldCollapseSize = properties.getInteger(collapseSize.append(fieldName)); + return fieldCollapseSize != null ? 
fieldCollapseSize : globalCollapseSize; } + } diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java index 91aef5698261..5119ec1a1e77 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java @@ -343,7 +343,9 @@ private void insertWithDesiredParentType(List items, int index, CompositeI CompositeItem newParent = newParent(desiredParentType); if (parentsParent != null && (! (parentsParent instanceof QueryTree) && parentsParent.getItemType() == newParent.getItemType())) { // Collapse - newParent = parentsParent; + for (Item item : items) + parentsParent.addItem(item); + return; } for (Item item : items) @@ -376,7 +378,7 @@ else if (newParent.acceptsItemsOfType(current.getItemType())) { // insert new pa } private CompositeItem newParent(TermType desiredParentType) { - return desiredParentType == TermType.DEFAULT ? 
new AndItem() : (CompositeItem)desiredParentType.createItemClass(); + return createType(desiredParentType); } private Item combineItems(Item first, Item second, TermType termType) { @@ -442,6 +444,8 @@ private CompositeItem createType(TermType termType) { if (termType == TermType.DEFAULT) { if (query.getModel().getType() == Query.Type.ANY) return new OrItem(); + else if (query.getModel().getType() == Query.Type.WEAKAND) + return new WeakAndItem(); else return new AndItem(); } diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java index fbcd8935ebe0..d59f9ad7f6ad 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java @@ -65,7 +65,7 @@ public Item toItem(String label) { /** Returns a new item representing this match */ public Item toItem(String label, String term) { - var newItem = new WordItem(term, label); + var newItem = new WordItem(term, label, true); newItem.setWeight(item.getWeight()); return newItem; } diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java index 42ad31382b89..7d4072efdd62 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java @@ -48,7 +48,7 @@ public void produce(RuleEvaluation e, int offset) { PhraseItem newPhrase = new PhraseItem(); newPhrase.setIndexName(getLabel()); for (String term : terms) - newPhrase.addItem(new WordItem(term)); + newPhrase.addItem(new WordItem(term, true)); Match matched = e.getNonreferencedMatch(0); insertMatch(e, matched, List.of(newPhrase), offset); diff --git 
a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralTermProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralTermProduction.java index 2e1c16902228..0f5cdcb0d03f 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralTermProduction.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralTermProduction.java @@ -61,7 +61,7 @@ public void setLiteral(String literal) { public String getLiteral() { return literal; } public void produce(RuleEvaluation e, int offset) { - WordItem newItem = new WordItem(literal, getLabel()); + WordItem newItem = new WordItem(literal, getLabel(), true); if (replacing) { Match matched = e.getNonreferencedMatch(0); newItem.setWeight(matched.getItem().getWeight()); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java index 1689f6d246e5..6c3a1251cc74 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java @@ -51,10 +51,14 @@ public class Dispatcher extends AbstractComponent { public static final String DISPATCH = "dispatch"; private static final String TOP_K_PROBABILITY = "topKProbability"; + private static final String DOCSUM_RETRY_LIMIT = "docsumRetryLimit"; + private static final String DOCSUM_RETRY_FACTOR = "docsumRetryFactor"; private static final int MAX_GROUP_SELECTION_ATTEMPTS = 3; /** If set will control computation of how many hits will be fetched from each partition.*/ public static final CompoundName topKProbability = CompoundName.from(DISPATCH + "." + TOP_K_PROBABILITY); + public static final CompoundName docsumRetryLimit = CompoundName.from(DISPATCH + "." + DOCSUM_RETRY_LIMIT); + public static final CompoundName docsumRetryFactor = CompoundName.from(DISPATCH + "." 
+ DOCSUM_RETRY_FACTOR); private final InvokerFactoryFactory invokerFactories; private final DispatchConfig dispatchConfig; @@ -101,6 +105,8 @@ T register(T invoker) { argumentType.setStrict(true); argumentType.setBuiltin(true); argumentType.addField(new FieldDescription(TOP_K_PROBABILITY, FieldType.doubleType)); + argumentType.addField(new FieldDescription(DOCSUM_RETRY_LIMIT, FieldType.integerType)); + argumentType.addField(new FieldDescription(DOCSUM_RETRY_FACTOR, FieldType.doubleType)); argumentType.freeze(); } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java index a246589ec7eb..20512465fb2b 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java @@ -96,6 +96,9 @@ protected Object sendSearchRequest(Query query, Object unusedContext) throws IOE ? estimateHitsToFetch(neededHits, invokers.size(), topkProbabilityOverrride) : estimateHitsToFetch(neededHits, invokers.size()); } + if (q < neededHits) { + query.trace("Only fetching " + q + " of " + neededHits + " hits per node (TopK probability for " + invokers.size() + " nodes)", 1); + } query.setHits(q); query.setOffset(0); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java index d31bd1f08fe8..7679705a1361 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java @@ -16,7 +16,6 @@ public interface RpcConnectionPool extends AutoCloseable { /** Returns a connection to the given node id. 
*/ Client.NodeConnection getConnection(int nodeId); - /** Will return a list of items that need a delayed close when updating node set. */ default Collection updateNodes(DispatchNodesConfig nodesConfig) { return List.of(); } @@ -24,4 +23,5 @@ public interface RpcConnectionPool extends AutoCloseable { @Override void close(); + default Collection knownNodeIds() { return List.of(); } } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java index fe6dc9abe199..8e3b64d1fba9 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcProtobufFillInvoker.java @@ -4,7 +4,6 @@ import ai.vespa.searchlib.searchprotocol.protobuf.SearchProtocol; import com.google.protobuf.InvalidProtocolBufferException; import com.yahoo.collections.ListMap; -import com.yahoo.collections.Pair; import com.yahoo.compress.Compressor; import com.yahoo.container.protect.Error; import com.yahoo.data.access.Inspector; @@ -14,6 +13,7 @@ import com.yahoo.prelude.fastsearch.TimeoutException; import com.yahoo.search.Query; import com.yahoo.search.Result; +import com.yahoo.search.dispatch.Dispatcher; import com.yahoo.search.dispatch.FillInvoker; import com.yahoo.search.dispatch.rpc.Client.ProtobufResponse; import com.yahoo.search.result.ErrorMessage; @@ -22,10 +22,12 @@ import com.yahoo.slime.BinaryFormat; import com.yahoo.slime.BinaryView; +import java.util.ArrayList; import java.util.List; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Level; import java.util.logging.Logger; @@ -45,18 +47,22 @@ enum DecodePolicy {EAGER, ONDEMAND} private final DocumentDatabase documentDb; private final RpcConnectionPool 
resourcePool; - private final boolean summaryNeedsQuery; + private boolean summaryNeedsQuery; private final String serverId; private final CompressPayload compressor; private final DecodePolicy decodePolicy; - private BlockingQueue, List>> responses; + private record ResponseAndHits(Client.ResponseOrError response, List hits) {} + + private BlockingQueue responses; /** Whether we have already logged/notified about an error - to avoid spamming */ private boolean hasReportedError = false; /** The number of responses we should receive (and process) before this is complete */ private int outstandingResponses; + private int numOkFilledHits = 0; + private int numHitsToFill = 0; RpcProtobufFillInvoker(RpcConnectionPool resourcePool, CompressPayload compressor, DocumentDatabase documentDb, String serverId, DecodePolicy decodePolicy, boolean summaryNeedsQuery) { @@ -71,11 +77,15 @@ enum DecodePolicy {EAGER, ONDEMAND} @Override protected void sendFillRequest(Result result, String summaryClass) { ListMap hitsByNode = hitsByNode(result); + int queueSize = Math.max(hitsByNode.size(), resourcePool.knownNodeIds().size()); + responses = new LinkedBlockingQueue<>(queueSize); + sendFillRequestByNode(result, summaryClass, hitsByNode); + } + void sendFillRequestByNode(Result result, String summaryClass, ListMap hitsByNode) { result.getQuery().trace(false, 5, "Sending ", hitsByNode.size(), " summary fetch requests with jrt/protobuf"); outstandingResponses = hitsByNode.size(); - responses = new LinkedBlockingQueue<>(outstandingResponses); var timeout = TimeoutHelper.calculateTimeout(result.getQuery()); if (timeout.timedOut()) { @@ -111,16 +121,18 @@ protected void release() { /** Called by a thread belonging to the client when a valid response becomes available */ public void receive(Client.ResponseOrError response, List hitsContext) { - responses.add(new Pair<>(response, hitsContext)); + responses.add(new ResponseAndHits(response, hitsContext)); } /** Return a map of hits by their 
search node (partition) id */ - private static ListMap hitsByNode(Result result) { + private final ListMap hitsByNode(Result result) { ListMap hitsByNode = new ListMap<>(); - for (Hit hit : (Iterable) result.hits()::unorderedDeepIterator) - if (hit instanceof FastHit fastHit) + for (Hit hit : (Iterable) result.hits()::unorderedDeepIterator) { + if (hit instanceof FastHit fastHit) { + ++numHitsToFill; hitsByNode.put(fastHit.getDistributionKey(), fastHit); - + } + } return hitsByNode; } @@ -135,34 +147,40 @@ private void sendDocsumsRequest(int nodeId, List hits, byte[] payload, log.warning("Got hits with node id " + nodeId + ", which is not included in the current dispatch config"); return; } - Query query = result.getQuery(); Compressor.Compression compressionResult = compressor.compress(query, payload); node.request(RPC_METHOD, compressionResult.type(), payload.length, compressionResult.data(), roe -> receive(roe, hits), clientTimeout); } + private ResponseAndHits getNextResponse(long timeLeftMs) throws InterruptedException { + if (timeLeftMs <= 0) { + return null; + } + var responseAndHits = responses.poll(timeLeftMs, TimeUnit.MILLISECONDS); + if (responseAndHits == null || responseAndHits.response().timeout()) { + return null; + } + return responseAndHits; + } + private void processResponses(Result result, String summaryClass) throws TimeoutException { try { - int skippedHits = 0; + List skippedHits = new ArrayList<>(); while (outstandingResponses > 0) { - long timeLeftMs = result.getQuery().getTimeLeft(); - if (timeLeftMs <= 0) { - throwTimeout(); - } - var responseAndHits = responses.poll(timeLeftMs, TimeUnit.MILLISECONDS); + var responseAndHits = getNextResponse(result.getQuery().getTimeLeft()); if (responseAndHits == null) { throwTimeout(); } - var response = responseAndHits.getFirst(); - if (response.timeout()) { - throwTimeout(); - } - var hitsContext = responseAndHits.getSecond(); - skippedHits += processResponse(result, response, hitsContext, 
summaryClass); + skippedHits.addAll(processOneResponse(result, responseAndHits, summaryClass, false)); outstandingResponses--; } - if (skippedHits != 0) { + if (skippedHits.isEmpty()) { + // all done OK + return; + } + maybeRetry(skippedHits, result, summaryClass); + if (! skippedHits.isEmpty()) { result.hits().addError(ErrorMessage .createEmptyDocsums("Missing hit summary data for summary " + summaryClass + " for " + skippedHits + " hits")); } @@ -171,11 +189,16 @@ private void processResponses(Result result, String summaryClass) throws Timeout } } - private int processResponse(Result result, Client.ResponseOrError responseOrError, List hitsContext, - String summaryClass) { + private List processOneResponse( + Result result, + ResponseAndHits responseAndHits, + String summaryClass, + boolean ignoreErrors) + { + var responseOrError = responseAndHits.response(); if (responseOrError.error().isPresent()) { - if (hasReportedError) { - return 0; + if (hasReportedError || ignoreErrors) { + return List.of(); } String error = responseOrError.error().get(); result.hits().addError(ErrorMessage.createBackendCommunicationError(error)); @@ -184,9 +207,9 @@ private int processResponse(Result result, Client.ResponseOrError hits, String summaryClass, byte[] payload) { + private List fill(Result result, List hits, String summaryClass, byte[] payload, boolean ignoreErrors) { try { var protobuf = SearchProtocol.DocsumReply.parseFrom(payload); var root = (decodePolicy == DecodePolicy.ONDEMAND) ? BinaryView.inspect(protobuf.getSlimeSummaries().toByteArray()) : BinaryFormat.decode(protobuf.getSlimeSummaries().toByteArray()).get(); - var errors = root.field("errors"); - boolean hasErrors = errors.valid() && (errors.entries() > 0); - if (hasErrors) { - addErrors(result, errors); + if (! 
ignoreErrors) { + var errors = root.field("errors"); + boolean hasErrors = errors.valid() && (errors.entries() > 0); + if (hasErrors) { + addErrors(result, errors); + } + convertErrorsFromDocsumReply(result, protobuf.getErrorsList()); } - convertErrorsFromDocsumReply(result, protobuf.getErrorsList()); - Inspector summaries = new SlimeAdapter(root.field("docsums")); if (!summaries.valid()) { - return 0; // No summaries; Perhaps we requested a non-existing summary class + return List.of(); // No summaries; Perhaps we requested a non-existing summary class } - int skippedHits = 0; + List skippedHits = new ArrayList<>(); for (int i = 0; i < hits.size(); i++) { Inspector summary = summaries.entry(i).field("docsum"); - if (summary.valid()) { - hits.get(i).setField(Hit.SDDOCNAME_FIELD, documentDb.schema().name()); - hits.get(i).addSummary(documentDb.getDocsumDefinitionSet().getDocsum(summaryClass), summary); - hits.get(i).setFilled(summaryClass); + FastHit hit = hits.get(i); + if (summary.valid() && ! hit.isFilled(summaryClass)) { + hit.setField(Hit.SDDOCNAME_FIELD, documentDb.schema().name()); + hit.addSummary(documentDb.getDocsumDefinitionSet().getDocsum(summaryClass), summary); + hit.setFilled(summaryClass); + ++numOkFilledHits; } else { - skippedHits++; + skippedHits.add(hit); } } return skippedHits; } catch (InvalidProtocolBufferException ex) { - log.log(Level.WARNING, "Invalid response to docsum request", ex); - result.hits().addError(ErrorMessage.createInternalServerError("Invalid response to docsum request from backend")); - return 0; + if (! ignoreErrors) { + log.log(Level.WARNING, "Invalid response to docsum request", ex); + result.hits().addError(ErrorMessage.createInternalServerError("Invalid response to docsum request from backend")); + } } + return List.of(); } private void throwTimeout() throws TimeoutException { throw new TimeoutException("Timed out waiting for summary data. 
" + outstandingResponses + " responses outstanding."); } + /* + * The content layer may return some empty docsums when redistribution is in progress, + * and in that case the document should be present on some other node, and we should + * be able to get the docsum from that node if we retry. But we don't know where + * that would be, so we need to try all possible nodes. + * To avoid overloading the content layer, we only retry if the number of skipped hits + * is below a tunable limit, and if the ratio of failed to ok hits is below another + * tunable limit (if too much failed on first try, it's likely not helpful to retry). + */ + private void maybeRetry(List skippedHits, Result result, String summaryClass) throws InterruptedException { + int numSkipped = skippedHits.size(); + var query = result.getQuery(); + double absoluteRetryLimit = query.properties().getInteger(Dispatcher.docsumRetryLimit, 10); + double retryLimitFactor = query.properties().getDouble(Dispatcher.docsumRetryFactor, 0.5); + double retryLimit = Math.min(absoluteRetryLimit, retryLimitFactor * numHitsToFill); + if (numSkipped < retryLimit) { + result.getQuery().trace(false, 1, "Retry summary fetching for " + numSkipped + " empty docsums (of " + numHitsToFill + " hits)"); + ListMap retryMap = new ListMap<>(); + for (Integer nodeId : resourcePool.knownNodeIds()) { + for (var hit : skippedHits) { + if (hit.getDistributionKey() != nodeId) { + retryMap.put(nodeId, hit); + } + } + } + // no retry if there is only one node + if (retryMap.size() > 0) { + if (shouldLogRetry()) { + log.log(Level.WARNING, "Retry docsum fetch for " + numSkipped + " hits (" + numOkFilledHits + " ok hits)"); + } + summaryNeedsQuery = true; + sendFillRequestByNode(result, summaryClass, retryMap); + while (outstandingResponses > 0 && numOkFilledHits < numHitsToFill) { + var responseAndHits = getNextResponse(query.getTimeLeft()); + if (responseAndHits == null) { + if (shouldLogRetryTimeout()) { + log.log(Level.WARNING, "Timed out 
waiting for summary data. " + outstandingResponses + " responses outstanding."); + } + break; + } + processOneResponse(result, responseAndHits, summaryClass, true); + outstandingResponses--; + } + skippedHits.removeIf(hit -> hit.isFilled(summaryClass)); + } + } else { + result.getQuery().trace(false, 1, "Summary fetching got " + numSkipped + " empty docsums (of " + numHitsToFill + " hits), no retry"); + if (shouldLogNoRetry()) { + log.log(Level.WARNING, "Docsum fetch failed for " + numSkipped + " hits (" + numOkFilledHits + " ok hits), no retry"); + } + } + } + + private static boolean shouldLogForCount(int count) { + if (count < 100) return true; + if (count < 1000) return (count % 100) == 0; + if (count < 100000) return (count % 1000) == 0; + return (count % 10000) == 0; + } + private static final AtomicInteger retryCounter = new AtomicInteger(); + private static final AtomicInteger noRetryCounter = new AtomicInteger(); + private static final AtomicInteger retryTimeoutCounter = new AtomicInteger(); + private static boolean shouldLogRetry() { + int count = retryCounter.getAndAdd(1); + return shouldLogForCount(count); + } + private static boolean shouldLogNoRetry() { + int count = noRetryCounter.getAndAdd(1); + return shouldLogForCount(count); + } + private static boolean shouldLogRetryTimeout() { + int count = retryTimeoutCounter.getAndAdd(1); + return shouldLogForCount(count); + } + } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java index d127ed69df5d..0f82c05658d6 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java @@ -1,7 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.search.dispatch.rpc; -import com.yahoo.search.dispatch.FillInvoker; import com.yahoo.search.dispatch.rpc.Client.NodeConnection; import com.yahoo.search.dispatch.rpc.RpcClient.RpcNodeConnection; import com.yahoo.vespa.config.search.DispatchConfig; @@ -16,8 +15,7 @@ import java.util.concurrent.ThreadLocalRandom; /** - * RpcResourcePool constructs {@link FillInvoker} objects that communicate with content nodes over RPC. It also contains - * the RPC connection pool. + * RpcResourcePool contains the RPC connection pool. * * @author ollivir */ @@ -86,6 +84,11 @@ public void close() { } } + @Override + public Collection knownNodeIds() { + return nodeConnectionPools.keySet(); + } + private static class NodeConnectionPool implements AutoCloseable { private final List connections; diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/MetricsSearcher.java b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/MetricsSearcher.java deleted file mode 100644 index e367bb58f804..000000000000 --- a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/MetricsSearcher.java +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.streamingvisitors; - -import com.yahoo.log.event.Event; -import com.yahoo.search.query.context.QueryContext; -import com.yahoo.search.result.ErrorMessage; -import com.yahoo.search.searchchain.Execution; -import com.yahoo.search.Query; -import com.yahoo.search.Result; -import com.yahoo.search.Searcher; -import com.yahoo.processing.request.CompoundName; -import com.yahoo.vdslib.VisitorStatistics; - -import java.util.Map; -import java.util.TreeMap; -import java.util.logging.Logger; - -import static com.yahoo.vespa.streamingvisitors.StreamingBackend.STREAMING_STATISTICS; - -/** - * Generates mail-specific query metrics. 
- */ -public class MetricsSearcher extends Searcher { - - private static final CompoundName metricsearcherId = CompoundName.from("metricsearcher.id"); - private static final CompoundName streamingLoadtype = CompoundName.from("streaming.loadtype"); - - private static final Logger log = Logger.getLogger(MetricsSearcher.class.getName()); - - static class Stats { - long latency = 0; - int count = 0; - int ok = 0; - int failed = 0; - long dataStreamed = 0; - long documentsStreamed = 0; - } - - Map statMap = new TreeMap<>(); - private long lastMetricLog = 0; - - @Override - public Result search(Query query, Execution execution) { - long timeMs = System.currentTimeMillis(); - - // Backwards compatibility - convert metricsearcher.id to streaming.loadtype - // TODO Cleanup at some point - String metricName = query.properties().getString(metricsearcherId); - if (metricName != null) { - query.properties().set(streamingLoadtype, metricName); - } - - Result result = execution.search(query); - - long latency = System.currentTimeMillis() - timeMs; - - metricName = query.properties().getString(streamingLoadtype); - if (metricName == null) { - return result; - } - - synchronized(this) { - Stats stats = statMap.get(metricName); - - if (stats == null) { - stats = new Stats(); - statMap.put(metricName, stats); - } - - stats.count++; - stats.latency += latency; - - if (result.hits().getError() != null && - !result.hits().getErrorHit().hasOnlyErrorCode(ErrorMessage.NULL_QUERY) && - !result.hits().getErrorHit().hasOnlyErrorCode(3)) { - stats.failed++; - } else { - stats.ok++; - } - - VisitorStatistics visitorstats = null; - final QueryContext queryContext = query.getContext(false); - if (queryContext != null) { - visitorstats = (VisitorStatistics)queryContext.getProperty(STREAMING_STATISTICS); - } - if (visitorstats != null) { - stats.dataStreamed += visitorstats.getBytesVisited(); - stats.documentsStreamed += visitorstats.getDocumentsVisited(); - } else { - log.fine("No visitor 
statistics set in query! - don't use metrics searcher without streaming search"); - } - - if ((timeMs - lastMetricLog) > 60000) { - for (Map.Entry entry : statMap.entrySet()) { - stats = entry.getValue(); - Event.value(entry.getKey() + "_latency", stats.count > 0 ? (double)stats.latency / (double)stats.count : 0); - Event.value(entry.getKey() + "_ok", stats.ok); - Event.value(entry.getKey() + "_failed", stats.failed); - Event.value(entry.getKey() + "_bytesstreamed", stats.dataStreamed); - Event.value(entry.getKey() + "_documentsstreamed", stats.documentsStreamed); - - stats.latency = 0; - stats.count = 0; - } - - lastMetricLog = timeMs; - } - } - - return result; - } -} diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConfigurationTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConfigurationTestCase.java index 657911742b6c..fa7e52e12ebe 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConfigurationTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConfigurationTestCase.java @@ -55,7 +55,7 @@ void testReadingConfigurationRuleBase() { void testParent() { assertSemantics("WEAKAND(100) vehiclebrand:audi", "audi cars", "parent"); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", "parent"); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", "parent.sr"); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", "parent.sr"); assertSemantics("WEAKAND(100) vw car", "vw cars", "parent"); assertSemantics("WEAKAND(100) skoda car", "skoda cars", "parent.sr"); } @@ -64,7 +64,7 @@ void testParent() { void testChild1() { assertSemantics("WEAKAND(100) vehiclebrand:skoda", "audi cars", "child1.sr"); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", "child1"); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", "child1"); + 
assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", "child1"); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "vw cars", "child1"); assertSemantics("WEAKAND(100) skoda car", "skoda cars", "child1"); } @@ -73,7 +73,7 @@ void testChild1() { void testChild2() { assertSemantics("WEAKAND(100) vehiclebrand:audi", "audi cars", "child2"); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", "child2.sr"); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", "child2.sr"); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", "child2.sr"); assertSemantics("WEAKAND(100) vw car", "vw cars", "child2"); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "skoda cars", "child2"); } @@ -82,7 +82,7 @@ void testChild2() { void testGrandchild() { assertSemantics("WEAKAND(100) vehiclebrand:skoda", "audi cars", "grandchild.sr"); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", "grandchild"); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", "grandchild"); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", "grandchild"); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "vw cars", "grandchild"); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "skoda cars", "grandchild"); } diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/InheritanceTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/InheritanceTestCase.java index d9ef73c9ddd4..4d54866fe0c0 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/InheritanceTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/InheritanceTestCase.java @@ -88,7 +88,7 @@ void testInclusionOrderAndContentDump() { void testParent() { assertSemantics("WEAKAND(100) vehiclebrand:audi", "audi cars", parent); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", parent); 
- assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", parent); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", parent); assertSemantics("WEAKAND(100) vw car", "vw cars", parent); assertSemantics("WEAKAND(100) skoda car", "skoda cars", parent); } @@ -97,7 +97,7 @@ void testParent() { void testChild1() { assertSemantics("WEAKAND(100) vehiclebrand:skoda", "audi cars", child1); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", child1); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", child1); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", child1); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "vw cars", child1); assertSemantics("WEAKAND(100) skoda car", "skoda cars", child1); } @@ -106,7 +106,7 @@ void testChild1() { void testChild2() { assertSemantics("WEAKAND(100) vehiclebrand:audi", "audi cars", child2); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", child2); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", child2); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", child2); assertSemantics("WEAKAND(100) vw car", "vw cars", child2); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "skoda cars", child2); } @@ -115,7 +115,7 @@ void testChild2() { void testGrandchild() { assertSemantics("WEAKAND(100) vehiclebrand:skoda", "audi cars", grandchild); assertSemantics("WEAKAND(100) vehiclebrand:alfa", "alfa bus", grandchild); - assertSemantics("AND (WEAKAND(100) vehiclebrand:bmw) expensivetv", "bmw motorcycle", grandchild); + assertSemantics("WEAKAND(100) vehiclebrand:bmw expensivetv", "bmw motorcycle", grandchild); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "vw cars", grandchild); assertSemantics("WEAKAND(100) vehiclebrand:skoda", "skoda cars", grandchild); } diff --git 
a/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java index 677468c9339c..0dc3e3a8ed9e 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java @@ -61,6 +61,9 @@ void testLocal() { @Test void testLiteralReplacing() { assertSemantics("AND lord of rings", "lotr"); + assertSemantics("AND foo1 lord of rings bar2", "foo1 lotr bar2"); + assertSemantics("WEAKAND(100) lord of rings", "lotr", 0, Query.Type.WEAKAND); + assertSemantics("WEAKAND(100) foo1 lord of rings bar2", "foo1 lotr bar2", 0, Query.Type.WEAKAND); } @Test @@ -144,6 +147,15 @@ void testExplicitContext() { @Test void testOrProduction() { assertSemantics("OR something somethingelse", "something"); + // I did not expect this: + assertSemantics("OR (AND foo1 something bar2) somethingelse", "foo1 something bar2"); + } + + @Test + void testDoubleOrProduction() { + assertSemantics("OR more evenmore", "somethingmore"); + // Strange ordering: + assertSemantics("OR more (AND foo1 bar2) evenmore", "foo1 somethingmore bar2"); } // This test is order dependent. Fix it!! 
@@ -164,6 +176,17 @@ void testNullQuery() { assertEquals(NullItem.class, query.getModel().getQueryTree().getRoot().getClass()); // Still a NullItem } + @Test + void testPhraseReplacementCornerCase() { + assertSemantics("brand:smashtogether", "\"smash together\""); + assertSemantics("brand:smashtogether", "smash-together"); + assertSemantics("AND foo1 brand:smashtogether bar2", "foo1 \"smash together\" bar2"); + assertSemantics("AND brand:smashtogether \"foo1 bar2\"", "\"foo1 smash together bar2\""); + assertSemantics("OR brand:smashtogether \"foo1 bar2\"", "\"foo1 smash together bar2\"", 0, Query.Type.ANY); + // the difference in ordering here is because the parsed query already has a WEAKAND root (with 1 child): + assertSemantics("WEAKAND(100) \"foo1 bar2\" brand:smashtogether", "\"foo1 smash together bar2\"", 0, Query.Type.WEAKAND); + } + private Result doSearch(Searcher searcher, Query query, int offset, int hits) { query.setOffset(offset); query.setHits(hits); diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr index 51e48213d79a..bd70faa8ef95 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/rules.sr @@ -49,6 +49,9 @@ java +> -coffee; # Adding an or term something +> ?somethingelse; +# Replace with two or terms: +somethingmore -> ?more ?evenmore; + # Adding another negative # TODO: Term types in conditions # java -coffee +> -island @@ -69,3 +72,8 @@ the -> ; [typechange] -> $default:[typechange] ; [typechange] :- typechange; + +# Replacing a phrase and changing index +[myphrase] -> brand:smashtogether ; + +[myphrase] :- smash together; diff --git a/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java 
b/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java deleted file mode 100644 index 5cc0e6b060b5..000000000000 --- a/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.streamingvisitors; - -import com.yahoo.component.chain.Chain; -import com.yahoo.search.Query; -import com.yahoo.search.Result; -import com.yahoo.search.Searcher; -import com.yahoo.search.result.ErrorMessage; -import com.yahoo.search.result.Hit; -import com.yahoo.search.searchchain.Execution; -import com.yahoo.vdslib.VisitorStatistics; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.*; - -/** - * @author Ulf Carlin - */ -public class MetricsSearcherTestCase { - - private final MetricsSearcher metricsSearcher = new MetricsSearcher(); - private final MockBackend backend = new MockBackend(); - private final Chain chain = new Chain<>(metricsSearcher, backend); - private final Execution.Context context = Execution.Context.createContextStub(); - private final MetricsSearcher.Stats expStatsLt1 = new MetricsSearcher.Stats(); - private final MetricsSearcher.Stats expStatsLt2 = new MetricsSearcher.Stats(); - - private static final String LOADTYPE1 = "lt1"; - private static final String LOADTYPE2 = "lt2"; - - private void verifySearch(String metricParam, String message, String detailedMessage) { - Result result = new Execution(chain, context).search(new Query("?query=test&" + metricParam)); - assertEquals(1, result.hits().size()); - if (message == null) { - assertEquals("news:0", result.hits().get(0).getId().toString()); - } else { - assertNotNull(result.hits().getError()); - assertTrue(result.hits().getErrorHit().errors().iterator().next().getMessage().contains(message), - "Expected '" + message + "' to be contained in '" - + 
result.hits().getErrorHit().errors().iterator().next().getMessage() + "'"); - assertTrue(result.hits().getErrorHit().errors().iterator().next().getDetailedMessage().contains(detailedMessage), - "Expected '" + detailedMessage + "' to be contained in '" - + result.hits().getErrorHit().errors().iterator().next().getDetailedMessage() + "'"); - } - - if (metricParam == null) { - return; - } - - MetricsSearcher.Stats expStats; - MetricsSearcher.Stats actualStats; - if (metricParam.contains(LOADTYPE1)) { - expStats = expStatsLt1; - actualStats = metricsSearcher.statMap.get(LOADTYPE1); - } else { - expStats = expStatsLt2; - actualStats = metricsSearcher.statMap.get(LOADTYPE2); - } - - expStats.count++; - if (message == null) { - expStats.ok++; - } else { - expStats.failed++; - } - if (metricParam.contains(LOADTYPE1)) { - expStats.dataStreamed += 16; - expStats.documentsStreamed += 2; - } - - assertEquals(expStats.count, actualStats.count); - assertEquals(expStats.ok, actualStats.ok); - assertEquals(expStats.failed, actualStats.failed); - assertEquals(expStats.dataStreamed, actualStats.dataStreamed); - assertEquals(expStats.documentsStreamed, actualStats.documentsStreamed); - } - - @Test - void testBasics() { - // Start counting at -1 since count is reset upon the first query by MetricsSearcher.search - expStatsLt1.count--; - String[] loadTypes = {LOADTYPE1, LOADTYPE2}; - for (String loadType : loadTypes) { - verifySearch("streaming.loadtype=" + loadType, null, null); - verifySearch("metricsearcher.id=" + loadType, null, null); - verifySearch(null, null, null); - verifySearch("streaming.loadtype=" + loadType, "Backend communication error", "Detailed error message"); - } - - } - - @Test - void searcherDoesNotTryToDereferenceNullQueryContext() { - backend.setImplicitlyCreateContext(false); - // This will crash with an NPE if the searcher does not cope with null - // query contexts. 
- new Execution(chain, context).search(new Query("?query=test&streaming.loadtype=" + LOADTYPE1)); - } - - private static class MockBackend extends Searcher { - - private int sequenceNumber = 0; - private final VisitorStatistics visitorStats = new VisitorStatistics(); - private boolean implicitlyCreateContext = true; - - private MockBackend() { - visitorStats.setBucketsVisited(1); - visitorStats.setBytesReturned(8); - visitorStats.setBytesVisited(16); - visitorStats.setDocumentsReturned(1); - visitorStats.setDocumentsVisited(2); - } - - public void setImplicitlyCreateContext(boolean implicitlyCreateContext) { - this.implicitlyCreateContext = implicitlyCreateContext; - } - - @Override - public Result search(Query query, Execution execution) { - if (implicitlyCreateContext) { - String loadType = query.properties().getString("streaming.loadtype"); - assignContextProperties(query, loadType); - } - - Result result = new Result(query); - if (sequenceNumber == 3 || sequenceNumber == 7) { - result.hits().addError(ErrorMessage.createBackendCommunicationError("Detailed error message")); - } else { - result.hits().add(new Hit("news:0")); - } - sequenceNumber++; - return result; - } - - private void assignContextProperties(Query query, String loadType) { - if (loadType != null && loadType.equals(LOADTYPE1)) { - query.getContext(true).setProperty(StreamingBackend.STREAMING_STATISTICS, visitorStats); - } else { - query.getContext(true).setProperty(StreamingBackend.STREAMING_STATISTICS, null); - } - } - } - -} diff --git a/dist/release-vespa-rpm.sh b/dist/release-vespa-rpm.sh index ffa4c8e3e2e7..7880d6c44cef 100755 --- a/dist/release-vespa-rpm.sh +++ b/dist/release-vespa-rpm.sh @@ -17,10 +17,11 @@ readonly CURRENT_BRANCH=$(git branch | grep "^\*" | cut -d' ' -f2) git checkout master git pull --rebase -# Create a proper release tag - -git tag -a "$RELEASE_TAG" -m "Release version $VERSION" $GITREF -git push origin "$RELEASE_TAG" +# Create a proper release tag if not there +if [[ 
$(git rev-list -n 1 "$RELEASE_TAG") != "$GITREF" ]]; then + git tag -a "$RELEASE_TAG" -m "Release version $VERSION" $GITREF + git push origin "$RELEASE_TAG" +fi git reset --hard HEAD git checkout $CURRENT_BRANCH diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java index 39ac44119f9f..b35df86abd8d 100644 --- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java +++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java @@ -111,7 +111,7 @@ private static Map> createScriptsMap(Docume ScriptExpression script = new ScriptExpression(expressions); script.select(fieldPathOptimizer, fieldPathOptimizer); - fieldScripts.put(FULL, new DocumentScript(ilscript.doctype(), ilscript.docfield(),script)); + fieldScripts.put(FULL, new DocumentScript(ilscript.doctype(), ilscript.docfield(), script)); documentFieldScripts.put(ilscript.doctype(), Collections.unmodifiableMap(fieldScripts)); } return Collections.unmodifiableMap(documentFieldScripts); diff --git a/docprocs/src/test/cfg2/documentmanager.cfg b/docprocs/src/test/cfg2/documentmanager.cfg new file mode 100644 index 000000000000..e649ac39a6da --- /dev/null +++ b/docprocs/src/test/cfg2/documentmanager.cfg @@ -0,0 +1,55 @@ +ignoreundefinedfields false +usev8geopositions false +usev8geopositions false +doctype[0].name "document" +doctype[0].idx 10000 +doctype[0].contentstruct 10001 +doctype[0].primitivetype[0].idx 10002 +doctype[0].primitivetype[0].name "bool" +doctype[0].primitivetype[1].idx 10003 +doctype[0].primitivetype[1].name "byte" +doctype[0].primitivetype[2].idx 10004 +doctype[0].primitivetype[2].name "double" +doctype[0].primitivetype[3].idx 10005 +doctype[0].primitivetype[3].name "float" +doctype[0].primitivetype[4].idx 10006 +doctype[0].primitivetype[4].name "float16" +doctype[0].primitivetype[5].idx 10007 +doctype[0].primitivetype[5].name "int" +doctype[0].primitivetype[6].idx 10008 
+doctype[0].primitivetype[6].name "long" +doctype[0].primitivetype[7].idx 10010 +doctype[0].primitivetype[7].name "predicate" +doctype[0].primitivetype[8].idx 10011 +doctype[0].primitivetype[8].name "raw" +doctype[0].primitivetype[9].idx 10012 +doctype[0].primitivetype[9].name "string" +doctype[0].primitivetype[10].idx 10014 +doctype[0].primitivetype[10].name "uri" +doctype[0].wsettype[0].idx 10013 +doctype[0].wsettype[0].elementtype 10012 +doctype[0].wsettype[0].createifnonexistent true +doctype[0].wsettype[0].removeifzero true +doctype[0].structtype[0].idx 10001 +doctype[0].structtype[0].name "document.header" +doctype[0].structtype[1].idx 10009 +doctype[0].structtype[1].name "position" +doctype[0].structtype[1].field[0].name "x" +doctype[0].structtype[1].field[0].internalid 914677694 +doctype[0].structtype[1].field[0].type 10007 +doctype[0].structtype[1].field[1].name "y" +doctype[0].structtype[1].field[1].internalid 900009410 +doctype[0].structtype[1].field[1].type 10007 +doctype[1].name "page" +doctype[1].idx 10015 +doctype[1].inherits[0].idx 10000 +doctype[1].contentstruct 10016 +doctype[1].fieldsets{[document]}.fields[0] "domain" +doctype[1].structtype[0].idx 10016 +doctype[1].structtype[0].name "page.header" +doctype[1].structtype[0].field[0].name "domain" +doctype[1].structtype[0].field[0].internalid 1169984294 +doctype[1].structtype[0].field[0].type 10012 +doctype[1].structtype[0].field[1].name "domain_hash" +doctype[1].structtype[0].field[1].internalid 305760502 +doctype[1].structtype[0].field[1].type 10008 diff --git a/docprocs/src/test/cfg2/ilscripts.cfg b/docprocs/src/test/cfg2/ilscripts.cfg new file mode 100644 index 000000000000..a2bea7d1ffdc --- /dev/null +++ b/docprocs/src/test/cfg2/ilscripts.cfg @@ -0,0 +1,7 @@ +maxtermoccurrences 10000 +maxtokenlength 1000 +fieldmatchmaxlength 1000000 +ilscript[0].doctype "page" +ilscript[0].docfield[0] "domain" +ilscript[0].content[0] "clear_state | guard { input domain | hash | attribute domain_hash; }" 
+ilscript[0].content[1] "clear_state | guard { input domain | exact | index domain | summary domain; }" diff --git a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java index df7c1a442d4a..d5b4f2009c1c 100644 --- a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java +++ b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java @@ -33,30 +33,28 @@ */ public class IndexingProcessorTestCase { - private static final String CONFIG_ID = "dir:src/test/cfg"; - - private final IndexingProcessor indexer = newProcessor(CONFIG_ID); - @Test public void requireThatIndexerForwardsDocumentsOfUnknownType() { + var tester = new IndexingProcessorTester(); Document input = new Document(new DocumentType("unknown"), "id:ns:unknown::"); - DocumentOperation output = process(new DocumentPut(input)); + DocumentOperation output = tester.process(new DocumentPut(input)); assertTrue(output instanceof DocumentPut); assertSame(input, ((DocumentPut)output).getDocument()); } @Test public void testPut() { + IndexingProcessorTester tester = new IndexingProcessorTester("src/test/cfg"); // 'combined' gets the value of both // 'combinedWithFallback' falls back to an empty string if an input is missing { // Both artist and title are set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); input.getDocument().setFieldValue(inputType.getField("artist"), new StringFieldValue("artist1")); input.getDocument().setFieldValue(inputType.getField("title"), new StringFieldValue("title1")); - Document output = ((DocumentPut)process(input)).getDocument(); + Document output = ((DocumentPut)tester.process(input)).getDocument(); assertEquals("artist1", 
output.getFieldValue("artist").getWrappedValue()); assertEquals("title1", output.getFieldValue("title").getWrappedValue()); assertNull(output.getFieldValue("song")); @@ -65,11 +63,11 @@ public void testPut() { } { // Just artist is set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); input.getDocument().setFieldValue(inputType.getField("artist"), new StringFieldValue("artist1")); - Document output = ((DocumentPut)process(input)).getDocument(); + Document output = ((DocumentPut)tester.process(input)).getDocument(); assertEquals("artist1", output.getFieldValue("artist").getWrappedValue()); assertNull(output.getFieldValue("title")); assertNull(output.getFieldValue("song")); @@ -78,11 +76,11 @@ public void testPut() { } { // Just title is set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); input.getDocument().setFieldValue(inputType.getField("title"), new StringFieldValue("title1")); - Document output = ((DocumentPut)process(input)).getDocument(); + Document output = ((DocumentPut)tester.process(input)).getDocument(); assertEquals("title1", output.getFieldValue("title").getWrappedValue()); assertNull(output.getFieldValue("artist")); assertNull(output.getFieldValue("song")); @@ -91,11 +89,11 @@ public void testPut() { } { // Neither title nor artist is set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); input.getDocument().setFieldValue(inputType.getField("song"), new StringFieldValue("song1")); - Document output = ((DocumentPut)process(input)).getDocument(); + Document output = 
((DocumentPut)tester.process(input)).getDocument(); assertNull(output.getFieldValue("artist")); assertNull(output.getFieldValue("title")); assertEquals("song1", output.getFieldValue("song").getWrappedValue()); @@ -104,10 +102,10 @@ public void testPut() { } { // None is set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); - Document output = ((DocumentPut)process(input)).getDocument(); + Document output = ((DocumentPut)tester.process(input)).getDocument(); assertNull(output.getFieldValue("artist")); assertNull(output.getFieldValue("title")); assertNull(output.getFieldValue("song")); @@ -116,99 +114,105 @@ public void testPut() { } } + @Test + public void testPutCfg2() { + // Config of the following schema, derived Nov 2024, by SchemaTestCase.testDeriving in the config-model + // + // schema page { + // + // field domain_hash type long { + // indexing: input domain | hash | attribute + // } + // + // document page { + // + // field domain type string { + // indexing: index | summary + // match: word + // rank: filter + // } + // } + // } + IndexingProcessorTester tester = new IndexingProcessorTester("src/test/cfg2"); + + { // Both artist and title are set + DocumentType inputType = tester.getDocumentType("page"); + DocumentPut input = new DocumentPut(inputType, "id:ns:page::"); + input.getDocument().setFieldValue(inputType.getField("domain"), new StringFieldValue("domain1")); + + Document output = ((DocumentPut)tester.process(input)).getDocument(); + assertEquals("domain1", output.getFieldValue("domain").getWrappedValue()); + assertEquals(1386505442371493468L, output.getFieldValue("domain_hash").getWrappedValue()); + } + } + @Test public void testUpdate() { + IndexingProcessorTester tester = new IndexingProcessorTester("src/test/cfg"); // 'combined' gets the value of artist and title // 
'combinedWithFallback' falls back to an empty string if an input is missing { // Both artist and title are set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentUpdate input = new DocumentUpdate(inputType, "id:ns:music::"); input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("artist"), new StringFieldValue("artist1"))); input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("title"), new StringFieldValue("title1"))); - DocumentUpdate output = (DocumentUpdate)process(input); + DocumentUpdate output = (DocumentUpdate)tester.process(input); assertEquals(4, output.fieldUpdates().size()); - assertAssignment("artist", "artist1", output); - assertAssignment("title", "title1", output); - assertAssignment("combined", "artist1 title1", output); - assertAssignment("combinedWithFallback", "artist1 title1", output); + tester.assertAssignment("artist", "artist1", output); + tester.assertAssignment("title", "title1", output); + tester.assertAssignment("combined", "artist1 title1", output); + tester.assertAssignment("combinedWithFallback", "artist1 title1", output); } { // Just artist is set - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentUpdate input = new DocumentUpdate(inputType, "id:ns:music::"); input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("artist"), new StringFieldValue("artist1"))); - DocumentUpdate output = (DocumentUpdate)process(input); + DocumentUpdate output = (DocumentUpdate)tester.process(input); assertEquals(2, output.fieldUpdates().size()); - assertAssignment("artist", "artist1", output); - assertAssignment("combinedWithFallback", "artist1 ", output); + tester.assertAssignment("artist", "artist1", output); + tester.assertAssignment("combinedWithFallback", "artist1 ", output); } { // Just title is set - DocumentType 
inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentUpdate input = new DocumentUpdate(inputType, "id:ns:music::"); input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("title"), new StringFieldValue("title1"))); - DocumentUpdate output = (DocumentUpdate)process(input); + DocumentUpdate output = (DocumentUpdate)tester.process(input); assertEquals(2, output.fieldUpdates().size()); - assertAssignment("title", "title1", output); - assertAssignment("combinedWithFallback", " title1", output); + tester.assertAssignment("title", "title1", output); + tester.assertAssignment("combinedWithFallback", " title1", output); } { // Neither title nor artist is set: Should not update embeddings even though it has fallbacks for all - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentUpdate input = new DocumentUpdate(inputType, "id:ns:music::"); input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("song"), new StringFieldValue("song1"))); - DocumentUpdate output = (DocumentUpdate)process(input); + DocumentUpdate output = (DocumentUpdate)tester.process(input); assertEquals(1, output.fieldUpdates().size()); - assertAssignment("song", "song1", output); + tester.assertAssignment("song", "song1", output); } { // None is set: Should not update anything - DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentType inputType = tester.getDocumentType("music"); DocumentUpdate input = new DocumentUpdate(inputType, "id:ns:music::"); - DocumentUpdate output = (DocumentUpdate)process(input); + DocumentUpdate output = (DocumentUpdate)tester.process(input); assertNull(output); } } @Test public void requireThatIndexerForwardsUpdatesOfUnknownType() { + var tester = new IndexingProcessorTester(); DocumentUpdate input = new DocumentUpdate(new 
DocumentType("unknown"), "id:ns:music::"); - DocumentOperation output = process(input); + DocumentOperation output = tester.process(input); assertSame(input, output); } - private void assertAssignment(String fieldName, String value, DocumentUpdate output) { - FieldUpdate update = output.getFieldUpdate(fieldName); - assertNotNull("Update of '" + fieldName + "' exists", update); - assertEquals(fieldName, update.getField().getName()); - assertEquals(1, update.getValueUpdates().size()); - ValueUpdate combinedAssignment = update.getValueUpdate(0); - assertTrue(combinedAssignment instanceof AssignValueUpdate); - assertEquals(new StringFieldValue(value), combinedAssignment.getValue()); - } - - private DocumentOperation process(DocumentOperation input) { - Processing proc = new Processing(); - proc.getDocumentOperations().add(input); - indexer.process(proc); - - List operations = proc.getDocumentOperations(); - if (operations.isEmpty()) return null; - assertEquals(1, operations.size()); - return operations.get(0); - } - - @SuppressWarnings("deprecation") - private static IndexingProcessor newProcessor(String configId) { - return new IndexingProcessor(new DocumentTypeManager(ConfigGetter.getConfig(DocumentmanagerConfig.class, configId)), - ConfigGetter.getConfig(IlscriptsConfig.class, configId), - new SimpleLinguistics(), - new ComponentRegistry<>()); - } } diff --git a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTester.java b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTester.java new file mode 100644 index 000000000000..9a24861ebe06 --- /dev/null +++ b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTester.java @@ -0,0 +1,73 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.docprocs.indexing; + +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.config.subscription.ConfigGetter; +import com.yahoo.docproc.Processing; +import com.yahoo.document.DocumentOperation; +import com.yahoo.document.DocumentType; +import com.yahoo.document.DocumentTypeManager; +import com.yahoo.document.DocumentUpdate; +import com.yahoo.document.config.DocumentmanagerConfig; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.document.update.AssignValueUpdate; +import com.yahoo.document.update.FieldUpdate; +import com.yahoo.document.update.ValueUpdate; +import com.yahoo.language.simple.SimpleLinguistics; +import com.yahoo.vespa.configdefinition.IlscriptsConfig; + +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** + * @author bratseth + */ +public class IndexingProcessorTester { + + private final IndexingProcessor indexer; + + public IndexingProcessorTester() { + indexer = newProcessor("raw:"); + } + + public IndexingProcessorTester(String configDir) { + indexer = newProcessor("dir:" + configDir); + } + + public DocumentType getDocumentType(String name) { + return indexer.getDocumentTypeManager().getDocumentType(name); + } + + public void assertAssignment(String fieldName, String value, DocumentUpdate output) { + FieldUpdate update = output.getFieldUpdate(fieldName); + assertNotNull("Update of '" + fieldName + "' exists", update); + assertEquals(fieldName, update.getField().getName()); + assertEquals(1, update.getValueUpdates().size()); + ValueUpdate combinedAssignment = update.getValueUpdate(0); + assertTrue(combinedAssignment instanceof AssignValueUpdate); + assertEquals(new StringFieldValue(value), combinedAssignment.getValue()); + } + + public DocumentOperation process(DocumentOperation input) { + Processing proc = new Processing(); + proc.getDocumentOperations().add(input); + 
indexer.process(proc); + + List operations = proc.getDocumentOperations(); + if (operations.isEmpty()) return null; + assertEquals(1, operations.size()); + return operations.get(0); + } + + @SuppressWarnings("deprecation") + private static IndexingProcessor newProcessor(String configId) { + return new IndexingProcessor(new DocumentTypeManager(ConfigGetter.getConfig(DocumentmanagerConfig.class, configId)), + ConfigGetter.getConfig(IlscriptsConfig.class, configId), + new SimpleLinguistics(), + new ComponentRegistry<>()); + } + +} diff --git a/document/src/vespa/document/select/valuenodes.cpp b/document/src/vespa/document/select/valuenodes.cpp index 8f9c3f950fc6..8b80bae21970 100644 --- a/document/src/vespa/document/select/valuenodes.cpp +++ b/document/src/vespa/document/select/valuenodes.cpp @@ -16,7 +16,7 @@ #include #include -#include +#include LOG_SETUP(".document.select.valuenode"); namespace document::select { @@ -374,8 +374,8 @@ IteratorHandler::getInternalValue(const FieldValue& fval) const default: break; } - LOG(warning, "Tried to use unsupported datatype %s in field comparison", - fval.getDataType()->toString().c_str()); + LOGBP(warning, "Tried to use unsupported datatype %s in field comparison", + fval.getDataType()->toString().c_str()); return std::make_unique(); } @@ -452,10 +452,10 @@ FieldValueNode::getValue(const Context& context) const } } } catch (vespalib::IllegalArgumentException& e) { - LOG(warning, "Caught exception while fetching field from document: %s", e.what()); + LOGBP(warning, "Caught exception while fetching field from document: %s", e.what()); return std::make_unique(); } catch (FieldNotFoundException& e) { - LOG(warning, "Tried to compare to field %s, not found in document type", _fieldExpression.c_str()); + LOGBP(warning, "Tried to compare with field %s, not found in document type", _fieldExpression.c_str()); return std::make_unique(); } } @@ -513,8 +513,8 @@ FieldValueNode::traceValue(const Context &context, std::ostream& out) const 
} } } catch (FieldNotFoundException& e) { - LOG(warning, "Tried to compare to field %s, not found in document type", - _fieldExpression.c_str()); + LOGBP(warning, "Tried to compare with field %s, not found in document type", + _fieldExpression.c_str()); out << "Field not found in document type " << doc.getType() << ". Returning invalid.\n"; return std::make_unique(); @@ -600,8 +600,7 @@ IdValueNode::getValue(const DocumentId& id) const if (id.getScheme().hasGroup()) { value = id.getScheme().getGroup(); } else { - fprintf(stderr, "***** Returning invalid value for %s\n", - id.toString().c_str()); + LOGBP(warning, "Returning invalid value for IdValueNode of type GROUP for id: %s", id.toString().c_str()); return std::make_unique(); } break; @@ -743,7 +742,7 @@ FunctionValueNode::FunctionValueNode(std::string_view name, } else if (name == "abs") { _function = ABS; } else { - throw ParsingFailedException("No function '" + std::string(name) + "' exist.", + throw ParsingFailedException("No function '" + std::string(name) + "' exists.", VESPA_STRLOC); } } @@ -791,7 +790,7 @@ FunctionValueNode::getValue(std::unique_ptr val) const case Value::Bucket: { throw ParsingFailedException( - "No functioncalls are allowed on value of type bucket", + "No function calls are allowed on value of type bucket", VESPA_STRLOC); break; } diff --git a/eval/src/apps/eval_expr/eval_expr.cpp b/eval/src/apps/eval_expr/eval_expr.cpp index 1b7dcb3f7729..35e5a16b4e2b 100644 --- a/eval/src/apps/eval_expr/eval_expr.cpp +++ b/eval/src/apps/eval_expr/eval_expr.cpp @@ -17,8 +17,13 @@ #include #include #include +#include +#include #include #include +#include +#include +#include #include @@ -33,6 +38,7 @@ using vespalib::slime::Inspector; using vespalib::slime::Cursor; using vespalib::Input; using vespalib::Memory; +using vespalib::SimpleBuffer; using CostProfile = std::vector>; @@ -337,6 +343,11 @@ class Collector { std::string toString() const { return _slime.toString(); } + std::string toCompactString() 
const { + SimpleBuffer buf; + JsonFormat::encode(_slime.get(), buf, true); + return buf.get().make_string(); + } }; struct EditLineWrapper { @@ -500,6 +511,469 @@ int json_repl_mode(Context &ctx) { } } +// like base64, but replace '/' with '-' and drop padding (note: reserved '+' is still used) +const char *symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-"; +std::map make_symbol_map() { + std::map map; + for (int i = 0; i < 64; ++i) { + map[symbols[i]] = i; + } + return map; +} + +// Write bits to url-safe-ish string +struct UrlSafeBitOutput { + int bits = 0; + int num_bits = 0; + std::string result; + void write_bits(int x, int n) { + for (int i = 0; i < n; ++i) { + bits = (bits << 1) | (x & 1); + if (++num_bits == 6) { + result.push_back(symbols[bits]); + num_bits = 0; + bits = 0; + } + x >>= 1; + } + } + void flush() { + if (num_bits != 0) { + write_bits(0, 6 - num_bits); + } + } +}; + +// Read bits from url-safe-ish string +struct UrlSafeBitInput { + int bits = 0; + int num_bits = 0; + size_t offset = 0; + static constexpr int bit_read_mask = (1 << 5); + static const std::map symbol_map; + const std::string &str; + UrlSafeBitInput(const std::string &str_in) noexcept : str(str_in) {} + int read_bits(int n) { + int x = 0; + int b = 1; + for (int i = 0; i < n; ++i) { + if (num_bits == 0) { + REQUIRE(offset < str.size()); // input underflow + auto pos = symbol_map.find(str[offset++]); + REQUIRE(pos != symbol_map.end()); // invalid input character + bits = pos->second; + num_bits = 6; + } + if (bits & bit_read_mask) { + x |= b; + } + b <<= 1; + bits <<= 1; + --num_bits; + } + return x; + } +}; +const std::map UrlSafeBitInput::symbol_map = make_symbol_map(); + +// keeps track of how many bits to use for dict references +struct BitWidthTracker { + int num; + int next; + BitWidthTracker(int num_in, int next_in) noexcept + : num(num_in), next(next_in) {} + void use() { + if (--next == 0) { + next = 1 << num; + ++num; + } + } + int width() { + 
return num; + } +}; + +// unified dictionary satisfying the needs of both compress and decompress +struct LZDict { + std::map map; + std::vector list; + static constexpr int lit8 = 0; + static constexpr int lit16 = 1; + static constexpr int eof = 2; + LZDict() { + list.push_back(""); // 0 + list.push_back(""); // 1 + list.push_back(""); // 2 + // we cannot put these in the forward dictionary since they + // could produce duplicates which we check for + } + int size() { return list.size(); } + bool has(const std::string &key) { + return (map.count(key) == 1); + } + int add(const std::string &key) { + REQUIRE(map.count(key) == 0); // no duplicates + int value = list.size(); + list.push_back(key); + map[key] = value; + return value; + } + std::string get(int value) { + REQUIRE(value < size()); // check with size first + return list[value]; + } + int get(const std::string &key) { + REQUIRE(map.count(key) == 1); // check with has first + return map[key]; + } +}; + +// ascii-only lz_string compression (https://github.com/pieroxy/lz-string) +void compress_impl(const std::string &str, auto &bits, auto &dict, auto &dst) { + + std::set pending; + std::string ctx_wc; + std::string ctx_w; + + for (char c: str) { + std::string ctx_c(1, c); + if (!dict.has(ctx_c)) { + dict.add(ctx_c); + pending.insert(ctx_c); + } + ctx_wc = ctx_w + ctx_c; + if (dict.has(ctx_wc)) { + ctx_w = ctx_wc; + } else { + if (pending.count(ctx_w) == 1) { + REQUIRE_EQ(ctx_w.size(), 1zu); + dst.write_bits(dict.lit8, bits.width()); + dst.write_bits(ctx_w[0], 8); + bits.use(); + pending.erase(ctx_w); + } else { + dst.write_bits(dict.get(ctx_w), bits.width()); + } + dict.add(ctx_wc); + bits.use(); + ctx_w = ctx_c; + } + } + if (!ctx_w.empty()) { + if (pending.count(ctx_w) == 1) { + dst.write_bits(dict.lit8, bits.width()); + dst.write_bits(ctx_w[0], 8); + bits.use(); + pending.erase(ctx_w); + } else { + dst.write_bits(dict.get(ctx_w), bits.width()); + } + bits.use(); + } + dst.write_bits(dict.eof, bits.width()); 
+ dst.flush(); +} + +// ascii-only lz_string decompression (https://github.com/pieroxy/lz-string) +std::string decompress_impl(auto &src, auto &bits, auto &dict) { + + std::string result; + + int c = src.read_bits(bits.width()); + if (c == dict.eof) { + return result; + } + REQUIRE_EQ(c, dict.lit8); // ascii only + c = src.read_bits(8); + std::string w(1, char(c)); + result.append(w); + dict.add(w); + bits.use(); + + std::string entry; + for (;;) { + c = src.read_bits(bits.width()); + REQUIRE(c != dict.lit16); // ascii only + if (c == dict.eof) { + return result; + } + if (c == dict.lit8) { + c = dict.add(std::string(1, char(src.read_bits(8)))); + bits.use(); + } + REQUIRE(c <= dict.size()); // invalid dict entry + if (c == dict.size()) { + entry = w + w.substr(0, 1); + } else { + entry = dict.get(c); + } + result.append(entry); + dict.add(w + entry.substr(0, 1)); + bits.use(); + w = entry; + } +} + +// used to encode setups in tensor playground +std::string compress(const std::string &str) { + LZDict dict; + BitWidthTracker bits(2, 2); + UrlSafeBitOutput dst; + compress_impl(str, bits, dict, dst); + return dst.result; +} + +// used to test the compression code above, hence the inlined REQUIREs +std::string decompress(const std::string &str) { + LZDict dict; + BitWidthTracker bits(2, 1); + UrlSafeBitInput src(str); + return decompress_impl(src, bits, dict); +} + +// What happens during compression and decompression, the full story +struct LZLog { + static constexpr int BW = 31; + static constexpr int PW = 18; + struct Block { + std::vector writer; + std::vector reader; + void dump(size_t idx) { + if (writer.empty() && reader.empty()) { + return; + } + size_t len = reader.size() + 1; + if (idx == 0) { + len = std::max(len, writer.size()); + } else { + len = std::max(len, writer.size() + 1); + } + size_t wait = (len - writer.size()); + for (size_t i = 0; i < len; ++i) { + fprintf(stderr, "%*s%*s%-*s\n", + BW, (i >= wait) ? 
writer[i - wait].c_str() : "", + PW, "", + BW, (i < reader.size()) ? reader[i].c_str() : ""); + } + } + }; + struct Packet { + int bits; + int value; + std::string writer; + std::string reader; + Packet(int bits_in, int value_in) noexcept + : bits(bits_in), value(value_in) {} + void dump() { + fprintf(stderr, "%*s%*s%-*s\n", + BW, writer.c_str(), + PW, fmt(" -> %2db:%6d -> ", bits, value).c_str(), + BW, reader.c_str()); + } + }; + std::vector blocks; + std::vector packets; + void ensure_block(size_t idx) { + while (blocks.size() <= idx) { + blocks.emplace_back(); + } + } + void writer(int block, const std::string &msg) { + ensure_block(block); + blocks[block].writer.push_back(msg); + } + int ensure_packet(int block, int bits, int value) { + if (packets.size() <= size_t(block)) { + REQUIRE_EQ(packets.size(), size_t(block)); + packets.emplace_back(bits, value); + } else { + REQUIRE_EQ(packets[block].bits, bits); + REQUIRE_EQ(packets[block].value, value); + } + return block + 1; + } + int write_packet(int block, int bits, int value, const std::string &msg) { + int res = ensure_packet(block, bits, value); + packets[block].writer = msg; + return res; + } + int read_packet(int block, int bits, int value, const std::string &msg) { + int res = ensure_packet(block, bits, value); + packets[block].reader = msg; + return res; + } + void reader(int block, const std::string &msg) { + ensure_block(block); + blocks[block].reader.push_back(msg); + } + void dump() { + std::string bsep(BW, '-'); + std::string psep(PW, '-'); + REQUIRE_EQ(blocks.size(), packets.size() + 1); + fprintf(stderr, "%s%s%s\n", bsep.c_str(), psep.c_str(), bsep.c_str()); + fprintf(stderr, "%*s%*s%-*s\n", BW, "COMPRESS", PW, "DATA ", BW, "DECOMPRESS"); + fprintf(stderr, "%s%s%s\n", bsep.c_str(), psep.c_str(), bsep.c_str()); + for (size_t i = 0; i < blocks.size(); ++i) { + blocks[i].dump(i); + if (i < packets.size()) { + packets[i].dump(); + } + } + fprintf(stderr, "%s%s%s\n", bsep.c_str(), psep.c_str(), 
bsep.c_str()); + } + ~LZLog(); + struct Writer { + LZLog &log; + size_t idx = 0; + LZDict dict; + bool expect_lit8 = false; + BitWidthTracker bits{2,2}; + UrlSafeBitOutput dst; + Writer(LZLog &log_in) : log(log_in) {} + ~Writer(); + + static constexpr int lit8 = LZDict::lit8; + static constexpr int lit16 = LZDict::lit16; + static constexpr int eof = LZDict::eof; + + int width() { return bits.width(); } + bool has(const std::string &key) { return dict.has(key); } + int get(const std::string &key) { return dict.get(key); } + + int add(const std::string &key) { + int value = dict.add(key); + log.writer(idx, fmt("dict[%s] -> %d", key.c_str(), value)); + return value; + } + void use() { + int before = bits.width(); + bits.use(); + int after = bits.width(); + log.writer(idx, fmt("bit width %d -> %d", before, after)); + } + void write_bits(int x, int n) { + std::string msg; + if (expect_lit8) { + msg = fmt("write lit8 '%c'", char(x)); + } else { + switch (x) { + case lit8: + msg = "write lit8 tag"; + break; + case eof: + msg = "write EOF tag"; + break; + default: + msg = fmt("write entry '%s'", dict.get(x).c_str()); + } + } + expect_lit8 = (x == lit8); + dst.write_bits(x, n); + idx = log.write_packet(idx, n, x, msg); + } + void flush() { + dst.flush(); + log.writer(idx, fmt("flush bits")); + } + }; + struct Reader { + LZLog &log; + size_t idx = 0; + LZDict dict; + bool expect_lit8 = false; + int prev = -1; + BitWidthTracker bits{2,1}; + UrlSafeBitInput src; + Reader(LZLog &log_in, const std::string &str) : log(log_in), src(str) {} + ~Reader(); + + static constexpr int lit8 = LZDict::lit8; + static constexpr int lit16 = LZDict::lit16; + static constexpr int eof = LZDict::eof; + + int width() { return bits.width(); } + int size() { return dict.size(); } + std::string get(int value) { return dict.get(value); } + + int read_bits(int n) { + int x = src.read_bits(n); + std::string msg; + if (expect_lit8) { + msg = fmt("read lit8 '%c'", char(x)); + prev = dict.size(); + } else { 
+ switch (x) { + case lit8: + msg = "read lit8 tag"; + prev = -1; + break; + case eof: + msg = "read EOF tag"; + prev = -1; + break; + default: + if (x == dict.size()) { + REQUIRE(prev != -1); + std::string entry = dict.get(prev); + entry.push_back(entry[0]); + msg = fmt("infer entry '%s'", entry.c_str()); + } else { + msg = fmt("read entry '%s'", dict.get(x).c_str()); + } + prev = x; + } + } + expect_lit8 = (x == lit8); + idx = log.read_packet(idx, n, x, msg); + return x; + } + void use() { + int before = bits.width(); + bits.use(); + int after = bits.width(); + log.reader(idx, fmt("bit width %d -> %d", before, after)); + } + int add(const std::string &key) { + int value = dict.add(key); + log.reader(idx, fmt("dict[%s] -> %d", key.c_str(), value)); + return value; + } + }; + static LZLog analyze(const std::string &str) { + LZLog log; + Writer writer(log); + compress_impl(str, writer, writer, writer); + Reader reader(log, writer.dst.result); + auto res = decompress_impl(reader, reader, reader); + REQUIRE_EQ(res, str); + return log; + } +}; + +LZLog::~LZLog() = default; +LZLog::Writer::~Writer() = default; +LZLog::Reader::~Reader() = default; + +void verify_compr(std::string str) { + auto compr = compress(str); + auto res = decompress(compr); + REQUIRE_EQ(str, res); + fprintf(stderr, "'%s' -> '%s' -> '%s'\n", str.c_str(), compr.c_str(), res.c_str()); + auto log = LZLog::analyze(str); + log.dump(); +} + +void run_tests() { + REQUIRE_EQ(strlen(symbols), 64zu); + verify_compr(""); + verify_compr("abcdef"); + verify_compr("aaaaaa"); + verify_compr("baaaaaa"); + verify_compr("cbaaaaaa"); + verify_compr("ababababababab"); + verify_compr("a and b and c and d"); +} + int main(int argc, char **argv) { bool verbose = ((argc > 1) && (std::string(argv[1]) == "--verbose")); int expr_idx = verbose ? 
2 : 1; @@ -538,9 +1012,35 @@ int main(int argc, char **argv) { return 3; } } + if ((expr_cnt == 3) && + (std::string(argv[expr_idx]) == "interactive") && + (std::string(argv[expr_idx + 2]) == "link")) + { + setlocale(LC_ALL, ""); + Collector collector; + collector.enable(); + interactive_mode(ctx, Script::from_file(argv[expr_idx + 1])->script_only(true), collector); + if (collector.error().empty()) { + auto hash = compress(collector.toCompactString()); + fprintf(stdout, "https://docs.vespa.ai/playground/#%s\n", hash.c_str()); + return 0; + } else { + fprintf(stderr, "conversion failed: %s\n", collector.error().c_str()); + return 3; + } + } if ((expr_cnt == 1) && (std::string(argv[expr_idx]) == "json-repl")) { return json_repl_mode(ctx); } + if ((expr_cnt == 1) && (std::string(argv[expr_idx]) == "test")) { + try { + run_tests(); + } catch (std::exception &e) { + fprintf(stderr, "test failed: %s\n", e.what()); + return 3; + } + return 0; + } ctx.verbose(verbose); std::string name("a"); for (int i = expr_idx; i < argc; ++i) { diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index bb96361ae41e..283b716b6d98 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -19,6 +19,7 @@ import static com.yahoo.vespa.flags.Dimension.HOSTNAME; import static com.yahoo.vespa.flags.Dimension.INSTANCE_ID; import static com.yahoo.vespa.flags.Dimension.NODE_TYPE; +import static com.yahoo.vespa.flags.Dimension.SYSTEM; import static com.yahoo.vespa.flags.Dimension.TENANT_ID; import static com.yahoo.vespa.flags.Dimension.VESPA_VERSION; @@ -291,12 +292,12 @@ public class Flags { NODE_TYPE, HOSTNAME); public static final UnboundListFlag ZONAL_WEIGHTED_ENDPOINT_RECORDS = defineListFlag( - "zonal-weighted-endpoint-records", List.of(), String.class, List.of("jonmv"), "2023-12-15", "2024-12-01", + "zonal-weighted-endpoint-records", List.of(), 
String.class, List.of("hmusum"), "2023-12-15", "2025-02-01", "A list of weighted (application) endpoint fqdns for which we should use zonal endpoints as targets, not LBs.", "Takes effect at redeployment from controller"); public static final UnboundListFlag WEIGHTED_ENDPOINT_RECORD_TTL = defineListFlag( - "weighted-endpoint-record-ttl", List.of(), String.class, List.of("jonmv"), "2023-05-16", "2024-12-01", + "weighted-endpoint-record-ttl", List.of(), String.class, List.of("hmusum"), "2023-05-16", "2025-02-01", "A list of endpoints and custom TTLs, on the form \"endpoint-fqdn:TTL-seconds\". " + "Where specified, CNAME records are used instead of the default ALIAS records, which have a default 60s TTL.", "Takes effect at redeployment from controller"); @@ -397,6 +398,12 @@ public class Flags { "Whether to sync tenants to HubSpot", "Takes effect immediately"); + public static UnboundBooleanFlag ATLASSIAN_SYNC_TENANTS = defineFeatureFlag( + "atlassian-sync-tenants", false, + List.of("bjormel"), "2024-11-11", "2025-01-01", + "Whether to sync tenants to Atlassian", + "Takes effect immediately"); + public static final UnboundBooleanFlag SYMMETRIC_PUT_AND_ACTIVATE_REPLICA_SELECTION = defineFeatureFlag( "symmetric-put-and-activate-replica-selection", false, List.of("vekterli"), "2024-05-23", "2024-12-01", @@ -423,7 +430,7 @@ public class Flags { public static final UnboundBooleanFlag LAUNCH_APPLICATION_ATHENZ_SERVICE = defineFeatureFlag( "launch-application-athenz-service", false, - List.of("jonmv"), "2024-06-11", "2025-01-10", + List.of("hmusum"), "2024-06-11", "2025-02-01", "Whether to launch an Athenz service unique to the application. 
Only valid in public systems!", "Takes effect on next deployment", INSTANCE_ID); @@ -435,7 +442,7 @@ public class Flags { "Takes effect immediately"); public static final UnboundBooleanFlag DISTRIBUTION_CONFIG_FROM_CLUSTER_CONTROLLER = defineFeatureFlag( - "distribution-config-from-cluster-controller", false, + "distribution-config-from-cluster-controller", true, List.of("vekterli"), "2024-07-01", "2024-12-01", "Iff true, the cluster controller will be the authoritative source of distribution " + "config changes in a content cluster, and distribution changes will be part of explicitly " + @@ -491,6 +498,27 @@ public class Flags { + "e.g. when running tests to avoid writing a large, sparse, mostly unused file", "Takes effect on restart of Docker container"); + public static final UnboundBooleanFlag ENFORCE_EMAIL_DOMAIN_SSO = defineFeatureFlag( + "enforce-email-domain-sso", false, + List.of("eirik"), "2024-11-07", "2025-02-07", + "Enforce SSO login for an email domain", + "Takes effect immediately", + CONSOLE_USER_EMAIL); + + public static final UnboundListFlag RESTRICT_USERS_TO_DOMAIN = defineListFlag( + "restrict-users-to-domain", List.of(), String.class, + List.of("eirik"), "2024-11-07", "2025-02-07", + "Only allow adding specific email domains as user to tenant", + "Takes effect immediately", + TENANT_ID); + + public static final UnboundBooleanFlag LEGACY_AUTH0_FILTER = defineFeatureFlag( + "use-legacy-auth0-filter", true, + List.of("eirik"), "2024-11-07", "2025-02-07", + "Use legacy auth0 request filter, or new one", + "Takes after controller restart", + SYSTEM); + /** WARNING: public for testing: All flags should be defined in {@link Flags}. 
*/ public static UnboundBooleanFlag defineFeatureFlag(String flagId, boolean defaultValue, List owners, String createdAt, String expiresAt, String description, diff --git a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java index 8d1d6c9f94e6..11f0f1edd020 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java @@ -278,6 +278,12 @@ public class PermanentFlags { INSTANCE_ID ); + public static final UnboundIntFlag DELAY_HOST_SECURITY_AGENT_START_MINUTES = defineIntFlag( + "delay-host-security-agent-start-minutes", 5, + "The number of minutes (from host admin start) to delay the start of the host security agent", + "Takes effect on next host-admin tick", + NODE_TYPE); + // This must be set in a feature flag to avoid flickering between the new and old value during config server upgrade public static final UnboundDoubleFlag HOST_MEMORY = defineDoubleFlag( "host-memory", -1.0, diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java index f38e03a3aed6..fdebde6c4945 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java @@ -55,7 +55,7 @@ protected void doVerify(VerificationContext context) { @Override protected void doExecute(ExecutionContext context) { StringFieldValue input = (StringFieldValue) context.getCurrentValue(); - if (DataType.INT.equals(targetType) || requireOutputType().equals(DataType.INT)) + if (DataType.INT.equals(targetType) || ( ! 
DataType.LONG.equals(targetType) && requireOutputType().equals(DataType.INT))) context.setCurrentValue(new IntegerFieldValue(hashToInt(input.getString()))); else if (DataType.LONG.equals(targetType) || requireOutputType().equals(DataType.LONG)) context.setCurrentValue(new LongFieldValue(hashToLong(input.getString()))); diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/LowerCaseExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/LowerCaseExpression.java index a7d77c5342d4..66a9ac57038e 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/LowerCaseExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/LowerCaseExpression.java @@ -17,7 +17,7 @@ public LowerCaseExpression() { @Override public DataType setInputType(DataType inputType, VerificationContext context) { - return super.setInputType(inputType, context); + return super.setInputType(inputType, DataType.STRING, context); } @Override diff --git a/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml b/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml index d958cabef3ae..91bc5da8b5ff 100644 --- a/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml +++ b/integration/schema-language-server/clients/intellij/src/main/resources/META-INF/plugin.xml @@ -21,11 +21,20 @@
  • Renaming
  • +

    YQL Features

    +
      +
    • Error highlighting
    • +
    • Syntax highlighting
    • +
    • Running queries directly from .yql files +
    + +

    Requirements

    +The plugin requires Vespa CLI to be installed to be able to run Vespa Queries from .yql files. + ]]>
    Refactored to use LSP4IJ -The plugin will now support better syntax highlighting with semantic tokens and renaming. -In addition, the plugin will be available for community editions as well. +

    Simple support for YQL

    +The plugin now supports syntax highlighting of .yql files, in addition to run the queries directly from the editor. ]]>
    com.intellij.modules.platform com.redhat.devtools.lsp4ij @@ -38,6 +47,9 @@ In addition, the plugin will be available for community editions as well. + { - const r = await next(document, position, context, token); - return r; - }, - provideDocumentHighlights: async (document, position, token, next) => { - const r = await next(document, position, token); - return r; - }, - provideDocumentSemanticTokens: async (document, token, next) => { - const r = await next(document, token); - return r; - }, - }, + documentSelector: [ + { + scheme: 'file', + language: 'vespaSchema', + }, + { + scheme: 'file', + language: 'vespaYQL' + } + ], synchronize: { fileEvents: vscode.workspace.createFileSystemWatcher("**/*{.sd,.profile}") } diff --git a/integration/schema-language-server/language-server/src/main/ccc/grouping/GroupingParser.ccc b/integration/schema-language-server/language-server/src/main/ccc/grouping/GroupingParser.ccc index f8f18598eca9..a84f2392585b 100644 --- a/integration/schema-language-server/language-server/src/main/ccc/grouping/GroupingParser.ccc +++ b/integration/schema-language-server/language-server/src/main/ccc/grouping/GroupingParser.ccc @@ -40,6 +40,12 @@ INJECT GroupingParser: } +INJECT GroupingParserLexer: +{ + public static EnumSet getRegularTokens() { + return EnumSet.copyOf(regularTokens); + } +} TOKEN : diff --git a/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc b/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc index 027192fce509..9dfee585d334 100644 --- a/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc +++ b/integration/schema-language-server/language-server/src/main/ccc/yqlplus/YQLPlus.ccc @@ -12,6 +12,13 @@ INJECT YQLPlusParser: protected Deque expression_stack = new ArrayDeque<>(); } +INJECT YQLPlusLexer: +{ + public static EnumSet getRegularTokens() { + return EnumSet.copyOf(regularTokens); + } +} + // 
-------------------------------------------------------------------------------- // // Token declarations. @@ -466,7 +473,7 @@ argument(boolean in_select): expression(boolean select): ( null_operator - | annotate_expression + | (SCAN annotate_expression => annotate_expression) | logical_OR_expression ) ; @@ -515,7 +522,7 @@ equality_expression: in_not_in_target: ( // TODO: Add expression stack peek - ( select_statement ) + SCAN 2 => ( select_statement ) | literal_list ) ; @@ -609,7 +616,7 @@ primary_expression: ( expression(in_select) ) | constant_expression | ( - (SCAN 2 => call_expression(in_select)) + (SCAN namespaced_name => call_expression(in_select) ) | fieldref ) ) diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/common/command/commandtypes/RunVespaQuery.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/common/command/commandtypes/RunVespaQuery.java index b66d446da789..b626bb546b24 100644 --- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/common/command/commandtypes/RunVespaQuery.java +++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/common/command/commandtypes/RunVespaQuery.java @@ -56,7 +56,7 @@ public Object execute(EventExecuteCommandContext context) { runVespaQuery(queryCommand, context.logger).thenAccept(result -> { if (!result.success()) { - if (result.result().toLowerCase().contains("command not found")) { + if (result.result().toLowerCase().contains("cannot run program")) { context.messageHandler.sendMessage(MessageType.Error, "Could not find vespa CLI. Make sure vespa CLI is installed and added to path. 
Download vespa CLI here: https://docs.vespa.ai/en/vespa-cli.html"); return; } @@ -107,13 +107,10 @@ private CompletableFuture runVespaQuery(String query, ClientLogger ProcessBuilder builder = new ProcessBuilder(); - String queryEscaped = query.replace("\"", "\\\""); - String vespaCommand = String.format("vespa query \"%s\"", queryEscaped); - if (isWindows) { - builder.command("cmd.exe", "/c", vespaCommand); // TODO: Test this on windows + builder.command("cmd.exe", "/c", "vespa", "query", query); // TODO: Test this on windows } else { - builder.command("/bin/sh", "-c", vespaCommand); + builder.command("vespa", "query", query); } return CompletableFuture.supplyAsync(() -> { @@ -146,8 +143,7 @@ private CompletableFuture runVespaQuery(String query, ClientLogger } catch (InterruptedException e) { return new QueryResult(false, "Program interrupted"); } catch (IOException e) { - logger.error(e.getMessage()); - return new QueryResult(false, "IOException occurred."); + return new QueryResult(false, e.getMessage()); } }); } diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java index 7a9d5bd3ddae..3633a5bd378c 100644 --- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java +++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/schemadocument/YQLDocument.java @@ -21,7 +21,6 @@ import ai.vespa.schemals.tree.Node; import ai.vespa.schemals.tree.SchemaNode; import ai.vespa.schemals.tree.YQLNode; -import ai.vespa.schemals.tree.YQL.YQLUtils; public class YQLDocument implements DocumentManager { @@ -107,6 +106,8 @@ private static YQLPartParseResult parseYQLPart(CharSequence content, ClientLogge int charsRead = parser.getToken(0).getEndOffset(); + if (charsRead == 0) return new 
YQLPartParseResult(List.of(), Optional.empty(), charsRead); + ai.vespa.schemals.parser.yqlplus.Node node = parser.rootNode(); YQLNode retNode = new YQLNode(node, offset); // YQLUtils.printTree(logger, node); @@ -114,6 +115,33 @@ private static YQLPartParseResult parseYQLPart(CharSequence content, ClientLogge return new YQLPartParseResult(List.of(), Optional.of(retNode), charsRead); } + private static boolean detectContinuation(String inputString) { + for (int i = 0; i < inputString.length(); i++) { + if (inputString.charAt(i) != ' ') { + return inputString.charAt(i) == '{'; + } + } + return false; + } + + private static YQLPartParseResult parseContinuation(String inputString, Position offset) { + + YQLPlusParser parser = new YQLPlusParser(inputString); + + try { + parser.map_expression(); + } catch (ParseException exception) { + // Ignored, marked as dirty node + } + + var node = parser.rootNode(); + YQLNode retNode = new YQLNode(node, offset); + + int charsRead = parser.getToken(0).getEndOffset(); + + return new YQLPartParseResult(List.of(), Optional.of(retNode), charsRead); + } + private static YQLPartParseResult parseYQLQuery(ParseContext context, String queryString, Position offset) { YQLNode ret = new YQLNode(new Range(offset, offset)); @@ -139,14 +167,36 @@ private static YQLPartParseResult parseYQLQuery(ParseContext context, String que Position groupOffset = CSTUtils.addPositions(groupOffsetWithoutPipe, new Position(0, 1)); // Add pipe char ret.addChild(new YQLNode(new Range(groupOffsetWithoutPipe, groupOffset), "|")); - - YQLPartParseResult groupingResult = VespaGroupingParser.parseVespaGrouping(groupingString, context.logger(), groupOffset); - if (groupingResult.CST.isPresent()) { - ret.addChild(groupingResult.CST.get()); + charsRead++; + + // Look for continuation + boolean continuationDetected = detectContinuation(groupingString); + if (continuationDetected) { + YQLPartParseResult continuationResults = parseContinuation(groupingString, groupOffset); + + 
diagnostics.addAll(continuationResults.diagnostics()); + if (continuationResults.CST().isPresent()) { + ret.addChild(continuationResults.CST().get()); + } + + charsRead += continuationResults.charsRead(); + String continuationString = groupingString.substring(0, continuationResults.charsRead()); + Position continuationPosition = StringUtils.getStringPosition(continuationString); + + groupingString = groupingString.substring(continuationResults.charsRead()); + groupOffset = CSTUtils.addPositions(groupOffset, continuationPosition); + } + + if (groupingString.length() > 0 && groupingString.strip().length() > 0) { + YQLPartParseResult groupingResult = VespaGroupingParser.parseVespaGrouping(groupingString, context.logger(), groupOffset); + if (groupingResult.CST.isPresent()) { + ret.addChild(groupingResult.CST.get()); + } + + diagnostics.addAll(groupingResult.diagnostics()); + charsRead += groupingResult.charsRead(); // Add one for the pipe symbol } - diagnostics.addAll(groupingResult.diagnostics()); - charsRead += 1 + groupingResult.charsRead(); // Add one for the pipe symbol } } @@ -180,6 +230,8 @@ public static ParseResult parseContent(ParseContext context) { if (result.CST().isPresent()) { ret.addChild(result.CST().get()); } + + if (result.charsRead() == 0) result.charsRead++; int newOffset = content.indexOf('\n', charsRead + result.charsRead()); if (newOffset == -1) { diff --git a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/ParserTokensTest.java b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/ParserTokensTest.java index 39ac8ba9928c..a4d22419e2c3 100644 --- a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/ParserTokensTest.java +++ b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/ParserTokensTest.java @@ -4,20 +4,31 @@ import java.lang.reflect.Field; import java.util.ArrayList; +import java.util.Arrays; import 
java.util.EnumSet; import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.stream.Stream; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestFactory; +import org.antlr.v4.runtime.Vocabulary; +import org.junit.jupiter.api.DynamicTest; import ai.vespa.schemals.parser.SchemaParserLexer; import ai.vespa.schemals.parser.indexinglanguage.IndexingParserLexer; import ai.vespa.schemals.parser.rankingexpression.RankingExpressionParserLexer; +import ai.vespa.schemals.parser.grouping.GroupingParserLexer; +import ai.vespa.schemals.parser.yqlplus.YQLPlusLexer; +import com.vladsch.flexmark.parser.Parser; import com.yahoo.schema.parser.SchemaParserConstants; import com.yahoo.vespa.indexinglanguage.parser.IndexingParserConstants; import com.yahoo.searchlib.rankingexpression.parser.RankingExpressionParserConstants; +import com.yahoo.search.grouping.request.parser.GroupingParserConstants; +import com.yahoo.search.yql.yqlplusLexer; + /** * Tests that the set of tokens declared in JavaCC parsers are also present in CongoCC parsers. 
@@ -46,6 +57,11 @@ public class ParserTokensTest { "OCTAL" ); + public static Set antlrSpecialTokens = Set.of( + "COMMENT", + "WS" + ); + private List findMissingTokens(Field[] javaCCFields, Set congoCCTokenStrings) { List missing = new ArrayList<>(); @@ -102,4 +118,50 @@ public void testRankingExpressionTokenList() { List missing = findMissingTokens(javaCCFields, congoCCTokenStrings); assertEquals(0, missing.size(), "Missing ranking expression tokens in CongoCC: " + String.join(", ", missing)); } + + @Test + public void testVespaGroupingTokenList() { + Field[] javaCCFields = GroupingParserConstants.class.getDeclaredFields(); + + Set congoCCTokenStrings = new HashSet<>(); + + for (var tokenType : GroupingParserLexer.getRegularTokens()) { + congoCCTokenStrings.add(tokenType.toString()); + } + + List missing = findMissingTokens(javaCCFields, congoCCTokenStrings); + assertEquals(0, missing.size(), "Missing ranking expression tokens in CongoCC: " + String.join(", ", missing)); + } + + @Test + public void testYQLPlusTokenList() { + Vocabulary vocabulary = yqlplusLexer.VOCABULARY; + + Set antlrTokens = new HashSet<>(); + + for (int i = 0; i < vocabulary.getMaxTokenType(); i++) { + String symbolicName = vocabulary.getSymbolicName(i); + if (symbolicName != null) { + antlrTokens.add(symbolicName); + } + } + + Set congoCCTokenStrings = new HashSet<>(); + + for (var tokenType : YQLPlusLexer.getRegularTokens()) { + congoCCTokenStrings.add(tokenType.toString()); + } + + List missing = new ArrayList<>(); + for (var token : antlrTokens) { + if (antlrSpecialTokens.contains(token)) continue; + + if (!congoCCTokenStrings.contains(token)) { + missing.add(token); + } + } + + assertEquals(0, missing.size(), "Missing yqlplus tokens in CongoCC: " + String.join(", ", missing)); + + } } diff --git a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java 
b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java index cf4dea61e3e5..d5808b23dd5a 100644 --- a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java +++ b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/YQLParserTest.java @@ -44,6 +44,68 @@ void checkQueryParses(int expectedErrors, String input) throws Exception { @TestFactory Stream generateGoodTests() { + String[] groupingQueries = new String[] { + + // From docs: /en/grouping.html + "all( group(customer) each(output(sum(price))) )", + "all(group(customer) max(2) precision(12) order(-count()) each(output(sum(price))))", + "all(group(customer) each(max(3) each(output(summary()))))", + "all(group(a) max(5) each(output(count())))", + "all(group(a) max(5) each(output(count()) max(7) each(output(summary()))))", + "all(all(group(a) max(3) each(output(count()) max(5) each(output(summary())))) all(group(b) max(3) each(output(count()) max(5) each(output(summary())))))", + "all(group(a) max(5) each(output(count()) max(7) each(output(summary()))))", + "all(group(a) each(output(count()) each(output(summary()))))", + "all(group(customer) each(group(time.date(date)) each(output(sum(price)))))", + "all(group(customer) each(max(1) output(sum(price)) each(output(summary()))) each(group(time.date(date)) each(max(10) output(sum(price)) each(output(summary())))))", + "all(group(price) each(each(output(summary()))))", + "all(group(price/1000) each(each(output(summary()))))", + "all(group(fixedwidth(price,1000)) each(each(output(summary()))))", + "all(group(predefined(price, bucket(0,1000), bucket(1000,2000), bucket(2000,5000), bucket(5000,inf))) each(each(output(summary()))))", + "all(group(predefined(price, bucket[0,1000>, bucket[1000,2000>, bucket[2000,5000>, bucket[5000,inf>)) each(each(output(summary()))))", + "all(group(predefined(customer, bucket(-inf,\"Jones\"), bucket(\"Jones\", inf))) 
each(each(output(summary()))))", + "all(group(predefined(customer, bucket<-inf,\"Jones\">, bucket[\"Jones\"], bucket<\"Jones\", inf>)) each(each(output(summary()))))", + "all(group(predefined(tax, bucket[0.0,0.2>, bucket[0.2,0.5>, bucket[0.5,inf>)) each(each(output(summary()))))", + // "{ 'continuations':['BGAAABEBCA'] }all(output(count()))", + // "{ 'continuations':['BGAAABEBCA', 'BGAAABEBEBC'] }all(output(count()))", + "all(group(mod(div(date,mul(60,60)),24)) each(output(sum(price))))", + "all(group(customer) each(output(sum(mul(price,sub(1,tax))))))", + "all( group(a) each(output(count())) )", + "all( all(group(a) each(output(count()))) all(group(b) each(output(count()))) )", + "all( max(1000) all(group(a) each(output(count()))) )", + "all( group(a % 5) each(output(count())) )", + "all( group(a + b * c) each(output(count())) )", + "all( group(a % 5) order(sum(b)) each(output(count())) )", + "all( group(a + b * c) order(max(d)) each(output(count())) )", + "all( group(a) order(avg(relevance()) * count()) each(output(count())) )", + "all(group(a) order(max(attr) * count()) each(output(count())) )", + "all( group(a) each(max(1) each(output(summary()))) )", + "all( group(a) each(max(1) output(count(), sum(b)) each(output(summary()))) )", + "all(group(a) each(max(1) output(count(), sum(b), xor(md5(cat(a, b, c), 64))) each(output(summary()))))", + "all( group(a) max(5) each(max(69) output(count()) each(output(summary()))) )", + "all( group(a) max(5) each(output(count()) all(group(b) max(5) each(max(69) output(count()) each(output(summary()))))) )", + "all( group(a) max(5) each(output(count()) all(group(b) max(5) each(output(count()) all(group(c) max(5) each(max(69) output(count()) each(output(summary()))))) )))", + "all( group(a) max(5) each(output(count()) all(group(b) max(5) each(output(count()) all(max(1) each(output(summary()))) all(group(c) max(5) each(max(69) output(count()) each(output(summary()))))) )))", + "all( group(a) max(5) each(output(count()) all(max(1) 
each(output(summary()))) all(group(b) max(5) each(output(count()) all(max(1) each(output(summary()))) all(group(c) max(5) each(max(69) output(count()) each(output(summary()))))) )))", + "all( group(a) max(5) each(output(count()) all(max(1) each(output(summary(complexsummary)))) all(group(b) max(5) each(output(count()) all(max(1) each(output(summary(simplesummary)))) all(group(c) max(5) each(max(69) output(count()) each(output(summary(fastsummary)))))) )))", + "all( group(a) max(5) each(output(count()) all(max(1) each(output(summary()))) all(group(b) each(output(count()) all(max(1) each(output(summary()))) all(group(c) each(output(count()) all(max(1) each(output(summary())))))))) )))", + "all( group(time.year(a)) each(output(count())) )", + "all( group(time.year(a)) each(output(count()) all(group(time.monthofyear(a)) each(output(count())))) )", + "all( group(time.year(a)) each(output(count()) all(group(time.monthofyear(a)) each(output(count()) all(group(time.dayofmonth(a)) each(output(count()) all(group(time.hourofday(a)) each(output(count())))))))) )", + "all( group(predefined((now() - a) / (60 * 60 * 24), bucket(0,1), bucket(1,2), bucket(3,7), bucket(8,31))) each(output(count()) all(max(2) each(output(summary()))) all(group((now() - a) / (60 * 60 * 24)) each(output(count()) all(max(2) each(output(summary())))))) )", + "all( group(a) output(count()) )", + "all( group(strlen(name)) output(count()) )", + "all( group(a) output(count()) each(output(sum(b))) )", + "all( group(a) max(3) output(count()) each(output(sum(b))) )", + "all( group(a) max(10) output(count()) each(group(b) output(count())) )", + "all(group(1) each(output(avg(rating))))", + "all( group(predefined(rating, bucket[-inf, 0>, bucket[0, inf>)) each(output(count())) )", + "all( group(predefined(rating, bucket[-inf, 0>, bucket[0, inf>)) order(max(rating)) max(1) each( max(100) each(output(summary(name_only)))) )", + }; + + for (int i = 0; i < groupingQueries.length; i++) { + groupingQueries[i] = "select * 
from sources * where true | " + groupingQueries[i]; + } + String[] queries = new String[] { "select * from music", "select * from sources * where range(title, 0.0, 500.0)", // /container-search/src/test/java/com/yahoo/select/SelectTestCase.java @@ -79,7 +141,8 @@ Stream generateGoodTests() { "select * from music where title contains \"madonna\" and !(title contains \"saint\")", "select * from music where text contains phrase(\"st\", \"louis\", \"blues\")", "select * from music where persons contains sameElement(first_name contains 'Joe', last_name contains 'Smith', year_of_birth < 1940)", - // "select * from music where identities contains sameElement(key contains 'father', value.first_name contains 'Joe', value.last_name contains 'Smith', value.year_of_birth < 1940)", + "select * from music where identities contains sameElement(key contains 'father', value.first_name contains 'Joe', value.last_name contains 'Smith', value.year_of_birth < 1940)", + "select * from music where gradparentStruct.parentStruct.childField contains 'madonna'", "select * from music where fieldName contains equiv(\"A\",\"B\")", "select * from music where myUrlField contains uri(\"vespa.ai/foo\")", "select * from music where myStringAttribute contains ({prefixLength:1, maxEditDistance:2}fuzzy(\"parantesis\"))", @@ -89,32 +152,40 @@ Stream generateGoodTests() { "select * from sources * where vendor contains \"brick and mortar\" AND price < 50 AND userQuery()", "select * from music where rank(a contains \"A\", b contains \"B\", c contains \"C\")", "select * from music where rank(nearestNeighbor(field, queryVector), a contains \"A\", b contains \"B\", c contains \"C\")", - // "select * from music where integer_field in (10, 20, 30)", - // "select * from music where string_field in ('germany', 'france', 'norway')", - // "select * from music where integer_field in (@integer_values)", - // "select * from music where string_field in (@string_values)", - // "select * from music where 
dotProduct(description, {\"a\":1, \"b\":2})", - // "select * from music where weightedSet(description, {\"a\":1, \"b\":2})", - // "select * from music where wand(description, [[11,1], [37,2]])", - // "select * from music where ({scoreThreshold: 0.13, targetHits: 7}wand(description, {\"a\":1, \"b\":2}))", + "select * from music where integer_field in (10, 20, 30)", + "select * from music where string_field in ('germany', 'france', 'norway')", + "select * from music where integer_field in (@integer_values)", + "select * from music where string_field in (@string_values)", + "select * from music where dotProduct(description, {\"a\":1, \"b\":2})", + "select * from music where weightedSet(description, {\"a\":1, \"b\":2})", + "select * from music where wand(description, [[11,1], [37,2]])", + "select * from music where ({scoreThreshold: 0.13, targetHits: 7}wand(description, {\"a\":1, \"b\":2}))", "select * from music where weakAnd(a contains \"A\", b contains \"B\")", "select * from music where ({targetHits: 7}weakAnd(a contains \"A\", b contains \"B\"))", "select * from music where geoLocation(myfieldname, 63.5, 10.5, \"200 km\")", "select * from music where ({targetHits: 10}nearestNeighbor(doc_vector, query_vector))&input.query(query_vector)=[3,5,7]", "select * from sources * where bar contains \"a\" and nonEmpty(bar contains \"bar\" and foo contains @foo)", - // "select * from music where predicate(predicate_field,{\"gender\":\"Female\"},{\"age\":20L})", - // "select * from music where predicate(predicate_field,0,{\"age\":20L})", + "select * from music where predicate(predicate_field,{\"gender\":\"Female\"},{\"age\":20L})", + "select * from music where predicate(predicate_field,0,{\"age\":20L})", "select * from music where title contains \"madonna\" order by price asc, releasedate desc", "select * from music where title contains \"madonna\" order by {function: \"uca\", locale: \"en_US\", strength: \"IDENTICAL\"}other desc, {function: \"lowercase\"}something", "select * 
from music where title contains \"madonna\" limit 31 offset 29", "select * from music where title contains \"madonna\" timeout 70", "select * from music where userInput(@userinput)", "select * from music where text contains ({distance: 5}near(\"a\", \"b\")) and text contains ({distance:2}near(\"c\", \"d\"))", + "select * from music where ({bounds:\"rightOpen\"}range(year, 2000, 2018))", + "select * from music where text contains ({distance: 5}near(\"a\", \"b\"))", + "select * from music where myUrlField.hostname contains uri(\"vespa.ai\")", + "select * from music where myUrlField.hostname contains ({startAnchor: true}uri(\"vespa.ai\"))", + "select * from music where title contains ({weight:200}\"heads\")", + "select * from sources * where ({stem: false}(foo contains \"a\" and bar contains \"b\")) or foo contains {stem: false}\"c\"", + "select * from sources * where foo contains @animal and foo contains phrase(@animal, @syntaxExample, @animal)", + "select * from sources * where sddocname contains 'purchase' | all(group(customer) each(output(sum(price))))", }; + Stream queryStream = Stream.concat(Arrays.stream(queries), Arrays.stream(groupingQueries)); - return Arrays.stream(queries) - .map(query -> DynamicTest.dynamicTest(query, () -> checkQueryParses(0, query))); + return queryStream.map(query -> DynamicTest.dynamicTest(query, () -> checkQueryParses(0, query))); } private record TestWithError(int expectedErrors, String query) {} @@ -123,6 +194,7 @@ private record TestWithError(int expectedErrors, String query) {} Stream InvalidQuery() throws Exception { var queries = new TestWithError[] { new TestWithError(1, "seletc *"), + // new TestWithError(1, "select * from sources * where true | all(group(a) order(attr * count()) each(output(count())) )"), }; return Arrays.stream(queries) diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java index c085cb6f83c0..add531e3ef1e 
100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretReader.java @@ -37,34 +37,38 @@ public abstract class AsmSecretReader extends AsmSecretStoreBase implements TypedSecretStore { - private static final Duration CACHE_EXPIRE = Duration.ofMinutes(30); + private static final Duration DEFAULT_REFRESH_INTERVAL = Duration.ofMinutes(30); private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(5); private final LoadingCache cache; private final Runnable ztsClientCloser; + private final Duration refreshInterval; protected record VersionKey(Key key, SecretVersionId version) {} // For subclasses using dependency injection public AsmSecretReader(AsmSecretConfig config, ServiceIdentityProvider identities) { this(ztsClient(URI.create(config.ztsUri()), identities.getIdentitySslContext()), - athenzDomain(config, identities)); + athenzDomain(config, identities), + Duration.ofMinutes(config.refreshInterval())); } public AsmSecretReader(URI ztsUri, SSLContext sslContext, AthenzDomain domain) { - this(ztsClient(ztsUri, sslContext), domain); + this(ztsClient(ztsUri, sslContext), domain, DEFAULT_REFRESH_INTERVAL); } - private AsmSecretReader(ZtsClient ztsClient, AthenzDomain domain) { + private AsmSecretReader(ZtsClient ztsClient, AthenzDomain domain, Duration refreshInterval) { super(ztsClient, domain); + this.refreshInterval = refreshInterval; cache = initCache(); ztsClientCloser = ztsClient::close; } // For testing - public AsmSecretReader(Function clientAndCredentialsSupplier) { + public AsmSecretReader(Function clientAndCredentialsSupplier) { super(clientAndCredentialsSupplier); + this.refreshInterval = DEFAULT_REFRESH_INTERVAL; cache = initCache(); ztsClientCloser = () -> {}; } @@ -85,7 +89,7 @@ private static AthenzDomain athenzDomain(AsmSecretConfig config, ServiceIdentity private LoadingCache initCache() { return CacheBuilder.newBuilder() - 
.refreshAfterWrite(CACHE_EXPIRE) + .refreshAfterWrite(refreshInterval) // See documentation for refreshAfterWrite for why we use asyncReloading. .build(CacheLoader.asyncReloading(new CacheLoader<>() { @Override @@ -143,7 +147,7 @@ public Secret getSecret(Key key, SecretVersionId version) { var msg = version == null ? "Failed to retrieve current version of secret with key " + key : "Failed to retrieve secret with key " + key + ", version: " + version.value(); - throw new IllegalArgumentException(msg, e); + throw new IllegalArgumentException(msg + ":\n" + e.getMessage()); } } diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretStoreBase.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretStoreBase.java index 9a31e143bbf0..b4f50e9d3af7 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretStoreBase.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmSecretStoreBase.java @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package ai.vespa.secret.aws; +import ai.vespa.secret.model.ExternalId; import ai.vespa.secret.model.VaultName; import com.yahoo.component.AbstractComponent; import com.yahoo.vespa.athenz.api.AthenzDomain; @@ -13,6 +14,7 @@ import software.amazon.awssdk.services.secretsmanager.SecretsManagerClient; import java.util.HashSet; +import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; @@ -27,38 +29,43 @@ public abstract class AsmSecretStoreBase extends AbstractComponent implements Au public static final String AWSCURRENT = "AWSCURRENT"; - private final Function clientAndCredentialsSupplier; + private final Function clientAndCredentialsSupplier; - private final ConcurrentMap clientMap = new ConcurrentHashMap<>(); + private final ConcurrentMap clientMap = new ConcurrentHashMap<>(); public AsmSecretStoreBase(ZtsClient ztsClient, AthenzDomain athenzDomain) { - this(awsRole -> SecretsManagerClient.builder().region(Region.US_EAST_1) - .credentialsProvider(getAwsSessionCredsProvider(awsRole, ztsClient, athenzDomain)) + this(assumedRoleInfo -> SecretsManagerClient.builder().region(Region.US_EAST_1) + .credentialsProvider(getAwsSessionCredsProvider(assumedRoleInfo, ztsClient, athenzDomain)) .build() ); } // For testing - protected AsmSecretStoreBase(Function clientAndCredentialsSupplier) { + protected AsmSecretStoreBase(Function clientAndCredentialsSupplier) { this.clientAndCredentialsSupplier = clientAndCredentialsSupplier; } /** Returns the AWS role associated with the given vault. 
*/ protected abstract AwsRolePath awsRole(VaultName vault); + protected ExternalId externalId(VaultName vault) { + return null; + } protected SecretsManagerClient getClient(VaultName vault) { var awsRole = awsRole(vault); - clientMap.putIfAbsent(awsRole, clientAndCredentialsSupplier.apply(awsRole)); - return clientMap.get(awsRole); + var externalId = externalId(vault); + var assumedRoleInfo = new AssumedRoleInfo(awsRole, Optional.ofNullable(externalId)); + clientMap.putIfAbsent(assumedRoleInfo, clientAndCredentialsSupplier.apply(assumedRoleInfo)); + return clientMap.get(assumedRoleInfo); } - private static AwsCredentialsProvider getAwsSessionCredsProvider(AwsRolePath role, + private static AwsCredentialsProvider getAwsSessionCredsProvider(AssumedRoleInfo roleInfo, ZtsClient ztsClient, AthenzDomain athenzDomain) { - AwsCredentials credentials = new AwsCredentials(ztsClient, athenzDomain, role.athenzAwsRole()); + AwsCredentials credentials = new AwsCredentials(ztsClient, athenzDomain, roleInfo.role().athenzAwsRole(), roleInfo.externalId().map(ExternalId::value).orElse(null)); return () -> { AwsTemporaryCredentials temporary = credentials.get(); return AwsSessionCredentials.create(temporary.accessKeyId(), @@ -86,7 +93,6 @@ public void deconstruct() { // Only for testing public Set clientRoleNames() { - return new HashSet<>(clientMap.keySet()); + return new HashSet<>(clientMap.keySet().stream().map(AssumedRoleInfo::role).toList()); } - } diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java index ac6d8caa1878..c736b50bd743 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AsmTenantSecretReader.java @@ -2,13 +2,19 @@ package ai.vespa.secret.aws; import ai.vespa.secret.config.aws.AsmSecretConfig; +import ai.vespa.secret.config.aws.AsmTenantSecretConfig; +import 
ai.vespa.secret.model.ExternalId; import ai.vespa.secret.model.Key; +import ai.vespa.secret.model.VaultId; import ai.vespa.secret.model.VaultName; import com.yahoo.component.annotation.Inject; import com.yahoo.vespa.athenz.identity.ServiceIdentityProvider; import software.amazon.awssdk.services.secretsmanager.SecretsManagerClient; +import java.util.Map; +import java.util.Optional; import java.util.function.Function; +import java.util.stream.Collectors; /** * Secret reader for tenant nodes. @@ -19,25 +25,44 @@ public final class AsmTenantSecretReader extends AsmSecretReader { private final String system; private final String tenant; + private final Map vaults; @Inject - public AsmTenantSecretReader(AsmSecretConfig config, ServiceIdentityProvider identities) { - super(config, identities); - this.system = config.system(); - this.tenant = config.tenant(); + public AsmTenantSecretReader(AsmSecretConfig secretConfig, + AsmTenantSecretConfig tenantConfig, + ServiceIdentityProvider identities) { + super(secretConfig, identities); + this.system = tenantConfig.system(); + this.tenant = tenantConfig.tenant(); + this.vaults = createVaultIdMap(tenantConfig); } // For testing - AsmTenantSecretReader(Function clientAndCredentialsSupplier, - String system, String tenant) { + AsmTenantSecretReader(Function clientAndCredentialsSupplier, + String system, String tenant, Map vaults) { super(clientAndCredentialsSupplier); this.system = system; this.tenant = tenant; + this.vaults = vaults; + } + + static Map createVaultIdMap(AsmTenantSecretConfig config) { + // Note: we can rightfully assume that the vaults are unique by name for a tenant. 
+ return config.vaults().stream() + .map(vault -> Map.entry(VaultName.of(vault.name()), new Vault(VaultId.of(vault.id()), VaultName.of(vault.name()), ExternalId.of(vault.externalId())))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } @Override protected AwsRolePath awsRole(VaultName vault) { - return AthenzUtil.awsReaderRole(system, tenant, vault); + if ( ! vaults.containsKey(vault)) { + throw new IllegalArgumentException("No vault id found for " + vault); + } + return AthenzUtil.awsReaderRole(system, tenant, vaults.get(vault).vaultId()); + } + + protected ExternalId externalId(VaultName vaultName) { + return Optional.ofNullable(vaults.get(vaultName)).map(Vault::externalId).orElse(null); } @Override @@ -56,4 +81,5 @@ public static String getAwsSecretId(String system, String tenant, Key key) { key.vaultName().value(), key.secretName().value()); } + record Vault(VaultId vaultId, VaultName vaultName, ExternalId externalId) {} } diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AssumedRoleInfo.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AssumedRoleInfo.java new file mode 100644 index 000000000000..bc6f14b15b90 --- /dev/null +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AssumedRoleInfo.java @@ -0,0 +1,30 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package ai.vespa.secret.aws; + +import ai.vespa.secret.model.ExternalId; + +import java.util.Objects; +import java.util.Optional; + +/** + * Information used to assume an AWS role. 
+ * @param role The role and path to assume + * @param externalId The external ID to use when assuming the role, Optional.empty() if not required + * @author mortent + */ +public record AssumedRoleInfo(AwsRolePath role, Optional externalId) { + + public AssumedRoleInfo { + Objects.requireNonNull(role, "role cannot be null"); + Objects.requireNonNull(externalId, "externalId cannot be null"); + } + + public static AssumedRoleInfo of(AwsRolePath role) { + return new AssumedRoleInfo(role, Optional.empty()); + } + + public static AssumedRoleInfo of(AwsRolePath role, ExternalId externalId) { + return new AssumedRoleInfo(role, Optional.ofNullable(externalId)); + } +} diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AthenzUtil.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AthenzUtil.java index 21005ed21fed..b0c2372172fa 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AthenzUtil.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/AthenzUtil.java @@ -2,6 +2,7 @@ package ai.vespa.secret.aws; import ai.vespa.secret.model.Role; +import ai.vespa.secret.model.VaultId; import ai.vespa.secret.model.VaultName; import com.yahoo.vespa.athenz.api.AwsRole; @@ -32,6 +33,11 @@ public static String resourceEntityName(String system, String tenant, VaultName .toLowerCase(); } + /* .reader */ + public static String athenzReaderRoleName(VaultName vault) { + return "%s.%s".formatted(vault.value(), Role.READER.value()); + } + /* Path: /tenant-secret/// */ public static AwsPath awsPath(String systemName, String tenantName) { return AwsPath.of(PREFIX, systemName, tenantName); @@ -43,13 +49,13 @@ public static AwsPath awsPath(String systemName, String tenantName) { * We use vaultId instead of vaultName because vaultName is not unique across tenants, * and role names must be unique across paths within an account. 
*/ - public static AwsRolePath awsReaderRole(String systemName, String tenantName, VaultName vault) { - return new AwsRolePath(awsPath(systemName, tenantName), new AwsRole(athenzReaderRoleName(vault))); + public static AwsRolePath awsReaderRole(String systemName, String tenantName, VaultId vaultId) { + return new AwsRolePath(awsPath(systemName, tenantName), new AwsRole(awsReaderRoleName(vaultId))); } /* .reader */ - private static String athenzReaderRoleName(VaultName vault) { - return "%s.%s".formatted(vault.value(), Role.READER.value()); + private static String awsReaderRoleName(VaultId vaultId) { + return "%s.%s".formatted(vaultId.value(), Role.READER.value()); } } diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretImpl.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretImpl.java new file mode 100644 index 000000000000..d80a2be1f2e0 --- /dev/null +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretImpl.java @@ -0,0 +1,31 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package ai.vespa.secret.aws; + +import ai.vespa.secret.Secret; +import ai.vespa.secret.internal.TypedSecretStore; +import ai.vespa.secret.model.Key; +import ai.vespa.secret.model.SecretName; +import ai.vespa.secret.model.VaultName; + +/** + * @author mortent + */ +public class SecretImpl implements Secret { + + private final VaultName vaultName; + private final SecretName secretName; + private final TypedSecretStore secrets; + + public SecretImpl(VaultName vaultName, SecretName secretName, TypedSecretStore secrets) { + this.vaultName = vaultName; + this.secretName = secretName; + this.secrets = secrets; + } + + @Override + public String current() { + var secret = secrets.getSecret(new Key(vaultName, secretName)); + return secret.secretAsString(); + } +} diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretsImpl.java b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretsImpl.java index e54b5338125d..8900f2e42795 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretsImpl.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/SecretsImpl.java @@ -5,7 +5,6 @@ import ai.vespa.secret.Secrets; import ai.vespa.secret.config.SecretsConfig; import ai.vespa.secret.internal.TypedSecretStore; -import ai.vespa.secret.model.Key; import ai.vespa.secret.model.SecretName; import ai.vespa.secret.model.VaultName; @@ -43,12 +42,6 @@ public Secret get(String key) { VaultName vaultName = VaultName.of(secretConfig.vault()); SecretName secretName = SecretName.of(secretConfig.name()); - var secret = secretStore.getSecret(new Key(vaultName, secretName)); - if (secret == null) { - throw new IllegalArgumentException("Secret with key '" + key + "' not found in secret store"); - } - - return secret::secretAsString; - } - + return new SecretImpl(vaultName, secretName, secretStore); + } } diff --git a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/testutil/AsmSecretReaderTester.java 
b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/testutil/AsmSecretReaderTester.java index 94b1e7ce7262..18da4ae4fbba 100644 --- a/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/testutil/AsmSecretReaderTester.java +++ b/jdisc-cloud-aws/src/main/java/ai/vespa/secret/aws/testutil/AsmSecretReaderTester.java @@ -5,6 +5,7 @@ package ai.vespa.secret.aws.testutil; +import ai.vespa.secret.aws.AssumedRoleInfo; import ai.vespa.secret.aws.AwsRolePath; import ai.vespa.secret.model.Key; import ai.vespa.secret.model.SecretVersionState; @@ -30,8 +31,8 @@ public void put(Key key, SecretVersion... versions) { secrets.put(awsSecretIdMapper.apply(key), List.of(versions)); } - public MockSecretsReader newClient(AwsRolePath awsRole) { - return new MockSecretsReader(awsRole); + public MockSecretsReader newClient(AssumedRoleInfo assumedRoleInfo) { + return new MockSecretsReader(assumedRoleInfo.role()); } diff --git a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java index ecea026a03fd..b0a449b36707 100644 --- a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java +++ b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/AsmTenantSecretReaderTest.java @@ -2,15 +2,19 @@ import ai.vespa.secret.aws.testutil.AsmSecretReaderTester; import ai.vespa.secret.aws.testutil.AsmSecretTesterBase.SecretVersion; +import ai.vespa.secret.config.aws.AsmTenantSecretConfig; +import ai.vespa.secret.model.ExternalId; import ai.vespa.secret.model.Key; import ai.vespa.secret.model.Secret; import ai.vespa.secret.model.SecretName; import ai.vespa.secret.model.SecretVersionId; import ai.vespa.secret.model.SecretVersionState; +import ai.vespa.secret.model.VaultId; import ai.vespa.secret.model.VaultName; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import java.util.Map; import java.util.Set; import static 
org.junit.jupiter.api.Assertions.assertEquals; @@ -40,15 +44,17 @@ void reset() { } AsmTenantSecretReader secretReader() { - return new AsmTenantSecretReader(tester::newClient, system, tenant); + return new AsmTenantSecretReader(tester::newClient, system, tenant, + Map.of(VaultName.of("vault1"), new AsmTenantSecretReader.Vault(VaultId.of("vaultId1"), VaultName.of("vault1"), ExternalId.of("ext1")), + VaultName.of("vault2"), new AsmTenantSecretReader.Vault(VaultId.of("vaultId2"), VaultName.of("vault2"), ExternalId.of("ext2")))); } @Test void it_creates_one_credentials_and_client_per_vault_and_closes_them() { var vault1 = VaultName.of("vault1"); - var awsRole1 = AwsRolePath.fromStrings("/tenant-secret/publiccd/tenant1/", "vault1.reader"); + var awsRole1 = AwsRolePath.fromStrings("/tenant-secret/publiccd/tenant1/", "vaultId1.reader"); var vault2 = VaultName.of("vault2"); - var awsRole2 = AwsRolePath.fromStrings("/tenant-secret/publiccd/tenant1/", "vault2.reader"); + var awsRole2 = AwsRolePath.fromStrings("/tenant-secret/publiccd/tenant1/", "vaultId2.reader"); var secret1 = new SecretVersion("1", SecretVersionState.CURRENT, "secret1"); var secret2 = new SecretVersion("2", SecretVersionState.CURRENT, "secret2"); @@ -108,10 +114,10 @@ void it_throws_exception_if_secret_not_found() { var key = new Key(vault, SecretName.of("secret1")); try (var reader = secretReader()) { var e = assertThrows(IllegalArgumentException.class, () -> reader.getSecret(key)); - assertEquals("Failed to retrieve current version of secret with key vault1/secret1", e.getMessage()); + assertTrue(e.getMessage().startsWith("Failed to retrieve current version of secret with key vault1/secret1")); e = assertThrows(IllegalArgumentException.class, () -> reader.getSecret(key, SecretVersionId.of("1"))); - assertEquals("Failed to retrieve secret with key vault1/secret1, version: 1", e.getMessage()); + assertTrue(e.getMessage().startsWith("Failed to retrieve secret with key vault1/secret1, version: 1")); } } @@ 
-125,7 +131,7 @@ void it_throws_exception_if_version_not_found() { try (var store = secretReader()) { var e = assertThrows(IllegalArgumentException.class, () -> store.getSecret(key, SecretVersionId.of("2"))); - assertEquals("Failed to retrieve secret with key vault1/secret1, version: 2", e.getMessage()); + assertTrue(e.getMessage().startsWith("Failed to retrieve secret with key vault1/secret1, version: 2")); } } @@ -159,6 +165,25 @@ void it_returns_empty_list_of_versions_for_unknown_secret() { } } + @Test + void it_creates_map_from_vaultName_to_vaultId_from_config() { + var config = new AsmTenantSecretConfig.Builder() + .system(system) + .tenant(tenant) + .vaults(builder -> builder.name("vault1").id("id1").externalId("ext1")) + .vaults(builder -> builder.name("vault2").id("id2").externalId("ext2")); + + Map idMap = AsmTenantSecretReader.createVaultIdMap(config.build()); + assertEquals(2, idMap.size()); + var vault1 = idMap.get(VaultName.of("vault1")); + assertEquals(VaultId.of("id1"), vault1.vaultId()); + assertEquals(ExternalId.of("ext1"), vault1.externalId()); + + var vault2 = idMap.get(VaultName.of("vault2")); + assertEquals(VaultId.of("id2"), vault2.vaultId()); + assertEquals(ExternalId.of("ext2"), vault2.externalId()); + } + private void assertSame(SecretVersion version, Secret secret) { assertEquals(version.value(), secret.secretAsString()); assertEquals(version.version(), secret.version().value()); diff --git a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/SecretsImplTest.java b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/SecretsImplTest.java index 225d4d86fcd7..54526b9c53af 100644 --- a/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/SecretsImplTest.java +++ b/jdisc-cloud-aws/src/test/java/ai/vespa/secret/aws/SecretsImplTest.java @@ -47,16 +47,6 @@ public void testThrowOnUnknownSecrets() { } } - @Test - public void testSecretInConfigButNotInVault() { - try { - secrets.get("mySecret"); - fail(); - } catch (IllegalArgumentException e) { - 
assertEquals("Secret with key 'mySecret' not found in secret store", e.getMessage()); - } - } - private static SecretsImpl createSecrets() { var config = createSecretsConfig(); var secretStore = createSecretStore(); diff --git a/metrics/src/main/java/ai/vespa/metrics/ClusterControllerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ClusterControllerMetrics.java index f15b7412b24d..577a7a48673c 100644 --- a/metrics/src/main/java/ai/vespa/metrics/ClusterControllerMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/ClusterControllerMetrics.java @@ -26,8 +26,8 @@ public enum ClusterControllerMetrics implements VespaMetrics { RESOURCE_USAGE_NODES_ABOVE_LIMIT("cluster-controller.resource_usage.nodes_above_limit", Unit.NODE, "The number of content nodes above resource limit, blocking feed"), RESOURCE_USAGE_MAX_MEMORY_UTILIZATION("cluster-controller.resource_usage.max_memory_utilization", Unit.FRACTION, "Current memory utilisation, for content node with highest value"), RESOURCE_USAGE_MAX_DISK_UTILIZATION("cluster-controller.resource_usage.max_disk_utilization", Unit.FRACTION, "Current disk space utilisation, for content node with highest value"), - RESOURCE_USAGE_MEMORY_LIMIT("cluster-controller.resource_usage.memory_limit", Unit.FRACTION, "Disk space limit as a fraction of available disk space"), - RESOURCE_USAGE_DISK_LIMIT("cluster-controller.resource_usage.disk_limit", Unit.FRACTION, "Memory space limit as a fraction of available memory"), + RESOURCE_USAGE_MEMORY_LIMIT("cluster-controller.resource_usage.memory_limit", Unit.FRACTION, "Memory space limit as a fraction of available memory"), + RESOURCE_USAGE_DISK_LIMIT("cluster-controller.resource_usage.disk_limit", Unit.FRACTION, "Disk space limit as a fraction of available disk space"), REINDEXING_PROGRESS("reindexing.progress", Unit.FRACTION, "Re-indexing progress"); diff --git a/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java 
index 3a00a9fcfc52..0b64c78fe0ef 100644 --- a/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/ControllerMetrics.java @@ -53,6 +53,8 @@ public enum ControllerMetrics implements VespaMetrics { HUBSPOT_EXCEPTIONS("hubspot.exceptions", Unit.FAILURE, "Controller: Hubspot exceptions"), HUBSPOT_LAST_SUCCESS("hubspot.last_success", Unit.SECONDS_SINCE_EPOCH, "Controller: Last successful Hubspot synchronization"), TENANT_CREATED_LAST_SUCCESS("tenant.created.last_success", Unit.SECONDS_SINCE_EPOCH, "Controller: Last successful tenant creation"), + ATLASSIAN_EXCEPTIONS("atlassian.exceptions", Unit.FAILURE, "Controller: Atlassian exceptions"), + ATLASSIAN_LAST_SUCCESS("atlassian.last_success", Unit.SECONDS_SINCE_EPOCH, "Controller: Last successful Atlassian synchronization"), // Metrics per API, metrics names generated in ControllerMaintainer/MetricsReporter OPERATION_APPLICATION("operation.application", Unit.REQUEST, "Controller: Requests for /application API"), diff --git a/metrics/src/main/java/ai/vespa/metrics/SearchNodeMetrics.java b/metrics/src/main/java/ai/vespa/metrics/SearchNodeMetrics.java index ba5b65643e06..da488a4f5ec1 100644 --- a/metrics/src/main/java/ai/vespa/metrics/SearchNodeMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/SearchNodeMetrics.java @@ -214,17 +214,32 @@ public enum SearchNodeMetrics implements VespaMetrics { CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_USED_BYTES("content.proton.documentdb.ready.attribute.memory_usage.used_bytes", Unit.BYTE, "The number of used bytes (<= allocated_bytes)"), CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_DEAD_BYTES("content.proton.documentdb.ready.attribute.memory_usage.dead_bytes", Unit.BYTE, "The number of dead bytes (<= used_bytes)"), CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_ONHOLD_BYTES("content.proton.documentdb.ready.attribute.memory_usage.onhold_bytes", Unit.BYTE, "The number of bytes on hold"), + 
CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_DISK_USAGE("content.proton.documentdb.ready.attribute.disk_usage", Unit.BYTE, "Disk space usage (in bytes) of the flushed snapshot of this attribute for this document type"), CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_ALLOCATED_BYTES("content.proton.documentdb.notready.attribute.memory_usage.allocated_bytes", Unit.BYTE, "The number of allocated bytes"), CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_USED_BYTES("content.proton.documentdb.notready.attribute.memory_usage.used_bytes", Unit.BYTE, "The number of used bytes (<= allocated_bytes)"), CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_DEAD_BYTES("content.proton.documentdb.notready.attribute.memory_usage.dead_bytes", Unit.BYTE, "The number of dead bytes (<= used_bytes)"), CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_ONHOLD_BYTES("content.proton.documentdb.notready.attribute.memory_usage.onhold_bytes", Unit.BYTE, "The number of bytes on hold"), // index - CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ALLOCATED_BYTES("content.proton.documentdb.index.memory_usage.allocated_bytes", Unit.BYTE, "The number of allocated bytes"), - CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_USED_BYTES("content.proton.documentdb.index.memory_usage.used_bytes", Unit.BYTE, "The number of used bytes (<= allocated_bytes)"), - CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_DEAD_BYTES("content.proton.documentdb.index.memory_usage.dead_bytes", Unit.BYTE, "The number of dead bytes (<= used_bytes)"), - CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ONHOLD_BYTES("content.proton.documentdb.index.memory_usage.onhold_bytes", Unit.BYTE, "The number of bytes on hold"), - CONTENT_PROTON_DOCUMENTDB_INDEX_DISK_USAGE("content.proton.documentdb.index.disk_usage", Unit.BYTE, "Disk space usage in bytes"), + CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_ELEMENTS("content.proton.index.cache.postinglist.elements", Unit.ITEM, "Number of elements in the cache. 
Contains disk index posting list files across all document types"), + CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_MEMORY_USAGE("content.proton.index.cache.postinglist.memory_usage", Unit.BYTE, "Memory usage of the cache (in bytes). Contains disk index posting list files across all document types"), + CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_HIT_RATE("content.proton.index.cache.postinglist.hit_rate", Unit.FRACTION, "Rate of hits in the cache compared to number of lookups. Contains disk index posting list files across all document types"), + CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_LOOKUPS("content.proton.index.cache.postinglist.lookups", Unit.OPERATION, "Number of lookups in the cache (hits + misses). Contains disk index posting list files across all document types"), + CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_INVALIDATIONS("content.proton.index.cache.postinglist.invalidations", Unit.OPERATION, "Number of invalidations (erased elements) in the cache. Contains disk index posting list files across all document types"), + CONTENT_PROTON_INDEX_CACHE_BITVECTOR_ELEMENTS("content.proton.index.cache.bitvector.elements", Unit.ITEM, "Number of elements in the cache. Contains disk index bitvector files across all document types"), + CONTENT_PROTON_INDEX_CACHE_BITVECTOR_MEMORY_USAGE("content.proton.index.cache.bitvector.memory_usage", Unit.BYTE, "Memory usage of the cache (in bytes). Contains disk index bitvector files across all document types"), + CONTENT_PROTON_INDEX_CACHE_BITVECTOR_HIT_RATE("content.proton.index.cache.bitvector.hit_rate", Unit.FRACTION, "Rate of hits in the cache compared to number of lookups. Contains disk index bitvector files across all document types"), + CONTENT_PROTON_INDEX_CACHE_BITVECTOR_LOOKUPS("content.proton.index.cache.bitvector.lookups", Unit.OPERATION, "Number of lookups in the cache (hits + misses). 
Contains disk index bitvector files across all document types"), + CONTENT_PROTON_INDEX_CACHE_BITVECTOR_INVALIDATIONS("content.proton.index.cache.bitvector.invalidations", Unit.OPERATION, "Number of invalidations (erased elements) in the cache. Contains disk index bitvector files across all document types"), + CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ALLOCATED_BYTES("content.proton.documentdb.index.memory_usage.allocated_bytes", Unit.BYTE, "The number of allocated bytes for the memory index for this document type"), + CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_USED_BYTES("content.proton.documentdb.index.memory_usage.used_bytes", Unit.BYTE, "The number of used bytes (<= allocated_bytes) for the memory index for this document type"), + CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_DEAD_BYTES("content.proton.documentdb.index.memory_usage.dead_bytes", Unit.BYTE, "The number of dead bytes (<= used_bytes) for the memory index for this document type"), + CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ONHOLD_BYTES("content.proton.documentdb.index.memory_usage.onhold_bytes", Unit.BYTE, "The number of bytes on hold for the memory index for this document type"), + CONTENT_PROTON_DOCUMENTDB_INDEX_DISK_USAGE("content.proton.documentdb.index.disk_usage", Unit.BYTE, "Disk space usage (in bytes) of all disk indexes for this document type"), + CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_READ_BYTES("content.proton.documentdb.index.io.search.read_bytes", Unit.BYTE, "Bytes read from disk index posting list and bitvector files as part of search for this document type"), + CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_CACHED_READ_BYTES("content.proton.documentdb.index.io.search.cached_read_bytes", Unit.BYTE, "Bytes read from cached disk index posting list and bitvector files as part of search for this document type"), + CONTENT_PROTON_DOCUMENTDB_READY_INDEX_MEMORY_USAGE_ALLOCATED_BYTES("content.proton.documentdb.ready.index.memory_usage.allocated_bytes", Unit.BYTE, "The number of allocated 
bytes for this index field in the memory index for this document type"), + CONTENT_PROTON_DOCUMENTDB_READY_INDEX_DISK_USAGE("content.proton.documentdb.ready.index.disk_usage", Unit.BYTE, "Disk space usage (in bytes) of this index field in all disk indexes for this document type"), // matching CONTENT_PROTON_DOCUMENTDB_MATCHING_QUERIES("content.proton.documentdb.matching.queries", Unit.QUERY, "Number of queries executed"), diff --git a/metrics/src/main/java/ai/vespa/metrics/docs/MetricSetDocumentation.java b/metrics/src/main/java/ai/vespa/metrics/docs/MetricSetDocumentation.java index a15f29160917..6fcca1cb9851 100644 --- a/metrics/src/main/java/ai/vespa/metrics/docs/MetricSetDocumentation.java +++ b/metrics/src/main/java/ai/vespa/metrics/docs/MetricSetDocumentation.java @@ -63,7 +63,7 @@ protected static void writeMetricSetDocumentation(String path, String name, Metr

    %s Metrics

    - + %s @@ -92,9 +92,9 @@ private static String htmlRows(Map> metrics) """, entry.getKey().baseName().replaceAll("\\.", "_"), entry.getKey().baseName(), - entry.getKey().description(), entry.getKey().unit().toString().toLowerCase(), - String.join(", ", entry.getValue())) + String.join(", ", entry.getValue()), + entry.getKey().description()) ).collect(Collectors.joining()); } diff --git a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java index 29c1ea923a5b..22f6d489f9b9 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java @@ -210,6 +210,9 @@ private static Set getControllerMetrics() { addMetric(metrics, ControllerMetrics.TENANT_CREATED_LAST_SUCCESS.last()); + addMetric(metrics, ControllerMetrics.ATLASSIAN_EXCEPTIONS.count()); + addMetric(metrics, ControllerMetrics.ATLASSIAN_LAST_SUCCESS.last()); + return metrics; } diff --git a/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java index 0d5827369fd6..6f53da81d3e1 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/Vespa9VespaMetricSet.java @@ -439,10 +439,24 @@ private static Set getSearchNodeMetrics() { // attribute addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_ALLOCATED_BYTES.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_DISK_USAGE.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_ALLOCATED_BYTES.average()); // index addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ALLOCATED_BYTES.average()); + addMetric(metrics, 
SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_READ_BYTES, EnumSet.of(sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_CACHED_READ_BYTES, EnumSet.of(sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_INDEX_DISK_USAGE.average()); + + // index caches + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_MEMORY_USAGE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_HIT_RATE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_LOOKUPS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_INVALIDATIONS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_MEMORY_USAGE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_HIT_RATE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_LOOKUPS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_INVALIDATIONS.rate()); // matching addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_QUERIES.rate()); diff --git a/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java index 0e6c537f56dd..c56e54017ca7 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java @@ -503,6 +503,7 @@ private static Set getSearchNodeMetrics() { addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_USED_BYTES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_DEAD_BYTES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_MEMORY_USAGE_ONHOLD_BYTES.average()); + addMetric(metrics, 
SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_ATTRIBUTE_DISK_USAGE.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_ALLOCATED_BYTES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_USED_BYTES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_NOTREADY_ATTRIBUTE_MEMORY_USAGE_DEAD_BYTES.average()); @@ -513,6 +514,19 @@ private static Set getSearchNodeMetrics() { addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_USED_BYTES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_DEAD_BYTES.average()); addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_MEMORY_USAGE_ONHOLD_BYTES.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_READ_BYTES, EnumSet.of(sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_INDEX_IO_SEARCH_CACHED_READ_BYTES, EnumSet.of(sum, count)); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_READY_INDEX_DISK_USAGE.average()); + + // index caches + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_MEMORY_USAGE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_HIT_RATE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_LOOKUPS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_POSTINGLIST_INVALIDATIONS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_MEMORY_USAGE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_HIT_RATE.average()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_LOOKUPS.rate()); + addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_INDEX_CACHE_BITVECTOR_INVALIDATIONS.rate()); // matching 
addMetric(metrics, SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_QUERIES.rate()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshot.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshot.java index 0e1720e6bf33..0decd1130484 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshot.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshot.java @@ -21,7 +21,7 @@ * @author mpolden */ public record Snapshot(SnapshotId id, HostName hostname, State state, History history, ClusterId cluster, - int clusterIndex, CloudAccount cloudAccount, Optional key) { + int clusterIndex, CloudAccount cloudAccount, SnapshotKey key) { public Snapshot { Objects.requireNonNull(id); @@ -126,7 +126,7 @@ public static SnapshotId generateId() { public static Snapshot create(SnapshotId id, HostName hostname, CloudAccount cloudAccount, Instant at, ClusterId cluster, int clusterIndex, SnapshotKey encryptionKey) { return new Snapshot(id, hostname, State.creating, History.of(State.creating, at), cluster, clusterIndex, - cloudAccount, Optional.of(encryptionKey)); + cloudAccount, encryptionKey); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshots.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshots.java index 9ecc386bb98d..7d6266375c64 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshots.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/backup/Snapshots.java @@ -10,6 +10,7 @@ import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.SnapshotId; +import com.yahoo.security.KeyAlgorithm; import com.yahoo.security.KeyId; import com.yahoo.security.KeyUtils; import com.yahoo.security.SealedSharedKey; @@ -28,9 +29,8 @@ import 
com.yahoo.vespa.hosted.provision.provisioning.SnapshotStore; import java.security.KeyPair; +import java.security.PrivateKey; import java.security.PublicKey; -import java.security.interfaces.XECPrivateKey; -import java.security.interfaces.XECPublicKey; import java.time.Instant; import java.util.ArrayList; import java.util.List; @@ -182,11 +182,8 @@ private SecretSharedKey generateEncryptionKey(KeyPair keyPair, SnapshotId id) { /** Reseal the encryption key for snapshot using given public key */ private SealedSharedKey resealKeyOf(Snapshot snapshot, PublicKey receiverPublicKey) { - if (snapshot.key().isEmpty()) { - throw new IllegalArgumentException("Snapshot " + snapshot.id() + " has no encryption key"); - } - VersionedKeyPair sealingKeyPair = sealingKeyPair(snapshot.key().get().sealingKeyVersion()); - SecretSharedKey unsealedKey = SharedKeyGenerator.fromSealedKey(snapshot.key().get().sharedKey(), + VersionedKeyPair sealingKeyPair = sealingKeyPair(snapshot.key().sealingKeyVersion()); + SecretSharedKey unsealedKey = SharedKeyGenerator.fromSealedKey(snapshot.key().sharedKey(), sealingKeyPair.keyPair().getPrivate()); return SharedKeyGenerator.reseal(unsealedKey, receiverPublicKey, KeyId.ofString(snapshot.id().toString())) .sealedSharedKey(); @@ -199,8 +196,12 @@ private VersionedKeyPair sealingKeyPair(SecretVersionId version) { } Key key = Key.fromString(sealingPrivateKeySecretName.get()); Secret sealingPrivateKey = version == null ? 
secretStore.getSecret(key) : secretStore.getSecret(key, version); - XECPrivateKey privateKey = KeyUtils.fromBase64EncodedX25519PrivateKey(sealingPrivateKey.secretValue().value()); - XECPublicKey publicKey = KeyUtils.extractX25519PublicKey(privateKey); + PrivateKey privateKey = KeyUtils.fromPemEncodedPrivateKey(sealingPrivateKey.secretValue().value()); + PublicKey publicKey = KeyUtils.extractPublicKey(privateKey); + if (KeyAlgorithm.from(privateKey.getAlgorithm()) != KeyAlgorithm.XDH) { + throw new IllegalArgumentException("Expected sealing key to use algorithm " + KeyAlgorithm.XDH + + ", but got " + privateKey.getAlgorithm()); + } return new VersionedKeyPair(new KeyPair(publicKey, privateKey), sealingPrivateKey.version()); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SnapshotExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SnapshotExpirer.java index fb6df1abd6af..fcb657d94a88 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SnapshotExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SnapshotExpirer.java @@ -24,7 +24,6 @@ public class SnapshotExpirer extends NodeRepositoryMaintainer { private static final Logger LOG = Logger.getLogger(SnapshotExpirer.class.getName()); - private static final Duration MIN_IDLE_PERIOD = Duration.ofDays(1); public SnapshotExpirer(NodeRepository nodeRepository, Duration interval, Metric metric) { super(nodeRepository, interval, metric); @@ -69,10 +68,15 @@ private boolean shouldRemoveAny(List snapshots, NodeList nodes, Instan /** Returns whether given snapshot should be removed */ private boolean shouldRemove(Snapshot snapshot, NodeList nodes, Instant now) { Duration idle = snapshot.idle(now); - if (idle.compareTo(MIN_IDLE_PERIOD) < 0) return false; // No: Snapshot not idle long enough + if (idle.compareTo(expiry()) < 0) return false; // No: Snapshot not idle long enough // 
TODO(mpolden): Replace this with a proper policy when implementing application-level backups if (nodes.node(snapshot.hostname().value()).isEmpty()) return true; // Yes: Snapshot belongs to non-existent node return snapshot.state() == Snapshot.State.restored; // Yes: Snapshot has been restored } + /** How long we should wait before a snapshot can be cleaned up */ + private Duration expiry() { + return nodeRepository().zone().system().isCd() ? Duration.ofHours(6) : Duration.ofDays(1); + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Dns.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Dns.java index 2e1ebdb05ac6..86185ba36701 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Dns.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Dns.java @@ -22,9 +22,14 @@ public class Dns { private Dns() {} + // TODO: Remove REVERSE after we have stopped adding those public enum RecordType { FORWARD, PUBLIC_FORWARD, REVERSE } /** Returns the set of DNS record types for a host and its children and the given version (ipv6), host type, etc. 
*/ + public static Set recordTypesFor(IP.Version ipVersion, NodeType hostType, CloudName cloudName, boolean enclave) { + return recordTypesFor(ipVersion, hostType, cloudName, enclave, false); + } + public static Set recordTypesFor(IP.Version ipVersion, NodeType hostType, CloudName cloudName, boolean enclave, boolean allowReverse) { if (cloudName == CloudName.AWS || cloudName == CloudName.GCP) { if (enclave) { @@ -60,13 +65,12 @@ public static Set recordTypesFor(IP.Version ipVersion, NodeType host public static void verify(String hostname, String ipAddress, NodeType nodeType, NameResolver resolver, CloudAccount cloudAccount, Zone zone) { IP.Version version = IP.Version.fromIpAddress(ipAddress); - boolean allowReverse = !hostname.endsWith(".vespa-cloud.net"); - Set recordTypes = recordTypesFor(version, nodeType, zone.cloud().name(), cloudAccount.isEnclave(zone), allowReverse); + Set recordTypes = recordTypesFor(version, nodeType, zone.cloud().name(), cloudAccount.isEnclave(zone)); if (recordTypes.contains(RecordType.FORWARD)) { NameResolver.RecordType recordType = version.is6() ? 
NameResolver.RecordType.AAAA : NameResolver.RecordType.A; Set addresses = resolver.resolve(hostname, recordType); - if (!addresses.equals(java.util.Set.of(ipAddress))) + if (!addresses.equals(Set.of(ipAddress))) throw new IllegalArgumentException("Expected " + hostname + " to resolve to " + ipAddress + ", but got " + addresses); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializer.java index 56e39374d255..940301244aeb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializer.java @@ -20,7 +20,6 @@ import java.time.Instant; import java.util.List; -import java.util.Optional; /** * @author mpolden @@ -50,16 +49,11 @@ public static Snapshot fromInspector(Inspector object, CloudAccount systemAccoun Instant at = Instant.ofEpochMilli(inspector.field(AT_FIELD).asLong()); history.put(type, new Snapshot.History.Event(type, at)); }); - // TODO(mpolden): Require field after 2024-12-01 - CloudAccount cloudAccount = SlimeUtils.optionalString(object.field(CLOUD_ACCOUNT_FIELD)) - .map(CloudAccount::from) - .orElse(systemAccount); - Optional encryptionKey = Optional.empty(); - if (object.field(SEALED_SHARED_KEY_FIELD).valid()) { - SealedSharedKey sharedKey = SealedSharedKey.fromTokenString(object.field(SEALED_SHARED_KEY_FIELD).asString()); - SecretVersionId sealingKeyVersion = SecretVersionId.of(object.field(SEALING_KEY_VERSION).asString()); - encryptionKey = Optional.of(new SnapshotKey(sharedKey, sealingKeyVersion)); - } + CloudAccount cloudAccount = CloudAccount.from(object.field(CLOUD_ACCOUNT_FIELD).asString()); + SealedSharedKey sharedKey = SealedSharedKey.fromTokenString(object.field(SEALED_SHARED_KEY_FIELD).asString()); + SecretVersionId sealingKeyVersion = 
SecretVersionId.of(object.field(SEALING_KEY_VERSION).asString()); + SnapshotKey encryptionKey = new SnapshotKey(sharedKey, sealingKeyVersion); + return new Snapshot(SnapshotId.of(object.field(ID_FIELD).asString()), HostName.of(object.field(HOSTNAME_FIELD).asString()), stateFromSlime(object.field(STATE_FIELD).asString()), @@ -113,10 +107,8 @@ public static void toSlime(Snapshot snapshot, Cursor object) { eventObject.setLong(AT_FIELD, event.at().toEpochMilli()); }); object.setString(CLOUD_ACCOUNT_FIELD, snapshot.cloudAccount().value()); - snapshot.key().ifPresent(k -> { - object.setString(SEALED_SHARED_KEY_FIELD, k.sharedKey().toTokenString()); - object.setString(SEALING_KEY_VERSION, k.sealingKeyVersion().value()); - }); + object.setString(SEALED_SHARED_KEY_FIELD, snapshot.key().sharedKey().toTokenString()); + object.setString(SEALING_KEY_VERSION, snapshot.key().sealingKeyVersion().value()); } public static String asString(Snapshot.State state) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java index c1cf70b3da63..1f7f207a4d61 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.hosted.provision.provisioning; +import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.ClusterMembership; @@ -17,6 +18,7 @@ import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Allocation; +import com.yahoo.yolean.Exceptions; import java.time.Instant; import java.util.ArrayList; @@ -26,6 +28,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.logging.Logger; import java.util.stream.Collectors; /** @@ -35,6 +38,8 @@ */ class Activator { + private static final Logger log = Logger.getLogger(Activator.class.getName()); + private final NodeRepository nodeRepository; private final Optional loadBalancerProvisioner; @@ -44,9 +49,9 @@ public Activator(NodeRepository nodeRepository, Optional hosts, long generation, ApplicationTransaction transaction) { - NodeList newActive = activateNodes(hosts, generation, transaction); - activateLoadBalancers(hosts, newActive, transaction); + public void activate(Collection hosts, ActivationContext context, ApplicationTransaction transaction) { + NodeList newActive = activateNodes(hosts, context.generation(), transaction); + activateLoadBalancers(hosts, newActive, transaction, context.isBootstrap()); } /** @@ -149,8 +154,14 @@ private void unreserveParentsOf(NodeList nodes) { } /** Activate load balancers */ - private void activateLoadBalancers(Collection hosts, NodeList newActive, ApplicationTransaction transaction) { - loadBalancerProvisioner.ifPresent(provisioner -> provisioner.activate(allClustersOf(hosts), newActive, transaction)); + private void activateLoadBalancers(Collection hosts, NodeList newActive, ApplicationTransaction transaction, boolean isBootstrap) { + try { + loadBalancerProvisioner.ifPresent(provisioner -> 
provisioner.activate(allClustersOf(hosts), newActive, transaction)); + } catch (RuntimeException e) { + if (isBootstrap) + log.warning("Failed to activate load balancers for " + transaction.application() + ": " + Exceptions.toMessageString(e) + " (Ignoring because bootstrap deployment)"); + throw e; + } } private static Set allClustersOf(Collection hosts) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java index 39c14be4d2b5..7099cbac4bc9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java @@ -107,7 +107,7 @@ public long activate() { removeApplication(application.getApplicationId()); } else { NestedTransaction nestedTransaction = new NestedTransaction(); - provisioner.activate(hostSpecs, new ActivationContext(0), new ApplicationTransaction(lock, nestedTransaction)); + provisioner.activate(hostSpecs, new ActivationContext(0, !application.getCapacity().canFail()), new ApplicationTransaction(lock, nestedTransaction)); nestedTransaction.commit(); duperModel.infraApplicationActivated( diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index 8c056ad1a939..d44e7e11799b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -4,6 +4,7 @@ import com.yahoo.component.annotation.Inject; import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; +import 
com.yahoo.config.provision.ApplicationMutex; import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.CapacityPolicies; @@ -14,7 +15,6 @@ import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; -import com.yahoo.config.provision.ApplicationMutex; import com.yahoo.config.provision.ProvisionLogger; import com.yahoo.config.provision.Provisioner; import com.yahoo.config.provision.Zone; @@ -140,7 +140,7 @@ private NodeResources getNodeResources(ClusterSpec cluster, NodeResources nodeRe @Override public void activate(Collection hosts, ActivationContext context, ApplicationTransaction transaction) { validate(hosts); - activator.activate(hosts, context.generation(), transaction); + activator.activate(hosts, context, transaction); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java index 0445e40e0698..1a7d116005fb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java @@ -70,7 +70,13 @@ public List prepare(ApplicationId application, ClusterSpec cluster, NodeSp log.log(Level.FINE, () -> "Preparing " + cluster.type().name() + " " + cluster.id() + " with requested resources " + requested.resources().orElse(NodeResources.unspecified())); - loadBalancerProvisioner.ifPresent(provisioner -> provisioner.prepare(application, cluster, requested)); + try { + loadBalancerProvisioner.ifPresent(provisioner -> provisioner.prepare(application, cluster, requested)); + } catch (RuntimeException e) { + if (!requested.canFail()) + log.warning("Failed to prepare load balancers for " + application + " " + cluster + ": " + 
Exceptions.toMessageString(e) + " (Ignoring because bootstrap deployment)"); + throw e; + } // Try preparing in memory without global unallocated lock. Most of the time there should be no changes, // and we can return nodes previously allocated. diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java index 7a6507f9847f..03044319d54f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java @@ -248,7 +248,7 @@ static void toSlime(WireguardKeyWithTimestamp keyWithTimestamp, Cursor object) { private void toSlime(Snapshot snapshot, Cursor object) { object.setString("id", snapshot.id().toString()); object.setString("state", SnapshotSerializer.asString(snapshot.state())); - object.setBool("encrypted", snapshot.key().isPresent()); + object.setBool("encrypted", true); } private Optional currentContainerImage(Node node) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java index 0a5dd5753acc..03f3eeca9e88 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java @@ -266,7 +266,11 @@ private HttpResponse snapshotEncryptionKey(SnapshotId id, String hostname, Inspe private HttpResponse snapshot(String hostname) { Snapshot snapshot = nodeRepository.snapshots().create(hostname, nodeRepository.clock().instant()); - return new MessageResponse("Triggered a new snapshot of " + hostname + ": " + snapshot.id()); + Slime slime = new Slime(); + Cursor root = slime.setObject(); + root.setString("id", 
snapshot.id().toString()); + root.setString("message", "Triggered a new snapshot of " + hostname + ": " + snapshot.id()); + return new SlimeJsonResponse(slime); } private HttpResponse restoreSnapshot(SnapshotId id, String hostname) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java index 1e3c74af2475..7880b1c063f8 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java @@ -183,7 +183,7 @@ public long activate() { activations++; try (var lock = provisioner.lock(application.id)) { try (NestedTransaction t = new NestedTransaction()) { - provisioner.activate(preparedHosts, new ActivationContext(activations), new ApplicationTransaction(lock, t)); + provisioner.activate(preparedHosts, new ActivationContext(activations, false), new ApplicationTransaction(lock, t)); t.commit(); lastActivationTimes.put(application.id, clock.instant()); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java index d8d6b5ad0a2d..da8062162460 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java @@ -268,7 +268,7 @@ private void populate() { private void activate(List hosts, ApplicationId application, NodeRepositoryProvisioner provisioner) { try (var lock = provisioner.lock(application)) { NestedTransaction transaction = new NestedTransaction(); - provisioner.activate(hosts, new ActivationContext(0), new ApplicationTransaction(lock, transaction)); + provisioner.activate(hosts, new 
ActivationContext(0, false), new ApplicationTransaction(lock, transaction)); transaction.commit(); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/RealDataScenarioTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/RealDataScenarioTest.java index 50b30ef6b3a7..5c49fcbf43d9 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/RealDataScenarioTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/RealDataScenarioTest.java @@ -5,6 +5,7 @@ import com.yahoo.config.model.builder.xml.XmlHelper; import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationMutex; import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.Cloud; @@ -16,7 +17,6 @@ import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; -import com.yahoo.config.provision.ApplicationMutex; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.Zone; @@ -119,7 +119,7 @@ private void deploy(ProvisioningTester tester, ApplicationId app, ClusterSpec[] .flatMap(Collection::stream) .toList(); NestedTransaction transaction = new NestedTransaction(); - tester.provisioner().activate(hostSpecs, new ActivationContext(0), new ApplicationTransaction(new ApplicationMutex(app, () -> {}), transaction)); + tester.provisioner().activate(hostSpecs, new ActivationContext(0, false), new ApplicationTransaction(new ApplicationMutex(app, () -> {}), transaction)); transaction.commit(); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotTest.java index a3a1d48fdaad..7cf2a7aedac5 100644 --- 
a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotTest.java @@ -1,14 +1,19 @@ package com.yahoo.vespa.hosted.provision.backup; +import ai.vespa.secret.model.SecretVersionId; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.CloudAccount; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.HostName; +import com.yahoo.security.KeyId; +import com.yahoo.security.KeyUtils; +import com.yahoo.security.SecretSharedKey; +import com.yahoo.security.SharedKeyGenerator; import com.yahoo.vespa.hosted.provision.node.ClusterId; import org.junit.jupiter.api.Test; +import java.security.PublicKey; import java.time.Instant; -import java.util.Optional; import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.fail; @@ -47,9 +52,12 @@ private static void assertDisallowed(Snapshot.State from, Snapshot.State to) { } private static Snapshot snapshot(Snapshot.State state) { + PublicKey publicKey = KeyUtils.generateX25519KeyPair().getPublic(); + SecretSharedKey sharedKey = SharedKeyGenerator.generateForReceiverPublicKey(publicKey, + KeyId.ofString("mykey")); return new Snapshot(Snapshot.generateId(), HostName.of("h1.example.com"), state, Snapshot.History.of(state, Instant.ofEpochMilli(123)), new ClusterId(ApplicationId.defaultId(), ClusterSpec.Id.from("c1")), - 0, CloudAccount.empty, Optional.empty()); + 0, CloudAccount.empty, new SnapshotKey(sharedKey.sealedSharedKey(), SecretVersionId.of("v1"))); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotsTest.java index cc3928550829..b4f909acb25f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotsTest.java +++ 
b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/backup/SnapshotsTest.java @@ -9,6 +9,7 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; +import com.yahoo.security.KeyFormat; import com.yahoo.security.KeyUtils; import com.yahoo.security.SealedSharedKey; import com.yahoo.vespa.hosted.provision.Node; @@ -17,12 +18,10 @@ import java.security.KeyPair; import java.security.PublicKey; -import java.security.interfaces.XECPrivateKey; import java.util.List; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; /** * @author mpolden @@ -46,27 +45,25 @@ void snapshot() { Snapshots snapshots = tester.nodeRepository().snapshots(); String node0 = nodes.get(0).hostname(); Snapshot snapshot0 = snapshots.create(node0, tester.clock().instant()); - assertTrue(snapshot0.key().isPresent()); // Request snapshot key PublicKey receiverPublicKey = KeyUtils.generateX25519KeyPair().getPublic(); SealedSharedKey resealedKey = snapshots.keyOf(snapshot0.id(), node0, receiverPublicKey); - assertNotEquals(snapshot0.key().get().sharedKey(), resealedKey); + assertNotEquals(snapshot0.key().sharedKey(), resealedKey); // Sealing key can be rotated independently of existing snapshots KeyPair keyPair = KeyUtils.generateX25519KeyPair(); tester.secretStore().add(new Secret(Key.fromString("snapshot/sealingPrivateKey"), - KeyUtils.toBase64EncodedX25519PrivateKey((XECPrivateKey) keyPair.getPrivate()) - .getBytes(), + KeyUtils.toPem(keyPair.getPrivate(), KeyFormat.PKCS8).getBytes(), SecretVersionId.of("2"))); - assertEquals(SecretVersionId.of("1"), snapshots.require(snapshot0.id(), node0).key().get().sealingKeyVersion()); - assertNotEquals(snapshot0.key().get().sharedKey(), snapshots.keyOf(snapshot0.id(), node0, receiverPublicKey), + assertEquals(SecretVersionId.of("1"), 
snapshots.require(snapshot0.id(), node0).key().sealingKeyVersion()); + assertNotEquals(snapshot0.key().sharedKey(), snapshots.keyOf(snapshot0.id(), node0, receiverPublicKey), "Can reseal after key rotation"); // Next snapshot uses latest sealing key String node1 = nodes.get(1).hostname(); Snapshot snapshot1 = snapshots.create(node1, tester.clock().instant()); - assertEquals(SecretVersionId.of("2"), snapshot1.key().get().sealingKeyVersion()); + assertEquals(SecretVersionId.of("2"), snapshot1.key().sealingKeyVersion()); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java index abe789bc968b..a33de70384cd 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java @@ -362,7 +362,7 @@ public FailureScenario allocate(ApplicationId applicationId, ClusterSpec cluster (level, message) -> System.out.println(level + ": " + message) ); try (var lock = provisioner.lock(applicationId)) { NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator)); - provisioner.activate(Set.copyOf(preparedNodes), new ActivationContext(0), new ApplicationTransaction(lock, transaction)); + provisioner.activate(Set.copyOf(preparedNodes), new ActivationContext(0, !capacity.canFail()), new ApplicationTransaction(lock, transaction)); transaction.commit(); } return this; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java index 999b398f8077..726558381126 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java +++ 
b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java @@ -26,11 +26,11 @@ import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; import com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; +import com.yahoo.vespa.hosted.provision.testutils.InMemoryProvisionLogger; import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; import com.yahoo.vespa.hosted.provision.testutils.ServiceMonitorStub; import com.yahoo.vespa.service.duper.InfraApplication; import com.yahoo.vespa.service.duper.TenantHostApplication; -import com.yahoo.vespa.hosted.provision.testutils.InMemoryProvisionLogger; import java.time.Clock; import java.time.Duration; @@ -274,7 +274,7 @@ public void activate(ApplicationId applicationId, ClusterSpec cluster, Capacity List hosts = provisioner.prepare(applicationId, cluster, capacity, new InMemoryProvisionLogger()); try (var lock = provisioner.lock(applicationId)) { NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator)); - provisioner.activate(hosts, new ActivationContext(0), new ApplicationTransaction(lock, transaction)); + provisioner.activate(hosts, new ActivationContext(0, !capacity.canFail()), new ApplicationTransaction(lock, transaction)); transaction.commit(); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializerTest.java index e4574af18f72..4e858d630c92 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SnapshotSerializerTest.java @@ -18,7 +18,6 @@ import java.security.PublicKey; import java.time.Instant; import java.util.List; -import 
java.util.Optional; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -41,7 +40,7 @@ void serialization() { ClusterSpec.Id.from("c1")), 0, CloudAccount.from("aws:000123456789"), - Optional.empty() + new SnapshotKey(sharedKey.sealedSharedKey(), SecretVersionId.of("v2")) ); Snapshot snapshot1 = new Snapshot(SnapshotId.of("7e45b44a-0f1a-4729-a4f4-20fff5d1e85d"), HostName.of("host1.example.com"), @@ -52,7 +51,7 @@ void serialization() { ClusterSpec.Id.from("c2")), 2, CloudAccount.from("aws:777123456789"), - Optional.of(new SnapshotKey(sharedKey.sealedSharedKey(), SecretVersionId.of("v1"))) + new SnapshotKey(sharedKey.sealedSharedKey(), SecretVersionId.of("v1")) ); assertEquals(snapshot0, SnapshotSerializer.fromSlime(SnapshotSerializer.toSlime(snapshot0), systemAccount)); List snapshots = List.of(snapshot0, snapshot1); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index 4239e21e01a9..9bdfd2850150 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -33,6 +33,7 @@ import com.yahoo.config.provision.Zone; import com.yahoo.config.provisioning.FlavorsConfig; import com.yahoo.jdisc.test.MockMetric; +import com.yahoo.security.KeyFormat; import com.yahoo.security.KeyUtils; import com.yahoo.test.ManualClock; import com.yahoo.transaction.NestedTransaction; @@ -69,7 +70,6 @@ import com.yahoo.vespa.service.duper.TenantHostApplication; import java.security.KeyPair; -import java.security.interfaces.XECPrivateKey; import java.time.temporal.TemporalAmount; import java.util.ArrayList; import java.util.Collection; @@ -247,7 +247,7 @@ public Collection activate(ApplicationId application, Collection #include +#include #include #include 
-#include -#include #include +#include +#include #include @@ -132,6 +133,12 @@ MatchEngine::doSearch(const SearchRequest & searchRequest) { // 3 is the minimum level required for backend tracing. searchRequest.setTraceLevel(trace::Level::lookup(searchRequest.propertiesMap.modelOverrides(), searchRequest.trace().getLevel()), 3); + searchRequest.trace().addEvent(4, + vespalib::make_string("searching for %u hits at offset %u%s%s", + searchRequest.maxhits, + searchRequest.offset, + searchRequest.sortSpec.empty() ? "" : " (with sorting)", + searchRequest.groupSpec.empty() ? "" : " (with grouping)")); ISearchHandler::SP searchHandler; auto threadBundle = _threadBundlePool.getBundle(); { // try to find the match handler corresponding to the specified search doc type @@ -155,6 +162,9 @@ MatchEngine::doSearch(const SearchRequest & searchRequest) { if (searchRequest.expired()) { vespalib::Issue::report("search request timed out; results may be incomplete"); } + searchRequest.trace().addEvent(4, + vespalib::make_string("returning %zu hits from total %zu", + ret->hits.size(), ret->totalHitCount)); return ret; } diff --git a/searchcore/src/vespa/searchcore/proton/metrics/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/metrics/CMakeLists.txt index dae64398293f..6ea9a976cc4d 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/CMakeLists.txt +++ b/searchcore/src/vespa/searchcore/proton/metrics/CMakeLists.txt @@ -5,6 +5,7 @@ vespa_add_library(searchcore_proton_metrics STATIC attribute_metrics_entry.cpp cache_metrics.cpp content_proton_metrics.cpp + disk_io_metrics.cpp documentdb_job_trackers.cpp documentdb_tagged_metrics.cpp document_db_commit_metrics.cpp diff --git a/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.cpp b/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.cpp index 5c16c668fe56..a512274b80a1 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.cpp +++ 
b/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.cpp @@ -18,7 +18,8 @@ CacheMetrics::CacheMetrics(MetricSet *parent, const std::string& name, const std hitRate("hit_rate", {}, "Rate of hits in the cache compared to number of lookups", this), lookups("lookups", {}, "Number of lookups in the cache (hits + misses)", this), invalidations("invalidations", {}, "Number of invalidations (erased elements) in the cache.", this), - _cache_name(cache_name) + _cache_name(cache_name), + _last_stats() { } @@ -51,13 +52,14 @@ CacheMetrics::update_count_metric(uint64_t currVal, uint64_t lastVal, metrics::L } void -CacheMetrics::update_metrics(const CacheStats& current, const CacheStats& last) +CacheMetrics::update_metrics(const CacheStats& stats) { - memoryUsage.set(current.memory_used); - elements.set(current.elements); - update_hit_rate(current, last); - update_count_metric(current.lookups(), last.lookups(), lookups); - update_count_metric(current.invalidations, last.invalidations, invalidations); + memoryUsage.set(stats.memory_used); + elements.set(stats.elements); + update_hit_rate(stats, _last_stats); + update_count_metric(stats.lookups(), _last_stats.lookups(), lookups); + update_count_metric(stats.invalidations, _last_stats.invalidations, invalidations); + _last_stats = stats; } } diff --git a/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.h b/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.h index b38a21bb2d45..b9066a4a6288 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.h +++ b/searchcore/src/vespa/searchcore/proton/metrics/cache_metrics.h @@ -4,10 +4,9 @@ #include #include #include +#include #include -namespace vespalib { struct CacheStats; } - namespace proton { /** @@ -16,10 +15,11 @@ namespace proton { class CacheMetrics : public metrics::MetricSet { metrics::LongValueMetric memoryUsage; metrics::LongValueMetric elements; - metrics::LongAverageMetric hitRate; + metrics::DoubleAverageMetric hitRate; 
metrics::LongCountMetric lookups; metrics::LongCountMetric invalidations; std::string _cache_name; + vespalib::CacheStats _last_stats; void update_hit_rate(const vespalib::CacheStats ¤t, const vespalib::CacheStats &last); static void update_count_metric(uint64_t currVal, uint64_t lastVal, metrics::LongCountMetric &metric); @@ -27,7 +27,7 @@ class CacheMetrics : public metrics::MetricSet { CacheMetrics(metrics::MetricSet* parent, const std::string& name, const std::string& description, const std::string& cache_name); ~CacheMetrics() override; - void update_metrics(const vespalib::CacheStats& current, const vespalib::CacheStats& last); + void update_metrics(const vespalib::CacheStats& stats); }; } diff --git a/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.cpp b/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.cpp index 06a43725136b..2c44db6f3ef8 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.cpp +++ b/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.cpp @@ -29,7 +29,8 @@ ContentProtonMetrics::ProtonExecutorMetrics::~ProtonExecutorMetrics() = default; ContentProtonMetrics::IndexMetrics::CacheMetrics::CacheMetrics(metrics::MetricSet* parent) : metrics::MetricSet("cache", {}, "Metrics for caches", parent), - postinglist(this, "postinglist", "Posting list cache metrics", "postinglist_cache") + postinglist(this, "postinglist", "Posting list cache metrics", "postinglist_cache"), + bitvector(this, "bitvector", "Bitvector cache metrics", "bitvector_cache") { } diff --git a/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.h b/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.h index 75167f1771b7..d3769c7a6f6b 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.h +++ b/searchcore/src/vespa/searchcore/proton/metrics/content_proton_metrics.h @@ -46,6 +46,7 @@ struct ContentProtonMetrics : metrics::MetricSet 
struct CacheMetrics : public metrics::MetricSet { proton::CacheMetrics postinglist; + proton::CacheMetrics bitvector; explicit CacheMetrics(metrics::MetricSet* parent); ~CacheMetrics() override; diff --git a/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.cpp b/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.cpp new file mode 100644 index 000000000000..f1f4234ad730 --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.cpp @@ -0,0 +1,44 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "disk_io_metrics.h" +#include + +using search::DiskIoStats; +using search::FieldIndexIoStats; + +namespace proton { + +namespace { + +void update_helper(metrics::LongValueMetric &metric, const DiskIoStats &stats) { + metric.addTotalValueBatch(stats.read_bytes_total(), stats.read_operations(), + stats.read_bytes_min(), stats.read_bytes_max()); +} + +} + +DiskIoMetrics::SearchMetrics::SearchMetrics(metrics::MetricSet* parent) + : MetricSet("search", {}, "The search io for a given component", parent), + _read_bytes("read_bytes", {}, "Bytes read in posting list files as part of search", this), + _cached_read_bytes("cached_read_bytes", {}, "Bytes read from posting list files cache as part of search", this) +{ +} + +DiskIoMetrics::SearchMetrics::~SearchMetrics() = default; + +void +DiskIoMetrics::SearchMetrics::update(const FieldIndexIoStats& io_stats) +{ + update_helper(_read_bytes, io_stats.read()); + update_helper(_cached_read_bytes, io_stats.cached_read()); +} + +DiskIoMetrics::DiskIoMetrics(metrics::MetricSet* parent) + : MetricSet("io", {}, "The disk usage for a given component", parent), + _search(this) +{ +} + +DiskIoMetrics::~DiskIoMetrics() = default; + +} diff --git a/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.h b/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.h new file mode 100644 index 
000000000000..25f91243f036 --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/metrics/disk_io_metrics.h @@ -0,0 +1,34 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { class FieldIndexIoStats; } + +namespace proton { + +/* + * Class containing disk io metrics, e.g. per index field or + * aggregated at document type level. + */ +class DiskIoMetrics : public metrics::MetricSet { + class SearchMetrics : public metrics::MetricSet { + metrics::LongValueMetric _read_bytes; + metrics::LongValueMetric _cached_read_bytes; + public: + explicit SearchMetrics(metrics::MetricSet* parent); + ~SearchMetrics() override; + void update(const search::FieldIndexIoStats& io_stats); + }; + + SearchMetrics _search; + +public: + explicit DiskIoMetrics(metrics::MetricSet* parent); + ~DiskIoMetrics() override; + void update(const search::FieldIndexIoStats& io_stats) { _search.update(io_stats); } +}; + +} diff --git a/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.cpp b/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.cpp index c1e87a834b6c..af03133edcd3 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.cpp +++ b/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.cpp @@ -93,7 +93,8 @@ DocumentDBTaggedMetrics::IndexMetrics::IndexMetrics(MetricSet *parent) : MetricSet("index", {}, "Index metrics (memory and disk) for this document db", parent), diskUsage("disk_usage", {}, "Disk space usage in bytes", this), memoryUsage(this), - docsInMemory("docs_in_memory", {}, "Number of documents in memory index", this) + docsInMemory("docs_in_memory", {}, "Number of documents in memory index", this), + disk_io(this) { } diff --git a/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.h 
b/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.h index 6191678c2bda..66fed96263fd 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.h +++ b/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.h @@ -97,6 +97,7 @@ struct DocumentDBTaggedMetrics : metrics::MetricSet metrics::LongValueMetric diskUsage; MemoryUsageMetrics memoryUsage; metrics::LongValueMetric docsInMemory; + DiskIoMetrics disk_io; IndexMetrics(metrics::MetricSet *parent); ~IndexMetrics() override; diff --git a/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.cpp b/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.cpp index 700b43e356a8..ee5b836b2277 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.cpp +++ b/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.cpp @@ -7,10 +7,10 @@ namespace proton { FieldMetricsEntry::FieldMetricsEntry(const std::string& name, const std::string& field_name, const std::string& description) : metrics::MetricSet(name, {{"field", field_name}}, description, nullptr), memoryUsage(this), - size_on_disk("size_on_disk", {}, "Size on disk (bytes)", this) + disk_usage("disk_usage", {}, "Disk space usage (in bytes)", this) { } FieldMetricsEntry::~FieldMetricsEntry() = default; -} // namespace proton +} diff --git a/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.h b/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.h index 62d15d478e19..f023c5937df2 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.h +++ b/searchcore/src/vespa/searchcore/proton/metrics/field_metrics_entry.h @@ -11,9 +11,9 @@ namespace proton { */ struct FieldMetricsEntry : public metrics::MetricSet { MemoryUsageMetrics memoryUsage; - metrics::LongValueMetric size_on_disk; + metrics::LongValueMetric disk_usage; FieldMetricsEntry(const std::string& name, const std::string& field_name, const 
std::string& description); ~FieldMetricsEntry() override; }; -} // namespace proton +} diff --git a/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.cpp b/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.cpp index 3c3617cf14ba..b48d7c46e892 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.cpp +++ b/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.cpp @@ -1,10 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "index_metrics_entry.h" -#include - -using search::CacheDiskIoStats; -using search::DiskIoStats; +#include namespace proton { @@ -13,37 +10,8 @@ namespace { const std::string entry_name("index"); const std::string entry_description("Metrics for indexes for a given field"); -void update_helper(metrics::LongValueMetric &metric, const DiskIoStats &stats) { - metric.addTotalValueBatch(stats.read_bytes_total(), stats.read_operations(), - stats.read_bytes_min(), stats.read_bytes_max()); -} - -} - -IndexMetricsEntry::DiskIoMetrics::SearchMetrics::SearchMetrics(metrics::MetricSet* parent) - : MetricSet("search", {}, "The search io for a given component", parent), - _read_bytes("read_bytes", {}, "Bytes read in posting list files as part of search", this), - _cached_read_bytes("cached_read_bytes", {}, "Bytes read from posting list files cache as part of search", this) -{ -} - -IndexMetricsEntry::DiskIoMetrics::SearchMetrics::~SearchMetrics() = default; - -void -IndexMetricsEntry::DiskIoMetrics::SearchMetrics::update(const CacheDiskIoStats& cache_disk_io_stats) -{ - update_helper(_read_bytes, cache_disk_io_stats.read()); - update_helper(_cached_read_bytes, cache_disk_io_stats.cached_read()); } -IndexMetricsEntry::DiskIoMetrics::DiskIoMetrics(metrics::MetricSet* parent) - : MetricSet("io", {}, "The disk usage for a given component", parent), - _search(this) -{ -} - -IndexMetricsEntry::DiskIoMetrics::~DiskIoMetrics() 
= default; - IndexMetricsEntry::IndexMetricsEntry(const std::string& field_name) : FieldMetricsEntry(entry_name, field_name, entry_description), _disk_io(this) diff --git a/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.h b/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.h index c2783d99e0e4..4454a1947ab8 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.h +++ b/searchcore/src/vespa/searchcore/proton/metrics/index_metrics_entry.h @@ -2,10 +2,9 @@ #pragma once +#include "disk_io_metrics.h" #include "field_metrics_entry.h" -namespace search { class CacheDiskIoStats; } - namespace proton { /* @@ -13,30 +12,12 @@ namespace proton { * disk indexes and memory indexes. */ class IndexMetricsEntry : public FieldMetricsEntry { - class DiskIoMetrics : public metrics::MetricSet { - class SearchMetrics : public metrics::MetricSet { - metrics::LongValueMetric _read_bytes; - metrics::LongValueMetric _cached_read_bytes; - public: - explicit SearchMetrics(metrics::MetricSet* parent); - ~SearchMetrics() override; - void update(const search::CacheDiskIoStats& cache_disk_io_stats); - }; - - SearchMetrics _search; - - public: - explicit DiskIoMetrics(metrics::MetricSet* parent); - ~DiskIoMetrics() override; - void update(const search::CacheDiskIoStats& cache_disk_io_stats) { _search.update(cache_disk_io_stats); } - }; - DiskIoMetrics _disk_io; public: explicit IndexMetricsEntry(const std::string& field_name); ~IndexMetricsEntry() override; - void update_disk_io(const search::CacheDiskIoStats& cache_disk_io_stats) { _disk_io.update(cache_disk_io_stats); } + void update_disk_io(const search::FieldIndexIoStats& io_stats) { _disk_io.update(io_stats); } }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/reference/gid_to_lid_change_handler.cpp b/searchcore/src/vespa/searchcore/proton/reference/gid_to_lid_change_handler.cpp index e7e24c908d75..c3642404b0e2 100644 --- 
a/searchcore/src/vespa/searchcore/proton/reference/gid_to_lid_change_handler.cpp +++ b/searchcore/src/vespa/searchcore/proton/reference/gid_to_lid_change_handler.cpp @@ -82,12 +82,12 @@ GidToLidChangeHandler::notifyRemoves(IDestructorCallbackSP context, const std::v assert(entry.removeSerialNum < serialNum); assert(entry.putSerialNum < serialNum); if (entry.removeSerialNum < entry.putSerialNum) { - notifyRemove(std::move(context), gid); + notifyRemove(context, gid); } entry.removeSerialNum = serialNum; ++entry.refCount; } else { - notifyRemove(std::move(context), gid); + notifyRemove(context, gid); } _pending_changes.emplace_back(IDestructorCallbackSP(), gid, 0, serialNum, true); } diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp index 73fa84685cb0..ea7c9829534c 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include @@ -43,7 +43,6 @@ DocumentDBMetricsUpdater::DocumentDBMetricsUpdater(const DocumentSubDBCollection _jobTrackers(jobTrackers), _writeFilter(writeFilter), _feed_handler(feed_handler), - _lastDocStoreCacheStats(), _last_feed_handler_stats() { } @@ -73,21 +72,24 @@ updateDiskUsageMetric(metrics::LongValueMetric &metric, uint64_t diskUsage, Tota } void -updateIndexMetrics(DocumentDBTaggedMetrics &metrics, const search::SearchableStats &stats, TotalStats &totalStats) +updateIndexMetrics(DocumentDBTaggedMetrics &metrics, const search::IndexStats &stats, TotalStats &totalStats) { DocumentDBTaggedMetrics::IndexMetrics &indexMetrics = metrics.index; updateDiskUsageMetric(indexMetrics.diskUsage, stats.sizeOnDisk(), totalStats); updateMemoryUsageMetrics(indexMetrics.memoryUsage, stats.memoryUsage(), totalStats); 
indexMetrics.docsInMemory.set(stats.docsInMemory()); auto& field_metrics = metrics.ready.index; + search::FieldIndexIoStats disk_io; for (auto& field : stats.get_field_stats()) { auto entry = field_metrics.get_field_metrics_entry(field.first); if (entry) { entry->memoryUsage.update(field.second.memory_usage()); - entry->size_on_disk.set(field.second.size_on_disk()); - entry->update_disk_io(field.second.cache_disk_io_stats()); + entry->disk_usage.set(field.second.size_on_disk()); + entry->update_disk_io(field.second.io_stats()); } + disk_io.merge(field.second.io_stats()); } + indexMetrics.disk_io.update(disk_io); } struct TempAttributeMetric @@ -182,7 +184,7 @@ updateAttributeMetrics(AttributeMetrics &metrics, const TempAttributeMetrics &tm auto entry = metrics.get_field_metrics_entry(attr.first); if (entry) { entry->memoryUsage.update(attr.second.memoryUsage); - entry->size_on_disk.set(attr.second.size_on_disk); + entry->disk_usage.set(attr.second.size_on_disk); } } } @@ -233,7 +235,6 @@ updateDocumentsMetrics(DocumentDBTaggedMetrics &metrics, const DocumentSubDBColl void updateDocumentStoreMetrics(DocumentDBTaggedMetrics::SubDBMetrics::DocumentStoreMetrics &metrics, const IDocumentSubDB *subDb, - CacheStats &lastCacheStats, TotalStats &totalStats) { const ISummaryManager::SP &summaryMgr = subDb->getSummaryManager(); @@ -246,17 +247,16 @@ updateDocumentStoreMetrics(DocumentDBTaggedMetrics::SubDBMetrics::DocumentStoreM vespalib::CacheStats cacheStats = backingStore.getCacheStats(); totalStats.memoryUsage.incAllocatedBytes(cacheStats.memory_used); - metrics.cache.update_metrics(cacheStats, lastCacheStats); - lastCacheStats = cacheStats; + metrics.cache.update_metrics(cacheStats); } void updateDocumentStoreMetrics(DocumentDBTaggedMetrics &metrics, const DocumentSubDBCollection &subDBs, - DocumentDBMetricsUpdater::DocumentStoreCacheStats &lastDocStoreCacheStats, TotalStats &totalStats) + TotalStats &totalStats) { - 
updateDocumentStoreMetrics(metrics.ready.documentStore, subDBs.getReadySubDB(), lastDocStoreCacheStats.readySubDb, totalStats); - updateDocumentStoreMetrics(metrics.removed.documentStore, subDBs.getRemSubDB(), lastDocStoreCacheStats.removedSubDb, totalStats); - updateDocumentStoreMetrics(metrics.notReady.documentStore, subDBs.getNotReadySubDB(), lastDocStoreCacheStats.notReadySubDb, totalStats); + updateDocumentStoreMetrics(metrics.ready.documentStore, subDBs.getReadySubDB(), totalStats); + updateDocumentStoreMetrics(metrics.removed.documentStore, subDBs.getRemSubDB(), totalStats); + updateDocumentStoreMetrics(metrics.notReady.documentStore, subDBs.getNotReadySubDB(), totalStats); } template @@ -300,11 +300,11 @@ DocumentDBMetricsUpdater::updateMetrics(const metrics::MetricLockGuard & guard, { TotalStats totalStats; ExecutorThreadingServiceStats threadingServiceStats = _writeService.getStats(); - updateIndexMetrics(metrics, _subDBs.getReadySubDB()->getSearchableStats(), totalStats); + updateIndexMetrics(metrics, _subDBs.getReadySubDB()->get_index_stats(true), totalStats); updateAttributeMetrics(metrics, _subDBs, totalStats); updateMatchingMetrics(guard, metrics, *_subDBs.getReadySubDB()); updateDocumentsMetrics(metrics, _subDBs); - updateDocumentStoreMetrics(metrics, _subDBs, _lastDocStoreCacheStats, totalStats); + updateDocumentStoreMetrics(metrics, _subDBs, totalStats); updateMiscMetrics(metrics, threadingServiceStats); metrics.totalMemoryUsage.update(totalStats.memoryUsage); diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.h b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.h index 3573d391b379..da734d9efe16 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.h +++ b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.h @@ -20,23 +20,11 @@ class FeedHandler; * Class used to update metrics for a document db. 
*/ class DocumentDBMetricsUpdater { -public: - - struct DocumentStoreCacheStats { - vespalib::CacheStats readySubDb; - vespalib::CacheStats notReadySubDb; - vespalib::CacheStats removedSubDb; - DocumentStoreCacheStats() : readySubDb(), notReadySubDb(), removedSubDb() {} - }; - -private: const DocumentSubDBCollection &_subDBs; ExecutorThreadingService &_writeService; DocumentDBJobTrackers &_jobTrackers; const AttributeUsageFilter &_writeFilter; FeedHandler &_feed_handler; - // Last updated document store cache statistics. Necessary due to metrics implementation is upside down. - DocumentStoreCacheStats _lastDocStoreCacheStats; std::optional _last_feed_handler_stats; void updateMiscMetrics(DocumentDBTaggedMetrics &metrics, const ExecutorThreadingServiceStats &threadingServiceStats); diff --git a/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h b/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h index d5eaf2db2e74..f02f3495bce1 100644 --- a/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -122,7 +122,7 @@ class IDocumentSubDB virtual SerialNum getNewestFlushedSerial() = 0; virtual void pruneRemovedFields(SerialNum serialNum) = 0; virtual void setIndexSchema(std::shared_ptr schema, SerialNum serialNum) = 0; - virtual search::SearchableStats getSearchableStats() const = 0; + virtual search::IndexStats get_index_stats(bool clear_disk_io_stats) const = 0; virtual std::shared_ptr getDocumentRetriever() = 0; virtual matching::MatchingStats getMatcherStats(const std::string &rankProfile) const = 0; diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.cpp b/searchcore/src/vespa/searchcore/proton/server/proton.cpp index fe4cacbbeddf..8153248cfd76 100644 --- a/searchcore/src/vespa/searchcore/proton/server/proton.cpp +++ 
b/searchcore/src/vespa/searchcore/proton/server/proton.cpp @@ -165,10 +165,11 @@ void ensureWritableDir(const std::string &dirName) { std::shared_ptr make_posting_list_cache(const ProtonConfig& cfg) { - if (cfg.search.io == ProtonConfig::Search::Io::MMAP || cfg.index.cache.postinglist.maxbytes == 0) { + if (cfg.search.io == ProtonConfig::Search::Io::MMAP || + (cfg.index.cache.postinglist.maxbytes == 0 && cfg.index.cache.bitvector.maxbytes == 0)) { return {}; } - return std::make_shared(cfg.index.cache.postinglist.maxbytes); + return std::make_shared(cfg.index.cache.postinglist.maxbytes, cfg.index.cache.bitvector.maxbytes); } } // namespace @@ -281,8 +282,7 @@ Proton::Proton(FNET_Transport & transport, const config::ConfigUri & configUri, _documentDBReferenceRegistry(std::make_shared()), _nodeUpLock(), _nodeUp(), - _posting_list_cache(), - _last_posting_list_cache_stats() + _posting_list_cache() { } BootstrapConfig::SP @@ -872,9 +872,8 @@ Proton::updateMetrics(const metrics::MetricLockGuard &) } } if (_posting_list_cache) { - auto stats = _posting_list_cache->get_stats(); - _metricsEngine->root().index.cache.postinglist.update_metrics(stats, _last_posting_list_cache_stats); - _last_posting_list_cache_stats = stats; + _metricsEngine->root().index.cache.postinglist.update_metrics(_posting_list_cache->get_stats()); + _metricsEngine->root().index.cache.bitvector.update_metrics(_posting_list_cache->get_bitvector_stats()); } } diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.h b/searchcore/src/vespa/searchcore/proton/server/proton.h index 4b08a83c681a..120dc11be225 100644 --- a/searchcore/src/vespa/searchcore/proton/server/proton.h +++ b/searchcore/src/vespa/searchcore/proton/server/proton.h @@ -132,7 +132,6 @@ class Proton : public IProtonConfigurerOwner, std::mutex _nodeUpLock; std::set _nodeUp; // bucketspaces where node is up std::shared_ptr _posting_list_cache; - vespalib::CacheStats _last_posting_list_cache_stats; std::shared_ptr addDocumentDB(const 
DocTypeName & docTypeName, BucketSpace bucketSpace, const std::string & configid, diff --git a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp index 1000ac5f55e3..4ac1ad837dea 100644 --- a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp @@ -318,10 +318,10 @@ SearchableDocSubDB::getNumActiveDocs() const return (metaStoreCtx) ? metaStoreCtx->getReadGuard()->get().getNumActiveLids() : 0; } -search::SearchableStats -SearchableDocSubDB::getSearchableStats() const +search::IndexStats +SearchableDocSubDB::get_index_stats(bool clear_disk_io_stats) const { - return _indexMgr ? _indexMgr->getSearchableStats() : search::SearchableStats(); + return _indexMgr ? _indexMgr->get_index_stats(clear_disk_io_stats) : search::IndexStats(); } std::shared_ptr @@ -375,7 +375,7 @@ SearchableDocSubDB::get_transient_resource_usage() const auto result = FastAccessDocSubDB::get_transient_resource_usage(); // Transient disk usage is measured as the total disk usage of all current fusion indexes. // Transient memory usage is measured as the total memory usage of all memory indexes. 
- auto stats = getSearchableStats(); + auto stats = get_index_stats(false); result.merge({stats.fusion_size_on_disk(), stats.memoryUsage().allocatedBytes()}); return result; } diff --git a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h index 7436a89382ba..5cae45e9f5f9 100644 --- a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h @@ -134,7 +134,7 @@ SearchableDocSubDB : public FastAccessDocSubDB, SerialNum getNewestFlushedSerial() override; void setIndexSchema(std::shared_ptr schema, SerialNum serialNum) override; size_t getNumActiveDocs() const override; - search::SearchableStats getSearchableStats() const override ; + search::IndexStats get_index_stats(bool clear_disk_io_stats) const override ; std::shared_ptr getDocumentRetriever() override; matching::MatchingStats getMatcherStats(const std::string &rankProfile) const override; void close() override; diff --git a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp index a94ee90877f2..6cdc3de2b6c9 100644 --- a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp @@ -543,8 +543,8 @@ StoreOnlyDocSubDB::setIndexSchema(std::shared_ptr, SerialNum) assert(_writeService.master().isCurrentThread()); } -search::SearchableStats -StoreOnlyDocSubDB::getSearchableStats() const +search::IndexStats +StoreOnlyDocSubDB::get_index_stats(bool) const { return {}; } diff --git a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h index 182234bb86d8..d96c0a6a1cf3 100644 --- a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h +++ 
b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h @@ -231,7 +231,7 @@ class StoreOnlyDocSubDB : public DocSubDB void pruneRemovedFields(SerialNum serialNum) override; void setIndexSchema(std::shared_ptr schema, SerialNum serialNum) override; - search::SearchableStats getSearchableStats() const override; + search::IndexStats get_index_stats(bool) const override; std::shared_ptr getDocumentRetriever() override; matching::MatchingStats getMatcherStats(const std::string &rankProfile) const override; void close() override; diff --git a/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h b/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h index 7155e120a12f..9d5d56b003a1 100644 --- a/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h +++ b/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h @@ -74,7 +74,7 @@ struct DummyDocumentSubDb : public IDocumentSubDB SerialNum getNewestFlushedSerial() override { return 0; } void pruneRemovedFields(SerialNum) override { } void setIndexSchema(std::shared_ptr, SerialNum) override { } - search::SearchableStats getSearchableStats() const override { + search::IndexStats get_index_stats(bool) const override { return {}; } std::shared_ptr getDocumentRetriever() override { diff --git a/searchcore/src/vespa/searchcore/proton/test/mock_index_manager.h b/searchcore/src/vespa/searchcore/proton/test/mock_index_manager.h index 53af2381e7fe..38f14c4e5df9 100644 --- a/searchcore/src/vespa/searchcore/proton/test/mock_index_manager.h +++ b/searchcore/src/vespa/searchcore/proton/test/mock_index_manager.h @@ -19,8 +19,8 @@ struct MockIndexManager : public searchcorespi::IIndexManager searchcorespi::IndexSearchable::SP getSearchable() const override { return searchcorespi::IndexSearchable::SP(); } - search::SearchableStats getSearchableStats() const override { - return search::SearchableStats(); + search::IndexStats get_index_stats(bool) const override { + return 
search::IndexStats(); } searchcorespi::IFlushTarget::List getFlushTargets() override { return searchcorespi::IFlushTarget::List(); diff --git a/searchcore/src/vespa/searchcorespi/index/fakeindexsearchable.h b/searchcore/src/vespa/searchcorespi/index/fakeindexsearchable.h index fa34ad410010..9379e1da7226 100644 --- a/searchcore/src/vespa/searchcorespi/index/fakeindexsearchable.h +++ b/searchcore/src/vespa/searchcorespi/index/fakeindexsearchable.h @@ -28,8 +28,8 @@ class FakeIndexSearchable : public IndexSearchable { return _fake.createBlueprint(requestContext, field, term); } - search::SearchableStats getSearchableStats() const override { - return search::SearchableStats(); + search::IndexStats get_index_stats(bool) const override { + return search::IndexStats(); } search::SerialNum getSerialNum() const override { return 0; } diff --git a/searchcore/src/vespa/searchcorespi/index/iindexmanager.h b/searchcore/src/vespa/searchcorespi/index/iindexmanager.h index a54dc422f645..b441ce0715c5 100644 --- a/searchcore/src/vespa/searchcorespi/index/iindexmanager.h +++ b/searchcore/src/vespa/searchcorespi/index/iindexmanager.h @@ -174,11 +174,11 @@ class IIndexManager { virtual IndexSearchable::SP getSearchable() const = 0; /** - * Returns searchable stats for this index manager. + * Returns index stats for this index manager. * * @return statistics gathered about underlying memory and disk indexes. */ - virtual search::SearchableStats getSearchableStats() const = 0; + virtual search::IndexStats get_index_stats(bool clear_disk_io_stats) const = 0; /** * Returns the list of all flush targets contained in this index manager. 
diff --git a/searchcore/src/vespa/searchcorespi/index/index_manager_explorer.cpp b/searchcore/src/vespa/searchcorespi/index/index_manager_explorer.cpp index 051175be9afb..d35eb33f7af0 100644 --- a/searchcore/src/vespa/searchcorespi/index/index_manager_explorer.cpp +++ b/searchcore/src/vespa/searchcorespi/index/index_manager_explorer.cpp @@ -8,7 +8,7 @@ using vespalib::slime::Cursor; using vespalib::slime::Inserter; -using search::SearchableStats; +using search::IndexStats; using searchcorespi::index::DiskIndexStats; using searchcorespi::index::MemoryIndexStats; @@ -20,10 +20,16 @@ void insertDiskIndex(Cursor &arrayCursor, const DiskIndexStats &diskIndex) { Cursor &diskIndexCursor = arrayCursor.addObject(); - const SearchableStats &sstats = diskIndex.getSearchableStats(); + const IndexStats &sstats = diskIndex.get_index_stats(); diskIndexCursor.setLong("serialNum", diskIndex.getSerialNum()); diskIndexCursor.setString("indexDir", diskIndex.getIndexdir()); - diskIndexCursor.setLong("sizeOnDisk", sstats.sizeOnDisk()); + diskIndexCursor.setLong("disk_usage", sstats.sizeOnDisk()); + auto& fields = diskIndexCursor.setArray("fields"); + for (auto& field_stats : sstats.get_field_stats()) { + auto& field = fields.addObject(); + field.setString("name", field_stats.first); + field.setLong("disk_usage", field_stats.second.size_on_disk()); + } } void @@ -40,7 +46,7 @@ void insertMemoryIndex(Cursor &arrayCursor, const MemoryIndexStats &memoryIndex) { Cursor &memoryIndexCursor = arrayCursor.addObject(); - const SearchableStats &sstats = memoryIndex.getSearchableStats(); + const IndexStats &sstats = memoryIndex.get_index_stats(); memoryIndexCursor.setLong("serialNum", memoryIndex.getSerialNum()); memoryIndexCursor.setLong("docsInMemory", sstats.docsInMemory()); insertMemoryUsage(memoryIndexCursor, sstats.memoryUsage()); diff --git a/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.cpp b/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.cpp index 
f74c9c581326..fb916058934d 100644 --- a/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.cpp +++ b/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.cpp @@ -8,13 +8,13 @@ namespace searchcorespi::index { IndexSearchableStats::IndexSearchableStats() : _serialNum(0), - _searchableStats() + _index_stats() { } IndexSearchableStats::IndexSearchableStats(const IndexSearchable &index) : _serialNum(index.getSerialNum()), - _searchableStats(index.getSearchableStats()) + _index_stats(index.get_index_stats(false)) { } diff --git a/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.h b/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.h index 023649524706..02f68cf8735c 100644 --- a/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.h +++ b/searchcore/src/vespa/searchcorespi/index/index_searchable_stats.h @@ -3,7 +3,7 @@ #pragma once #include -#include +#include namespace searchcorespi { class IndexSearchable; } @@ -15,15 +15,15 @@ namespace searchcorespi::index { class IndexSearchableStats { using SerialNum = search::SerialNum; - using SearchableStats = search::SearchableStats; + using IndexStats = search::IndexStats; SerialNum _serialNum; - SearchableStats _searchableStats; + IndexStats _index_stats; public: IndexSearchableStats(); IndexSearchableStats(const IndexSearchable &index); bool operator<(const IndexSearchableStats &rhs) const; SerialNum getSerialNum() const { return _serialNum; } - const SearchableStats &getSearchableStats() const { return _searchableStats; } + const IndexStats &get_index_stats() const { return _index_stats; } }; } diff --git a/searchcore/src/vespa/searchcorespi/index/indexcollection.cpp b/searchcore/src/vespa/searchcorespi/index/indexcollection.cpp index dfc25865dc14..b6183e34ee13 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexcollection.cpp +++ b/searchcore/src/vespa/searchcorespi/index/indexcollection.cpp @@ -108,12 +108,12 @@ IndexCollection::getSourceId(uint32_t i) 
const return _sources[i].id; } -search::SearchableStats -IndexCollection::getSearchableStats() const +search::IndexStats +IndexCollection::get_index_stats(bool clear_disk_io_stats) const { - search::SearchableStats stats; + search::IndexStats stats; for (size_t i = 0; i < _sources.size(); ++i) { - stats.merge(_sources[i].source_wrapper->getSearchableStats()); + stats.merge(_sources[i].source_wrapper->get_index_stats(clear_disk_io_stats)); } return stats; } diff --git a/searchcore/src/vespa/searchcorespi/index/indexcollection.h b/searchcore/src/vespa/searchcorespi/index/indexcollection.h index 6f7e4c3c29a4..455f41637f2b 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexcollection.h +++ b/searchcore/src/vespa/searchcorespi/index/indexcollection.h @@ -3,7 +3,7 @@ #pragma once #include "isearchableindexcollection.h" -#include +#include namespace searchcorespi { @@ -50,7 +50,7 @@ class IndexCollection : public ISearchableIndexCollection createBlueprint(const IRequestContext & requestContext, const FieldSpec &field, const Node &term) override; std::unique_ptr createBlueprint(const IRequestContext & requestContext, const FieldSpecList &fields, const Node &term) override; - search::SearchableStats getSearchableStats() const override; + search::IndexStats get_index_stats(bool clear_disk_io_stats) const override; search::SerialNum getSerialNum() const override; void accept(IndexSearchableVisitor &visitor) const override; diff --git a/searchcore/src/vespa/searchcorespi/index/indexmaintainer.cpp b/searchcore/src/vespa/searchcorespi/index/indexmaintainer.cpp index f0300a1c4827..147b7eeef69b 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexmaintainer.cpp +++ b/searchcore/src/vespa/searchcorespi/index/indexmaintainer.cpp @@ -135,7 +135,7 @@ class DiskIndexWithDestructorCallback : public IDiskIndex { { return _index->createBlueprint(requestContext, fields, term); } - search::SearchableStats getSearchableStats() const override; + search::IndexStats 
get_index_stats(bool clear_disk_io_stats) const override; search::SerialNum getSerialNum() const override { return _index->getSerialNum(); } @@ -160,10 +160,10 @@ class DiskIndexWithDestructorCallback : public IDiskIndex { DiskIndexWithDestructorCallback::~DiskIndexWithDestructorCallback() = default; -search::SearchableStats -DiskIndexWithDestructorCallback::getSearchableStats() const +search::IndexStats +DiskIndexWithDestructorCallback::get_index_stats(bool clear_disk_io_stats) const { - auto stats = _index->getSearchableStats(); + auto stats = _index->get_index_stats(clear_disk_io_stats); uint64_t transient_size = _disk_indexes.get_transient_size(_layout, _index_disk_dir); stats.fusion_size_on_disk(transient_size); return stats; @@ -315,7 +315,7 @@ IndexMaintainer::loadDiskIndex(const string &indexDir) } vespalib::Timer timer; auto index = _operations.loadDiskIndex(indexDir); - auto stats = index->getSearchableStats(); + auto stats = index->get_index_stats(false); _disk_indexes->setActive(indexDir, stats.sizeOnDisk()); auto retval = std::make_shared( std::move(index), @@ -338,7 +338,7 @@ IndexMaintainer::reloadDiskIndex(const IDiskIndex &oldIndex) vespalib::Timer timer; const IDiskIndex &wrappedDiskIndex = (dynamic_cast(oldIndex)).getWrapped(); auto index = _operations.reloadDiskIndex(wrappedDiskIndex); - auto stats = index->getSearchableStats(); + auto stats = index->get_index_stats(false); _disk_indexes->setActive(indexDir, stats.sizeOnDisk()); auto retval = std::make_shared( std::move(index), @@ -1184,7 +1184,7 @@ IndexMaintainer::getFusionStats() const source_list = _source_list; stats.maxFlushed = _maxFlushed; } - stats.diskUsage = source_list->getSearchableStats().sizeOnDisk(); + stats.diskUsage = source_list->get_index_stats(false).sizeOnDisk(); { LockGuard guard(_fusion_lock); stats.numUnfused = _fusion_spec.flush_ids.size() + ((_fusion_spec.last_fusion_id != 0) ? 
1 : 0); diff --git a/searchcore/src/vespa/searchcorespi/index/indexmaintainer.h b/searchcore/src/vespa/searchcorespi/index/indexmaintainer.h index 89ded5fd2866..a713755d64ec 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexmaintainer.h +++ b/searchcore/src/vespa/searchcorespi/index/indexmaintainer.h @@ -361,9 +361,9 @@ class IndexMaintainer : public IIndexManager, return _source_list; } - search::SearchableStats getSearchableStats() const override { + search::IndexStats get_index_stats(bool clear_disk_io_stats) const override { LockGuard lock(_new_search_lock); - return _source_list->getSearchableStats(); + return _source_list->get_index_stats(clear_disk_io_stats); } IFlushTarget::List getFlushTargets() override; diff --git a/searchcore/src/vespa/searchcorespi/index/indexsearchable.h b/searchcore/src/vespa/searchcorespi/index/indexsearchable.h index 8beb0ab7a61b..5811d0f812c4 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexsearchable.h +++ b/searchcore/src/vespa/searchcorespi/index/indexsearchable.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include namespace searchcorespi { @@ -38,9 +38,9 @@ class IndexSearchable : public search::queryeval::Searchable, using SP = std::shared_ptr; /** - * Returns the searchable stats for this index searchable. + * Returns the index stats for this index searchable. */ - virtual search::SearchableStats getSearchableStats() const = 0; + virtual search::IndexStats get_index_stats(bool clear_disk_io_stats) const = 0; /** * Returns the serial number for this index searchable. 
diff --git a/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.cpp b/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.cpp index 3d396b1cf913..0da8518f8219 100644 --- a/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.cpp +++ b/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.cpp @@ -225,10 +225,10 @@ WarmupIndexCollection::createBlueprint(const IRequestContext & requestContext, return _prev->createBlueprint(requestContext, fields, term); } -search::SearchableStats -WarmupIndexCollection::getSearchableStats() const +search::IndexStats +WarmupIndexCollection::get_index_stats(bool clear_disk_io_stats) const { - return _prev->getSearchableStats(); + return _prev->get_index_stats(clear_disk_io_stats); } diff --git a/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.h b/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.h index 7503b1173c59..28c5c3bcf4a0 100644 --- a/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.h +++ b/searchcore/src/vespa/searchcorespi/index/warmupindexcollection.h @@ -48,7 +48,7 @@ class WarmupIndexCollection : public ISearchableIndexCollection, createBlueprint(const IRequestContext & requestContext, const FieldSpec &field, const Node &term) override; std::unique_ptr createBlueprint(const IRequestContext & requestContext, const FieldSpecList &fields, const Node &term) override; - search::SearchableStats getSearchableStats() const override; + search::IndexStats get_index_stats(bool clear_disk_io_stats) const override; search::SerialNum getSerialNum() const override; void accept(IndexSearchableVisitor &visitor) const override; diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 851e480d7832..d060fa489a7e 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -253,7 +253,7 @@ vespa_define_module( src/tests/util src/tests/util/bufferwriter src/tests/util/folded_string_compare - src/tests/util/searchable_stats + 
src/tests/util/index_stats src/tests/util/slime_output_raw_buf_adapter src/tests/util/token_extractor src/tests/vespa-fileheader-inspect diff --git a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp index ee8bbae184ed..213b4360dc9a 100644 --- a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp +++ b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp @@ -24,9 +24,11 @@ using search::BitVector; using search::BitVectorIterator; using search::diskindex::DiskIndex; using search::diskindex::DiskTermBlueprint; +using search::diskindex::FieldIndex; using search::diskindex::TestDiskIndex; using search::diskindex::ZcRareWordPosOccIterator; using search::fef::TermFieldMatchDataArray; +using search::index::DictionaryLookupResult; using search::index::DummyFileHeaderContext; using search::index::PostingListHandle; using search::index::Schema; @@ -128,7 +130,7 @@ class DiskIndexTest : public ::testing::Test, public TestDiskIndex { static void SetUpTestSuite(); static void TearDownTestSuite(); void requireThatLookupIsWorking(const EmptySettings& empty_settings); - void requireThatWeCanReadPostingList(); + void requireThatWeCanReadPostingList(const IOSettings& io_settings); void require_that_we_can_get_field_length_info(); void requireThatWeCanReadBitVector(); void requireThatBlueprintIsCreated(); @@ -138,6 +140,8 @@ class DiskIndexTest : public ::testing::Test, public TestDiskIndex { void build_index(const IOSettings& io_settings, const EmptySettings& empty_settings); void test_empty_settings(const EmptySettings& empty_settings); void test_io_settings(const IOSettings& io_settings); + SimpleResult search(const FieldIndex& field_index, const DictionaryLookupResult& lookup_result, + const PostingListHandle& handle); }; DiskIndexTest::DiskIndexTest() = default; @@ -243,16 +247,35 @@ DiskIndexTest::requireThatLookupIsWorking(const EmptySettings& empty_settings) } } -void 
-DiskIndexTest::requireThatWeCanReadPostingList() +SimpleResult +DiskIndexTest::search(const FieldIndex& field_index, const DictionaryLookupResult& lookup_result, + const PostingListHandle& handle) { TermFieldMatchDataArray mda; + auto sb = field_index.create_iterator(lookup_result, handle, mda); + return SimpleResult().search(*sb); +} + + +void +DiskIndexTest::requireThatWeCanReadPostingList(const IOSettings& io_settings) +{ { // field 'f1' auto r = _index->lookup(0, "w1"); auto& field_index = _index->get_field_index(0); auto h = field_index.read_posting_list(r); - auto sb = field_index.create_iterator(r, h, mda); - EXPECT_EQ(SimpleResult({1,3}), SimpleResult().search(*sb)); + if (field_index.is_posting_list_cache_enabled()) { + EXPECT_GT(64, h._allocSize); + } + EXPECT_EQ(SimpleResult({1,3}), search(field_index, r, h)); + if (io_settings._use_directio && !io_settings._use_mmap) { + auto directio_handle = field_index.read_uncached_posting_list(r, false); + EXPECT_LT(256, directio_handle._allocSize); + EXPECT_EQ(SimpleResult({1,3}), search(field_index, r, directio_handle)); + auto trimmed_directio_handle = field_index.read_uncached_posting_list(r, true); + EXPECT_GT(64, trimmed_directio_handle._allocSize); + EXPECT_EQ(SimpleResult({1,3}), search(field_index, r, trimmed_directio_handle)); + } } } @@ -289,7 +312,7 @@ DiskIndexTest::requireThatWeCanReadBitVector() auto& field_index = _index->get_field_index(1); auto blr = field_index.lookup_bit_vector(r); EXPECT_TRUE(blr.valid()); - BitVector::UP bv = field_index.read_bit_vector(blr); + auto bv = field_index.read_bit_vector(blr); EXPECT_TRUE(bv.get() != nullptr); EXPECT_TRUE(*bv == *exp); } @@ -425,7 +448,7 @@ DiskIndexTest::build_index(const IOSettings& io_settings, const EmptySettings& e void DiskIndexTest::require_that_get_stats_works() { - auto stats = getIndex().get_stats(); + auto stats = getIndex().get_stats(false); auto& schema = getIndex().getSchema(); EXPECT_LT(0, stats.sizeOnDisk()); auto field_stats = 
stats.get_field_stats(); @@ -452,7 +475,7 @@ DiskIndexTest::test_io_settings(const IOSettings& io_settings) EmptySettings empty_settings; build_index(io_settings, empty_settings); requireThatLookupIsWorking(empty_settings); - requireThatWeCanReadPostingList(); + requireThatWeCanReadPostingList(io_settings); require_that_we_can_get_field_length_info(); requireThatWeCanReadBitVector(); requireThatBlueprintIsCreated(); diff --git a/searchlib/src/tests/diskindex/posting_list_cache/posting_list_cache_test.cpp b/searchlib/src/tests/diskindex/posting_list_cache/posting_list_cache_test.cpp index a776b9da70e4..665310eb9070 100644 --- a/searchlib/src/tests/diskindex/posting_list_cache/posting_list_cache_test.cpp +++ b/searchlib/src/tests/diskindex/posting_list_cache/posting_list_cache_test.cpp @@ -1,9 +1,11 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include #include #include #include +using search::BitVector; using search::diskindex::PostingListCache; using search::index::PostingListHandle; @@ -13,7 +15,8 @@ class MockFile : public PostingListCache::IPostingListFileBacking { public: MockFile(); ~MockFile() override; - PostingListHandle read(const PostingListCache::Key& key) const override; + PostingListHandle read(const PostingListCache::Key& key, PostingListCache::Context& ctx) const override; + std::shared_ptr read(const PostingListCache::BitVectorKey& key, PostingListCache::Context& ctx) const override; }; MockFile::MockFile() @@ -24,35 +27,55 @@ MockFile::MockFile() MockFile::~MockFile() = default; PostingListHandle -MockFile::read(const PostingListCache::Key& key) const +MockFile::read(const PostingListCache::Key& key, PostingListCache::Context& ctx) const { EXPECT_NE(0, key.bit_length); + ctx.cache_miss = true; PostingListHandle handle; handle._allocSize = key.bit_length / 8; return handle; } +std::shared_ptr +MockFile::read(const PostingListCache::BitVectorKey& key, PostingListCache::Context& ctx) 
const +{ + EXPECT_NE(0, key.lookup_result.idx); + ctx.cache_miss = true; + return BitVector::create(100 * key.file_id + key.lookup_result.idx); +} + } class PostingListCacheTest : public ::testing::Test { protected: using Key = PostingListCache::Key; + using BitVectorKey = PostingListCache::BitVectorKey; MockFile _mock_file; PostingListCache _cache; Key _key; + BitVectorKey _bv_key; + PostingListCache::Context _ctx; PostingListCacheTest(); ~PostingListCacheTest() override; - PostingListHandle read() { return _cache.read(_key); } + PostingListHandle read() { + _ctx.cache_miss = false; + return _cache.read(_key, _ctx); + } + std::shared_ptr read_bv() { + _ctx.cache_miss = false; + return _cache.read(_bv_key, _ctx); + } }; PostingListCacheTest::PostingListCacheTest() : ::testing::Test(), _mock_file(), - _cache(256_Ki), - _key() + _cache(256_Ki, 256_Ki), + _key(), + _bv_key(), + _ctx(&_mock_file) { - _key.backing_store_file = &_mock_file; } PostingListCacheTest::~PostingListCacheTest() = default; @@ -61,8 +84,11 @@ TEST_F(PostingListCacheTest, repeated_lookups_gives_hit) { _key.bit_length = 24 * 8; auto handle = read(); + EXPECT_TRUE(_ctx.cache_miss); auto handle2 = read(); + EXPECT_FALSE(_ctx.cache_miss); auto handle3 = read(); + EXPECT_FALSE(_ctx.cache_miss); EXPECT_EQ(24, handle._allocSize); auto stats = _cache.get_stats(); EXPECT_EQ(1, stats.misses); @@ -104,4 +130,20 @@ TEST_F(PostingListCacheTest, file_id_is_part_of_key) EXPECT_EQ(2, stats.elements); } +TEST_F(PostingListCacheTest, repeated_bitvector_lookup_gives_hit) +{ + _bv_key.lookup_result.idx = 1; + _bv_key.file_id = 2; + auto bv = read_bv(); + EXPECT_TRUE(_ctx.cache_miss); + auto bv2 = read_bv(); + EXPECT_FALSE(_ctx.cache_miss); + EXPECT_EQ(bv, bv2); + auto stats = _cache.get_bitvector_stats(); + EXPECT_EQ(1, stats.misses); + EXPECT_EQ(1, stats.hits); + EXPECT_EQ(1, stats.elements); + EXPECT_EQ(PostingListCache::bitvector_element_size() + bv->get_allocated_bytes(true), stats.memory_used); +} + 
GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp index 8f88b108e977..2354f5bdfb1c 100644 --- a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp +++ b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -36,7 +36,7 @@ using document::DataType; using document::Document; using document::FieldValue; using search::FieldIndexStats; -using search::SearchableStats; +using search::IndexStats; using search::ScheduleTaskCallback; using search::index::FieldLengthInfo; using search::index::IFieldLengthInspector; @@ -471,7 +471,7 @@ TEST(MemoryIndexTest, require_that_num_docs_and_doc_id_limit_is_returned) namespace { -FieldIndexStats get_field_stats(const SearchableStats &stats, const std::string& field_name) +FieldIndexStats get_field_stats(const IndexStats &stats, const std::string& field_name) { auto itr = stats.get_field_stats().find(field_name); return itr == stats.get_field_stats().end() ? FieldIndexStats() : itr->second; diff --git a/searchlib/src/tests/util/searchable_stats/.gitignore b/searchlib/src/tests/util/index_stats/.gitignore similarity index 100% rename from searchlib/src/tests/util/searchable_stats/.gitignore rename to searchlib/src/tests/util/index_stats/.gitignore diff --git a/searchlib/src/tests/util/index_stats/CMakeLists.txt b/searchlib/src/tests/util/index_stats/CMakeLists.txt new file mode 100644 index 000000000000..9bc9f792f240 --- /dev/null +++ b/searchlib/src/tests/util/index_stats/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_index_stats_test_app TEST + SOURCES + index_stats_test.cpp + DEPENDS + vespa_searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_index_stats_test_app COMMAND searchlib_index_stats_test_app) diff --git a/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp b/searchlib/src/tests/util/index_stats/index_stats_test.cpp similarity index 74% rename from searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp rename to searchlib/src/tests/util/index_stats/index_stats_test.cpp index 8d3fff4a7363..b84dbdbfd4f3 100644 --- a/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp +++ b/searchlib/src/tests/util/index_stats/index_stats_test.cpp @@ -1,21 +1,18 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include +#include #include -#include -LOG_SETUP("searchable_stats_test"); - using namespace search; -TEST(SearchableStatsTest, stats_can_be_merged) +TEST(IndexStatsTest, stats_can_be_merged) { - SearchableStats stats; + IndexStats stats; EXPECT_EQ(0u, stats.memoryUsage().allocatedBytes()); EXPECT_EQ(0u, stats.docsInMemory()); EXPECT_EQ(0u, stats.sizeOnDisk()); EXPECT_EQ(0u, stats.fusion_size_on_disk()); { - SearchableStats rhs; + IndexStats rhs; EXPECT_EQ(&rhs.memoryUsage(vespalib::MemoryUsage(100,0,0,0)), &rhs); EXPECT_EQ(&rhs.docsInMemory(10), &rhs); EXPECT_EQ(&rhs.sizeOnDisk(1000), &rhs); @@ -27,7 +24,7 @@ TEST(SearchableStatsTest, stats_can_be_merged) EXPECT_EQ(1000u, stats.sizeOnDisk()); EXPECT_EQ(500u, stats.fusion_size_on_disk()); - stats.merge(SearchableStats() + stats.merge(IndexStats() .memoryUsage(vespalib::MemoryUsage(150,0,0,0)) .docsInMemory(15) .sizeOnDisk(1500) @@ -38,9 +35,9 @@ TEST(SearchableStatsTest, stats_can_be_merged) EXPECT_EQ(1300u, stats.fusion_size_on_disk()); } -TEST(SearchableStatsTest, field_stats_can_be_merged) +TEST(IndexStatsTest, field_stats_can_be_merged) { - SearchableStats base_stats; + IndexStats 
base_stats; auto read_2_once_stats = DiskIoStats().read_operations(1).read_bytes_total(2).read_bytes_min(2).read_bytes_max(2); auto read_1000_once_stats = DiskIoStats().read_operations(1).read_bytes_total(1000).read_bytes_min(1000). read_bytes_max(1000); @@ -49,21 +46,21 @@ TEST(SearchableStatsTest, field_stats_can_be_merged) auto read_mixed_5_stats = DiskIoStats().read_operations(5).read_bytes_total(7000).read_bytes_min(1000). read_bytes_max(2700); auto f1_stats = FieldIndexStats().memory_usage({100, 40, 10, 5}).size_on_disk(1000). - cache_disk_io_stats(CacheDiskIoStats().read(read_1000_once_stats)); + io_stats(FieldIndexIoStats().read(read_1000_once_stats)); auto f2_stats1 = FieldIndexStats().memory_usage({400, 200, 60, 10}).size_on_disk(1500). - cache_disk_io_stats(CacheDiskIoStats().read(read_1000_once_stats)); + io_stats(FieldIndexIoStats().read(read_1000_once_stats)); auto f2_stats2 = FieldIndexStats().memory_usage({300, 100, 40, 5}).size_on_disk(500). - cache_disk_io_stats(CacheDiskIoStats().read(read_mixed_4_stats).cached_read(read_2_once_stats)); + io_stats(FieldIndexIoStats().read(read_mixed_4_stats).cached_read(read_2_once_stats)); auto f2_stats3 = FieldIndexStats().memory_usage({700, 300, 100, 15}).size_on_disk(2000). - cache_disk_io_stats(CacheDiskIoStats().read(read_mixed_5_stats).cached_read(read_2_once_stats)); + io_stats(FieldIndexIoStats().read(read_mixed_5_stats).cached_read(read_2_once_stats)); auto f3_stats = FieldIndexStats().memory_usage({110, 50, 20, 12}).size_on_disk(500). 
- cache_disk_io_stats(CacheDiskIoStats().read(read_1000_once_stats)); + io_stats(FieldIndexIoStats().read(read_1000_once_stats)); base_stats.add_field_stats("f1", f1_stats).add_field_stats("f2", f2_stats1); - SearchableStats added_stats; + IndexStats added_stats; added_stats.add_field_stats("f2", f2_stats2).add_field_stats("f3", f3_stats); - SearchableStats act_stats = base_stats; + IndexStats act_stats = base_stats; act_stats.merge(added_stats); - SearchableStats exp_stats; + IndexStats exp_stats; exp_stats.add_field_stats("f1", f1_stats). add_field_stats("f2", f2_stats3). add_field_stats("f3", f3_stats); diff --git a/searchlib/src/tests/util/searchable_stats/CMakeLists.txt b/searchlib/src/tests/util/searchable_stats/CMakeLists.txt deleted file mode 100644 index a091f32ef0f5..000000000000 --- a/searchlib/src/tests/util/searchable_stats/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-vespa_add_executable(searchlib_searchable_stats_test_app TEST - SOURCES - searchable_stats_test.cpp - DEPENDS - vespa_searchlib - GTest::GTest -) -vespa_add_test(NAME searchlib_searchable_stats_test_app COMMAND searchlib_searchable_stats_test_app) diff --git a/searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp b/searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp index 7be5ce84a012..08d08c6723ef 100644 --- a/searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp @@ -108,4 +108,14 @@ AllocatedBitVector::resize(Index newLength) clear(); } +size_t +AllocatedBitVector::get_allocated_bytes(bool include_self) const noexcept +{ + size_t result = extraByteSize(); + if (include_self) { + result += sizeof(AllocatedBitVector); + } + return result; +} + } // namespace search diff --git a/searchlib/src/vespa/searchlib/common/allocatedbitvector.h b/searchlib/src/vespa/searchlib/common/allocatedbitvector.h index 9884e389dee9..f4dcad7fca36 100644 --- a/searchlib/src/vespa/searchlib/common/allocatedbitvector.h +++ b/searchlib/src/vespa/searchlib/common/allocatedbitvector.h @@ -58,6 +58,8 @@ class AllocatedBitVector : public BitVector */ void resize(Index newLength); + size_t get_allocated_bytes(bool include_self) const noexcept override; + protected: Index _capacityBits; Alloc _alloc; diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp index ea514abaafca..abe67efd4bfa 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp @@ -3,10 +3,12 @@ #include "bitvector.h" #include "allocatedbitvector.h" #include "partialbitvector.h" +#include "read_stats.h" #include #include #include #include +#include #include #include #include @@ -365,6 +367,8 @@ class MMappedBitVector : public BitVector MMappedBitVector(Index numberOfElements, FastOS_FileInterface &file, int64_t offset, 
Index doccount); + size_t get_allocated_bytes(bool include_self) const noexcept override; + private: void read(Index numberOfElements, FastOS_FileInterface &file, int64_t offset, Index doccount); @@ -372,10 +376,12 @@ class MMappedBitVector : public BitVector BitVector::UP BitVector::create(Index numberOfElements, FastOS_FileInterface &file, - int64_t offset, Index doccount) + int64_t offset, Index doccount, ReadStats& read_stats) { UP bv; if (file.IsMemoryMapped()) { + size_t pad_before = offset - vespalib::round_down_to_page_boundary(offset); + read_stats.read_bytes = vespalib::round_up_to_page_size(pad_before + getFileBytes(numberOfElements)); bv = std::make_unique(numberOfElements, file, offset, doccount); } else { size_t padbefore, padafter; @@ -385,7 +391,8 @@ BitVector::create(Index numberOfElements, FastOS_FileInterface &file, AllocatedBitVector::Alloc alloc = Alloc::alloc(padbefore + vectorsize + padafter, MMAP_LIMIT, FileSettings::DIRECTIO_ALIGNMENT); void * alignedBuffer = alloc.get(); - file.ReadBuf(alignedBuffer, alloc.size(), offset - padbefore); + file.ReadBuf(alignedBuffer, padbefore + vectorsize + padafter, offset - padbefore); + read_stats.read_bytes = padbefore + vectorsize + padafter; bv = std::make_unique(numberOfElements, std::move(alloc), padbefore); bv->setTrueBits(doccount); // Check guard bit for getNextTrueBit() @@ -450,6 +457,12 @@ MMappedBitVector::read(Index numberOfElements, FastOS_FileInterface &file, setTrueBits(doccount); } +size_t +MMappedBitVector::get_allocated_bytes(bool include_self) const noexcept +{ + return include_self ? 
sizeof(MMappedBitVector) : 0; +} + nbostream & operator<<(nbostream &out, const BitVector &bv) { diff --git a/searchlib/src/vespa/searchlib/common/bitvector.h b/searchlib/src/vespa/searchlib/common/bitvector.h index 3d51ea3eb15e..8067cc96d9b9 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.h +++ b/searchlib/src/vespa/searchlib/common/bitvector.h @@ -21,6 +21,7 @@ class FastOS_FileInterface; namespace search { class PartialBitVector; +struct ReadStats; class AllocatedBitVector; class BitVector : protected BitWord @@ -277,7 +278,7 @@ class BitVector : protected BitWord * @param offset Where bitvector image is located in the file. * @param doccount Number of bits set in bitvector */ - static UP create(Index numberOfElements, FastOS_FileInterface &file, int64_t offset, Index doccount); + static UP create(Index numberOfElements, FastOS_FileInterface &file, int64_t offset, Index doccount, ReadStats& read_stats); static UP create(Index start, Index end); static UP create(const BitVector & org, Index start, Index end); static UP create(Index numberOfElements); @@ -291,6 +292,7 @@ class BitVector : protected BitWord static void parallellOr(vespalib::ThreadBundle & thread_bundle, std::span vectors); static Index numWords(Index bits) noexcept { return wordNum(bits + 1 + (WordLen - 1)); } static Index numBytes(Index bits) noexcept { return numWords(bits) * sizeof(Word); } + virtual size_t get_allocated_bytes(bool include_self) const noexcept = 0; protected: using Alloc = vespalib::alloc::Alloc; VESPA_DLL_LOCAL BitVector(void * buf, Index start, Index end) noexcept; diff --git a/searchlib/src/vespa/searchlib/common/partialbitvector.cpp b/searchlib/src/vespa/searchlib/common/partialbitvector.cpp index 1ec0c10e411b..3f521842ca50 100644 --- a/searchlib/src/vespa/searchlib/common/partialbitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/partialbitvector.cpp @@ -39,4 +39,14 @@ PartialBitVector::PartialBitVector(const BitVector & org, Index start, Index end 
PartialBitVector::~PartialBitVector() = default; +size_t +PartialBitVector::get_allocated_bytes(bool include_self) const noexcept +{ + size_t result = _alloc.size(); + if (include_self) { + result += sizeof(PartialBitVector); + } + return result; +} + } // namespace search diff --git a/searchlib/src/vespa/searchlib/common/partialbitvector.h b/searchlib/src/vespa/searchlib/common/partialbitvector.h index 4cd4e94bf8a8..76c66f31682c 100644 --- a/searchlib/src/vespa/searchlib/common/partialbitvector.h +++ b/searchlib/src/vespa/searchlib/common/partialbitvector.h @@ -27,6 +27,8 @@ class PartialBitVector : public BitVector ~PartialBitVector() override; + size_t get_allocated_bytes(bool include_self) const noexcept override; + private: vespalib::alloc::Alloc _alloc; }; diff --git a/searchlib/src/vespa/searchlib/common/read_stats.h b/searchlib/src/vespa/searchlib/common/read_stats.h new file mode 100644 index 000000000000..4f33a0ee3893 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/read_stats.h @@ -0,0 +1,24 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { + +/* + * Struct passed to read functions to pick up information about read + * stats. 
+ */ +struct ReadStats +{ + uint64_t read_bytes; // bytes read from disk or bytes in pages containing the data + ReadStats() noexcept + : read_bytes(0) + { } + void clear() noexcept { + read_bytes = 0; + } +}; + +} diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp index 8ea41148be9b..e9dce0f06e5a 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp @@ -3,6 +3,7 @@ #include "bitvectordictionary.h" #include #include +#include #include #include #include @@ -21,7 +22,8 @@ BitVectorDictionary::BitVectorDictionary() _entries(), _vectorSize(0u), _datFile(), - _datHeaderLen(0u) + _datHeaderLen(0u), + _memory_mapped(false) { } BitVectorDictionary::~BitVectorDictionary() = default; @@ -83,6 +85,7 @@ BitVectorDictionary::open(const std::string &pathPrefix, vespalib::FileHeader datHeader(64); _datHeaderLen = datHeader.readFile(*_datFile); assert(_datFile->getSize() >= static_cast(_vectorSize * _entries.size() + _datHeaderLen)); + _memory_mapped = (_datFile->MemoryMapPtr(0) != nullptr); return true; } @@ -98,13 +101,20 @@ BitVectorDictionary::lookup(uint64_t wordNum) { } std::unique_ptr -BitVectorDictionary::read_bitvector(BitVectorDictionaryLookupResult lookup_result) +BitVectorDictionary::read_bitvector(BitVectorDictionaryLookupResult lookup_result, ReadStats& read_stats) { if (!lookup_result.valid()) { return {}; } int64_t offset = ((int64_t) _vectorSize) * lookup_result.idx + _datHeaderLen; - return BitVector::create(_docIdLimit, *_datFile, offset, _entries[lookup_result.idx]._numDocs); + return BitVector::create(_docIdLimit, *_datFile, offset, _entries[lookup_result.idx]._numDocs, read_stats); +} + +std::unique_ptr +BitVectorDictionary::read_bitvector(BitVectorDictionaryLookupResult lookup_result) +{ + ReadStats read_stats; + return read_bitvector(lookup_result, read_stats); } } diff --git 
a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h index b3ce7a183468..76f8c5d039bc 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h @@ -11,6 +11,7 @@ class FastOS_FileInterface; namespace search { class BitVector; } +namespace search { struct ReadStats; } namespace search::diskindex { @@ -29,6 +30,7 @@ class BitVectorDictionary size_t _vectorSize; std::unique_ptr _datFile; uint32_t _datHeaderLen; + bool _memory_mapped; public: using SP = std::shared_ptr; @@ -62,13 +64,17 @@ class BitVectorDictionary * Load and return the associated bit vector if lookup result is valid. * * @param lookup_result the result returned from lookup. + * @param read_stats statistics to be updated when reading bit vector * @return the loaded bit vector or empty if lookup result was invalid. **/ + std::unique_ptr read_bitvector(index::BitVectorDictionaryLookupResult lookup_result, + ReadStats &read_stats); std::unique_ptr read_bitvector(index::BitVectorDictionaryLookupResult lookup_result); - uint32_t getDocIdLimit() const { return _docIdLimit; } + uint32_t getDocIdLimit() const noexcept { return _docIdLimit; } - const std::vector & getEntries() const { return _entries; } + const std::vector & getEntries() const noexcept { return _entries; } + bool get_memory_mapped() const noexcept { return _memory_mapped; } }; } diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp index f579aecceed6..9c620d6f932c 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp @@ -398,14 +398,14 @@ DiskIndex::get_field_length_info(const std::string& field_name) const } } -SearchableStats -DiskIndex::get_stats() const +IndexStats +DiskIndex::get_stats(bool clear_disk_io_stats) const { - SearchableStats stats; 
+ IndexStats stats; uint64_t size_on_disk = _nonfield_size_on_disk; uint32_t field_id = 0; for (auto& field_index : _field_indexes) { - auto field_stats = field_index.get_stats(); + auto field_stats = field_index.get_stats(clear_disk_io_stats); size_on_disk += field_stats.size_on_disk(); stats.add_field_stats(_schema.getIndexField(field_id).getName(), field_stats); ++field_id; diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.h b/searchlib/src/vespa/searchlib/diskindex/diskindex.h index 13eec5cece96..132666b9558e 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.h +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.h @@ -4,7 +4,7 @@ #include "field_index.h" #include -#include +#include #include #include #include @@ -110,7 +110,7 @@ class DiskIndex : public queryeval::Searchable { /** * Get stats for this index. */ - SearchableStats get_stats() const; + IndexStats get_stats(bool clear_disk_io_stats) const; const index::Schema &getSchema() const { return _schema; } const std::string &getIndexDir() const { return _indexDir; } diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp index 08578fce9f08..f6ccd0894107 100644 --- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include LOG_SETUP(".diskindex.disktermblueprint"); @@ -51,7 +52,9 @@ DiskTermBlueprint::DiskTermBlueprint(const FieldSpec & field, _useBitVector(useBitVector), _fetchPostingsDone(false), _postingHandle(), - _bitVector() + _bitVector(), + _mutex(), + _late_bitvector() { setEstimate(HitEstimate(_lookupRes.counts._numDocs, _lookupRes.counts._numDocs == 0)); @@ -62,8 +65,10 @@ DiskTermBlueprint::fetchPostings(const queryeval::ExecuteInfo &execInfo) { (void) execInfo; if (!_fetchPostingsDone) { - _bitVector = 
_field_index.read_bit_vector(_bitvector_lookup_result); - if (!_useBitVector || !_bitVector) { + if (_useBitVector && _bitvector_lookup_result.valid()) { + _bitVector = _field_index.read_bit_vector(_bitvector_lookup_result); + } + if (!_bitVector) { _postingHandle = _field_index.read_posting_list(_lookupRes); } } @@ -77,13 +82,27 @@ DiskTermBlueprint::calculate_flow_stats(uint32_t docid_limit) const return {rel_est, disk_index_cost(rel_est), disk_index_strict_cost(rel_est)}; } +const BitVector * +DiskTermBlueprint::get_bitvector() const +{ + if (_bitVector) { + return _bitVector.get(); + } + std::lock_guard guard(_mutex); + if (!_late_bitvector) { + _late_bitvector = _field_index.read_bit_vector(_bitvector_lookup_result); + assert(_late_bitvector); + } + return _late_bitvector.get(); +} + SearchIterator::UP DiskTermBlueprint::createLeafSearch(const TermFieldMatchDataArray & tfmda) const { - if (_bitVector && (_useBitVector || tfmda[0]->isNotNeeded())) { + if (_bitvector_lookup_result.valid() && (_useBitVector || tfmda[0]->isNotNeeded())) { LOG(debug, "Return BitVectorIterator: %s, wordNum(%" PRIu64 "), docCount(%" PRIu64 ")", getName(_field_index.get_field_id()).c_str(), _lookupRes.wordNum, _lookupRes.counts._numDocs); - return BitVectorIterator::create(_bitVector.get(), *tfmda[0], strict()); + return BitVectorIterator::create(get_bitvector(), *tfmda[0], strict()); } auto search(_field_index.create_iterator(_lookupRes, _postingHandle, tfmda)); if (_useBitVector) { @@ -101,8 +120,8 @@ DiskTermBlueprint::createFilterSearch(FilterConstraint) const { auto wrapper = std::make_unique(getState().numFields()); auto & tfmda = wrapper->tfmda(); - if (_bitVector) { - wrapper->wrap(BitVectorIterator::create(_bitVector.get(), *tfmda[0], strict())); + if (_bitvector_lookup_result.valid()) { + wrapper->wrap(BitVectorIterator::create(get_bitvector(), *tfmda[0], strict())); } else { wrapper->wrap(_field_index.create_iterator(_lookupRes, _postingHandle, tfmda)); } diff --git 
a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h index 1b0196914f4f..1eb20f72f86f 100644 --- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h @@ -21,8 +21,11 @@ class DiskTermBlueprint : public queryeval::SimpleLeafBlueprint bool _useBitVector; bool _fetchPostingsDone; index::PostingListHandle _postingHandle; - BitVector::UP _bitVector; + std::shared_ptr _bitVector; + mutable std::mutex _mutex; + mutable std::shared_ptr _late_bitvector; + const BitVector* get_bitvector() const; public: /** * Create a new blueprint. diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp index dac7b1cf7a5f..c7a550ac19fc 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp @@ -4,8 +4,10 @@ #include "fileheader.h" #include "pagedict4randread.h" #include +#include #include #include +#include #include #include @@ -32,13 +34,13 @@ const std::vector field_file_names{ std::atomic FieldIndex::_file_id_source(0); -FieldIndex::LockedCacheDiskIoStats::LockedCacheDiskIoStats() noexcept +FieldIndex::LockedFieldIndexIoStats::LockedFieldIndexIoStats() noexcept : _stats(), _mutex() { } -FieldIndex::LockedCacheDiskIoStats::~LockedCacheDiskIoStats() = default; +FieldIndex::LockedFieldIndexIoStats::~LockedFieldIndexIoStats() = default; FieldIndex::FieldIndex() : _posting_file(), @@ -46,8 +48,10 @@ FieldIndex::FieldIndex() _dict(), _file_id(0), _size_on_disk(0), - _cache_disk_io_stats(std::make_shared()), + _io_stats(std::make_shared()), _posting_list_cache(), + _posting_list_cache_enabled(false), + _bitvector_cache_enabled(false), _field_id(0) { } @@ -57,6 +61,8 @@ FieldIndex::FieldIndex(uint32_t field_id, std::shared_ptr pos { _field_id = field_id; _posting_list_cache = std::move(posting_list_cache); + 
_posting_list_cache_enabled = _posting_list_cache && _posting_list_cache->enabled_for_posting_lists(); + _bitvector_cache_enabled = _posting_list_cache && _posting_list_cache->enabled_for_bitvectors(); } FieldIndex::FieldIndex(FieldIndex&&) = default; @@ -140,10 +146,12 @@ FieldIndex::open(const std::string& field_dir, const TuneFileSearch& tune_file_s } bDict = std::make_shared(); - // Always memory map bitvectors for now - auto force_mmap = tune_file_search._read; - force_mmap.setWantMemoryMap(); - if (!bDict->open(field_dir, force_mmap, BitVectorKeyScope::PERFIELD_WORDS)) { + // memory map bitvectors unless bitvector cache is enabled + auto maybe_force_mmap = tune_file_search._read; + if (!_bitvector_cache_enabled) { + maybe_force_mmap.setWantMemoryMap(); + } + if (!bDict->open(field_dir, maybe_force_mmap, BitVectorKeyScope::PERFIELD_WORDS)) { LOG(warning, "Could not open bit vector dictionary in '%s'", field_dir.c_str()); return false; } @@ -161,27 +169,29 @@ FieldIndex::reuse_files(const FieldIndex& rhs) _bit_vector_dict = rhs._bit_vector_dict; _file_id = rhs._file_id; _size_on_disk = rhs._size_on_disk; - _cache_disk_io_stats = rhs._cache_disk_io_stats; + _io_stats = rhs._io_stats; } PostingListHandle -FieldIndex::read_uncached_posting_list(const DictionaryLookupResult& lookup_result) const +FieldIndex::read_uncached_posting_list(const DictionaryLookupResult& lookup_result, bool trim) const { auto handle = _posting_file->read_posting_list(lookup_result); - if (handle._read_bytes != 0) { - _cache_disk_io_stats->add_uncached_read_operation(handle._read_bytes); + assert(handle._read_bytes != 0); + _io_stats->add_uncached_read_operation(handle._read_bytes); + if (trim) { + _posting_file->consider_trim_posting_list(lookup_result, handle, 0.2); // Trim posting list if more than 20% bloat } return handle; } PostingListHandle -FieldIndex::read(const IPostingListCache::Key& key) const +FieldIndex::read(const IPostingListCache::Key& key, IPostingListCache::Context& ctx) 
const { + ctx.cache_miss = true; DictionaryLookupResult lookup_result; lookup_result.bitOffset = key.bit_offset; lookup_result.counts._bitLength = key.bit_length; - key.backing_store_file = nullptr; // Signal cache miss back to layer above cache - return read_uncached_posting_list(lookup_result); + return read_uncached_posting_list(lookup_result, true); } PostingListHandle @@ -191,18 +201,18 @@ FieldIndex::read_posting_list(const DictionaryLookupResult& lookup_result) const if (file == nullptr || lookup_result.counts._bitLength == 0) { return {}; } - if (file->getMemoryMapped() || !_posting_list_cache) { - return read_uncached_posting_list(lookup_result); + if (file->getMemoryMapped() || !_posting_list_cache_enabled) { + return read_uncached_posting_list(lookup_result, false); } IPostingListCache::Key key; - key.backing_store_file = this; key.file_id = _file_id; key.bit_offset = lookup_result.bitOffset; key.bit_length = lookup_result.counts._bitLength; - auto result = _posting_list_cache->read(key); - auto cache_hit = key.backing_store_file == this; - if (cache_hit && result._read_bytes != 0) { - _cache_disk_io_stats->add_cached_read_operation(result._read_bytes); + IPostingListCache::Context ctx(this); + auto result = _posting_list_cache->read(key, ctx); + if (!ctx.cache_miss) { + assert(result._read_bytes != 0); + _io_stats->add_cached_read_operation(result._read_bytes); } return result; } @@ -216,13 +226,41 @@ FieldIndex::lookup_bit_vector(const DictionaryLookupResult& lookup_result) const return _bit_vector_dict->lookup(lookup_result.wordNum); } -std::unique_ptr +std::shared_ptr +FieldIndex::read_uncached_bit_vector(BitVectorDictionaryLookupResult lookup_result) const +{ + ReadStats read_stats; + auto result = _bit_vector_dict->read_bitvector(lookup_result, read_stats); + assert(read_stats.read_bytes != 0); + _io_stats->add_uncached_read_operation(read_stats.read_bytes); + return result; +} + +std::shared_ptr +FieldIndex::read(const 
IPostingListCache::BitVectorKey& key, IPostingListCache::Context& ctx) const +{ + ctx.cache_miss = true; + return read_uncached_bit_vector(key.lookup_result); +} + +std::shared_ptr FieldIndex::read_bit_vector(BitVectorDictionaryLookupResult lookup_result) const { - if (!_bit_vector_dict) { + if (!_bit_vector_dict || !lookup_result.valid()) { return {}; } - return _bit_vector_dict->read_bitvector(lookup_result); + if (_bit_vector_dict->get_memory_mapped() || !_bitvector_cache_enabled) { + return read_uncached_bit_vector(lookup_result); + } + IPostingListCache::BitVectorKey key; + key.file_id = _file_id; + key.lookup_result = lookup_result; + IPostingListCache::Context ctx(this); + auto result = _posting_list_cache->read(key, ctx); + if (!ctx.cache_miss) { + _io_stats->add_cached_read_operation(result->getFileBytes()); + } + return result; } std::unique_ptr @@ -241,10 +279,10 @@ FieldIndex::get_field_length_info() const } FieldIndexStats -FieldIndex::get_stats() const +FieldIndex::get_stats(bool clear_disk_io_stats) const { - auto cache_disk_io_stats = _cache_disk_io_stats->read_and_clear(); - return FieldIndexStats().size_on_disk(_size_on_disk).cache_disk_io_stats(cache_disk_io_stats); + auto io_stats = _io_stats->read_and_maybe_clear(clear_disk_io_stats); + return FieldIndexStats().size_on_disk(_size_on_disk).io_stats(io_stats); } } diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.h b/searchlib/src/vespa/searchlib/diskindex/field_index.h index 3fe002ab6775..6aaf09113c77 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.h +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.h @@ -25,13 +25,13 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { using DiskPostingFileReal = Zc4PosOccRandRead; using DiskPostingFileDynamicKReal = ZcPosOccRandRead; - class LockedCacheDiskIoStats { - CacheDiskIoStats _stats; + class LockedFieldIndexIoStats { + FieldIndexIoStats _stats; std::mutex _mutex; public: - 
LockedCacheDiskIoStats() noexcept; - ~LockedCacheDiskIoStats(); + LockedFieldIndexIoStats() noexcept; + ~LockedFieldIndexIoStats(); void add_uncached_read_operation(uint64_t bytes) { std::lock_guard guard(_mutex); @@ -42,9 +42,9 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { _stats.add_cached_read_operation(bytes); } - CacheDiskIoStats read_and_clear() { + FieldIndexIoStats read_and_maybe_clear(bool clear_disk_io_stats) { std::lock_guard guard(_mutex); - return _stats.read_and_clear(); + return _stats.read_and_maybe_clear(clear_disk_io_stats); } }; @@ -53,8 +53,10 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { std::unique_ptr _dict; uint64_t _file_id; uint64_t _size_on_disk; - std::shared_ptr _cache_disk_io_stats; + std::shared_ptr _io_stats; std::shared_ptr _posting_list_cache; + bool _posting_list_cache_enabled; + bool _bitvector_cache_enabled; static std::atomic _file_id_source; uint32_t _field_id; @@ -70,19 +72,23 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { bool open_dictionary(const std::string& field_dir, const TuneFileSearch& tune_file_search); bool open(const std::string& field_dir, const TuneFileSearch &tune_file_search); void reuse_files(const FieldIndex& rhs); - index::PostingListHandle read_uncached_posting_list(const search::index::DictionaryLookupResult& lookup_result) const; - index::PostingListHandle read(const IPostingListCache::Key& key) const override; + index::PostingListHandle read_uncached_posting_list(const search::index::DictionaryLookupResult &lookup_result, + bool trim) const; + index::PostingListHandle read(const IPostingListCache::Key& key, IPostingListCache::Context& ctx) const override; index::PostingListHandle read_posting_list(const search::index::DictionaryLookupResult& lookup_result) const; index::BitVectorDictionaryLookupResult lookup_bit_vector(const search::index::DictionaryLookupResult& lookup_result) const; - std::unique_ptr 
read_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const; + std::shared_ptr read_uncached_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const; + std::shared_ptr read(const IPostingListCache::BitVectorKey& key, IPostingListCache::Context& ctx) const override; + std::shared_ptr read_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const; std::unique_ptr create_iterator(const search::index::DictionaryLookupResult& lookup_result, const index::PostingListHandle& handle, const search::fef::TermFieldMatchDataArray& tfmda) const; index::FieldLengthInfo get_field_length_info() const; index::DictionaryFileRandRead* get_dictionary() noexcept { return _dict.get(); } - FieldIndexStats get_stats() const; + FieldIndexStats get_stats(bool clear_disk_io_stats) const; uint32_t get_field_id() const noexcept { return _field_id; } + bool is_posting_list_cache_enabled() const noexcept { return _posting_list_cache_enabled; } }; } diff --git a/searchlib/src/vespa/searchlib/diskindex/i_posting_list_cache.h b/searchlib/src/vespa/searchlib/diskindex/i_posting_list_cache.h index b3c982c76f73..4f9d29494356 100644 --- a/searchlib/src/vespa/searchlib/diskindex/i_posting_list_cache.h +++ b/searchlib/src/vespa/searchlib/diskindex/i_posting_list_cache.h @@ -2,10 +2,13 @@ #pragma once +#include #include #include #include +namespace search { class BitVector; } + namespace search::diskindex { /* @@ -15,11 +18,10 @@ class IPostingListCache { public: class IPostingListFileBacking; struct Key { - mutable const IPostingListFileBacking* backing_store_file; // Used by backing store on cache miss uint64_t file_id; uint64_t bit_offset; uint64_t bit_length; - Key() noexcept : backing_store_file(nullptr), file_id(0), bit_offset(0), bit_length(0) { } + Key() noexcept : file_id(0), bit_offset(0), bit_length(0) { } size_t hash() const noexcept { return std::rotl(file_id, 40) + bit_offset; } bool operator==(const Key& rhs) const noexcept { // Don't check 
backing_store_file, it is just passed in key for convenience @@ -28,17 +30,41 @@ class IPostingListCache { bit_length == rhs.bit_length; } }; + struct BitVectorKey { + uint64_t file_id; + index::BitVectorDictionaryLookupResult lookup_result; + BitVectorKey() noexcept : file_id(0), lookup_result() { } + size_t hash() const noexcept { return std::rotl(file_id, 40) + lookup_result.idx; } + bool operator==(const BitVectorKey& rhs) const noexcept { + return file_id == rhs.file_id && lookup_result.idx == rhs.lookup_result.idx; + } + }; + struct Context { + const IPostingListFileBacking* const backing_store_file; + bool cache_miss; + + Context(const IPostingListFileBacking *backing_store_file_in) noexcept + : backing_store_file(backing_store_file_in), + cache_miss(false) + { + } + }; /* * Interface class for reading posting list on cache miss. */ class IPostingListFileBacking { public: virtual ~IPostingListFileBacking() = default; - virtual search::index::PostingListHandle read(const Key& key) const = 0; + virtual search::index::PostingListHandle read(const Key& key, Context& ctx) const = 0; + virtual std::shared_ptr read(const BitVectorKey& key, Context& ctx) const = 0; }; virtual ~IPostingListCache() = default; - virtual search::index::PostingListHandle read(const Key& key) const = 0; + virtual search::index::PostingListHandle read(const Key& key, Context& ctx) const = 0; + virtual std::shared_ptr read(const BitVectorKey& key, Context& ctx) const = 0; virtual vespalib::CacheStats get_stats() const = 0; + virtual vespalib::CacheStats get_bitvector_stats() const = 0; + virtual bool enabled_for_posting_lists() const noexcept = 0; + virtual bool enabled_for_bitvectors() const noexcept = 0; }; } diff --git a/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.cpp b/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.cpp index 9e09e3028d3a..f81b2427a5d6 100644 --- a/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.cpp +++ 
b/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.cpp @@ -1,11 +1,14 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "posting_list_cache.h" +#include #include #include #include #include +#include +using search::index::BitVectorDictionaryLookupResult; using search::index::DictionaryLookupResult; using search::index::PostingListHandle; @@ -16,17 +19,25 @@ class PostingListCache::BackingStore public: BackingStore(); ~BackingStore(); - bool read(const IPostingListCache::Key& key, PostingListHandle& value) const; + bool read(const Key& key, PostingListHandle& value, Context& ctx) const; + bool read(const BitVectorKey& key, std::shared_ptr& value, Context& ctx) const; }; PostingListCache::BackingStore::BackingStore() = default; PostingListCache::BackingStore::~BackingStore() = default; bool -PostingListCache::BackingStore::read(const IPostingListCache::Key& key, PostingListHandle& value) const +PostingListCache::BackingStore::read(const Key& key, PostingListHandle& value, Context& ctx) const { // TODO: Store a smaller copy if posting list is small - value = key.backing_store_file->read(key); + value = ctx.backing_store_file->read(key, ctx); + return true; +} + +bool +PostingListCache::BackingStore::read(const BitVectorKey& key, std::shared_ptr& value, Context& ctx) const +{ + value = ctx.backing_store_file->read(key, ctx); return true; } @@ -56,19 +67,52 @@ PostingListCache::Cache::Cache(BackingStore& backing_store, size_t max_bytes) PostingListCache::Cache::~Cache() = default; -PostingListCache::PostingListCache(size_t max_bytes) +struct BitVectorCacheValueSize { + size_t operator() (const std::shared_ptr& bv) const noexcept { return bv->get_allocated_bytes(true); } +}; + +using BitVectorCacheParams = vespalib::CacheParam< + vespalib::LruParam>, + const PostingListCache::BackingStore, + vespalib::zero, + BitVectorCacheValueSize +>; + +class PostingListCache::BitVectorCache : public 
vespalib::cache { +public: + using Parent = vespalib::cache; + BitVectorCache(BackingStore& backing_store, size_t max_bytes); + ~BitVectorCache(); + static size_t element_size() { return sizeof(value_type); } +}; + +PostingListCache::BitVectorCache::BitVectorCache(BackingStore& backing_store, size_t max_bytes) + : Parent(backing_store, max_bytes) +{ +} + +PostingListCache::BitVectorCache::~BitVectorCache() = default; + +PostingListCache::PostingListCache(size_t max_bytes, size_t bitvector_max_bytes) : IPostingListCache(), _backing_store(std::make_unique()), - _cache(std::make_unique(*_backing_store, max_bytes)) + _cache(std::make_unique(*_backing_store, max_bytes)), + _bitvector_cache(std::make_unique(*_backing_store, bitvector_max_bytes)) { } PostingListCache::~PostingListCache() = default; PostingListHandle -PostingListCache::read(const Key& key) const +PostingListCache::read(const Key& key, Context& ctx) const { - return _cache->read(key); + return _cache->read(key, ctx); +} + +std::shared_ptr +PostingListCache::read(const BitVectorKey& key, Context& ctx) const +{ + return _bitvector_cache->read(key, ctx); } vespalib::CacheStats @@ -77,10 +121,34 @@ PostingListCache::get_stats() const return _cache->get_stats(); } +vespalib::CacheStats +PostingListCache::get_bitvector_stats() const +{ + return _bitvector_cache->get_stats(); +} + +bool +PostingListCache::enabled_for_posting_lists() const noexcept +{ + return _cache->capacityBytes() != 0; +} + +bool +PostingListCache::enabled_for_bitvectors() const noexcept +{ + return _bitvector_cache->capacityBytes() != 0; +} + size_t PostingListCache::element_size() { return PostingListCache::Cache::element_size(); } +size_t +PostingListCache::bitvector_element_size() +{ + return PostingListCache::BitVectorCache::element_size(); +} + } diff --git a/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.h b/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.h index b809b09549fc..6599ae281226 100644 --- 
a/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.h +++ b/searchlib/src/vespa/searchlib/diskindex/posting_list_cache.h @@ -15,14 +15,21 @@ class PostingListCache : public IPostingListCache { class BackingStore; private: class Cache; + class BitVectorCache; std::unique_ptr _backing_store; std::unique_ptr _cache; + std::unique_ptr _bitvector_cache; public: - PostingListCache(size_t max_bytes); + PostingListCache(size_t max_bytes, size_t bitvector_max_bytes); ~PostingListCache() override; - search::index::PostingListHandle read(const Key& key) const override; + search::index::PostingListHandle read(const Key& key, Context& ctx) const override; + std::shared_ptr read(const BitVectorKey& key, Context& ctx) const override; vespalib::CacheStats get_stats() const override; + vespalib::CacheStats get_bitvector_stats() const override; + bool enabled_for_posting_lists() const noexcept override; + bool enabled_for_bitvectors() const noexcept override; static size_t element_size(); + static size_t bitvector_element_size(); }; } diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp index 0537aa320ef2..233a144b39a2 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -44,7 +45,8 @@ ZcPosOccRandRead::ZcPosOccRandRead() _fileBitSize(0), _headerBitSize(0), _fieldsParams() -{ } +{ +} ZcPosOccRandRead::~ZcPosOccRandRead() @@ -92,32 +94,32 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result) uint64_t startOffset = (lookup_result.bitOffset + _headerBitSize) >> 3; // Align start at 64-bit boundary startOffset -= (startOffset & 7); + uint64_t endOffset = (lookup_result.bitOffset + _headerBitSize + + lookup_result.counts._bitLength + 7) >> 3; + // Align end at 64-bit boundary + endOffset += (-endOffset & 7); 
void *mapPtr = _file->MemoryMapPtr(startOffset); if (mapPtr != nullptr) { handle._mem = mapPtr; + size_t pad_before = startOffset - vespalib::round_down_to_page_boundary(startOffset); + handle._read_bytes = vespalib::round_up_to_page_size(pad_before + endOffset - startOffset + decode_prefetch_size); } else { - uint64_t endOffset = (lookup_result.bitOffset + _headerBitSize + - lookup_result.counts._bitLength + 7) >> 3; - // Align end at 64-bit boundary - endOffset += (-endOffset & 7); - uint64_t vectorLen = endOffset - startOffset; size_t padBefore; size_t padAfter; size_t padExtraAfter; // Decode prefetch space _file->DirectIOPadding(startOffset, vectorLen, padBefore, padAfter); padExtraAfter = 0; - if (padAfter < 16) { - padExtraAfter = 16 - padAfter; + if (padAfter < decode_prefetch_size) { + padExtraAfter = decode_prefetch_size - padAfter; } size_t mallocLen = padBefore + vectorLen + padAfter + padExtraAfter; - void *mallocStart = nullptr; void *alignedBuffer = nullptr; if (mallocLen > 0) { - alignedBuffer = _file->AllocateDirectIOBuffer(mallocLen, mallocStart); - assert(mallocStart != nullptr); + alignedBuffer = _file->AllocateDirectIOBuffer(mallocLen); + assert(alignedBuffer != nullptr); assert(endOffset + padAfter + padExtraAfter <= _fileSize); _file->ReadBuf(alignedBuffer, padBefore + vectorLen + padAfter, @@ -130,7 +132,7 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result) padExtraAfter); } handle._mem = static_cast(alignedBuffer) + padBefore; - handle._allocMem = std::shared_ptr(mallocStart, free); + handle._allocMem = std::shared_ptr(alignedBuffer, free); handle._allocSize = mallocLen; handle._read_bytes = padBefore + vectorLen + padAfter; } @@ -138,6 +140,37 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result) return handle; } +void +ZcPosOccRandRead::consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle, + double bloat_factor) const +{ + if 
(lookup_result.counts._bitLength == 0 || _memoryMapped) { + return; + } + uint64_t start_offset = (lookup_result.bitOffset + _headerBitSize) >> 3; + // Align start at 64-bit boundary + start_offset -= (start_offset & 7); + uint64_t end_offset = (lookup_result.bitOffset + _headerBitSize + + lookup_result.counts._bitLength + 7) >> 3; + // Align end at 64-bit boundary + end_offset += (-end_offset & 7); + size_t malloc_len = end_offset - start_offset + decode_prefetch_size; + if (handle._allocSize == malloc_len) { + assert(handle._allocMem.get() == handle._mem); + return; + } + assert(handle._allocSize >= malloc_len); + if (handle._allocSize <= malloc_len * (1.0 + bloat_factor)) { + return; + } + auto *mem = malloc(malloc_len); + assert(mem != nullptr); + memcpy(mem, handle._mem, malloc_len); + handle._allocMem = std::shared_ptr(mem, free); + handle._mem = mem; + handle._allocSize = malloc_len; + handle._read_bytes = end_offset - start_offset; +} bool ZcPosOccRandRead:: @@ -157,6 +190,7 @@ open(const std::string &name, const TuneFileRandRead &tuneFileRead) _fileSize = _file->getSize(); readHeader(); + afterOpen(*_file); return true; } diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h index fd69786ac51d..63da1cf883be 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h @@ -22,6 +22,8 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead uint64_t _headerBitSize; bitcompression::PosOccFieldsParams _fieldsParams; + static constexpr size_t decode_prefetch_size = 16; + public: ZcPosOccRandRead(); ~ZcPosOccRandRead(); @@ -42,6 +44,8 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead * Read (possibly partial) posting list into handle. 
*/ PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override; + void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle, + double bloat_factor) const override; bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override; bool close() override; diff --git a/searchlib/src/vespa/searchlib/engine/proto_converter.cpp b/searchlib/src/vespa/searchlib/engine/proto_converter.cpp index 519b4f7785ee..c8333db91264 100644 --- a/searchlib/src/vespa/searchlib/engine/proto_converter.cpp +++ b/searchlib/src/vespa/searchlib/engine/proto_converter.cpp @@ -237,7 +237,11 @@ ProtoConverter::docsum_reply_to_proto(const DocsumReply &reply, ProtoDocsumReply if (reply.hasResult()) { vespalib::SmartBuffer buf(4_Ki); vespalib::slime::BinaryFormat::encode(reply.slime(), buf); - proto.set_slime_summaries(buf.obtain().data, buf.obtain().size); + if (buf.obtain().size < 2_Gi - 4_Ki) { + proto.set_slime_summaries(buf.obtain().data, buf.obtain().size); + } else { + proto.add_errors()->set_message("Error: DocsumReply too big, > 2GB"); + } } if (reply.hasIssues()) { reply.issues().for_each_message([&](const std::string &err_msg) diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp index 12bd62af9ef2..ce3f3994e2af 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp @@ -110,6 +110,13 @@ PostingListFileRandReadPassThrough::read_posting_list(const DictionaryLookupResu return _lower->read_posting_list(lookup_result); } +void +PostingListFileRandReadPassThrough::consider_trim_posting_list(const DictionaryLookupResult &lookup_result, + PostingListHandle &handle, double bloat_factor) const +{ + return _lower->consider_trim_posting_list(lookup_result, handle, bloat_factor); +} + bool PostingListFileRandReadPassThrough::open(const std::string &name, const 
TuneFileRandRead &tuneFileRead) diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.h b/searchlib/src/vespa/searchlib/index/postinglistfile.h index 8ddfc2dc12cc..29aa44de08b3 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.h +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.h @@ -159,8 +159,13 @@ class PostingListFileRandRead { /** * Read posting list into handle. */ - virtual PostingListHandle - read_posting_list(const DictionaryLookupResult& lookup_result) = 0; + virtual PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) = 0; + + /** + * Remove directio padding from posting list if bloat is excessive. + */ + virtual void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle, + double bloat_factor) const = 0; /** * Open posting list file for random read. @@ -199,6 +204,8 @@ class PostingListFileRandReadPassThrough : public PostingListFileRandRead { const search::fef::TermFieldMatchDataArray &matchData) const override; PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override; + void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle, + double bloat_factor) const override; bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override; bool close() override; diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp index d78eaac8eb5f..84d5db25f40f 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp @@ -59,10 +59,10 @@ FieldIndexCollection::getMemoryUsage() const return usage; } -SearchableStats +IndexStats FieldIndexCollection::get_stats(const index::Schema& schema) const { - SearchableStats stats; + IndexStats stats; vespalib::MemoryUsage memory_usage; for 
(uint32_t field_id = 0; field_id < _numFields; ++field_id) { auto &field_index = _fieldIndexes[field_id]; diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h index 5c8ae2603396..dd7faac4d437 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h @@ -4,7 +4,7 @@ #include "i_field_index_collection.h" #include "i_field_index.h" -#include +#include #include #include @@ -47,7 +47,7 @@ class FieldIndexCollection : public IFieldIndexCollection { void dump(search::index::IndexBuilder & indexBuilder); vespalib::MemoryUsage getMemoryUsage() const; - SearchableStats get_stats(const index::Schema& schema) const; + IndexStats get_stats(const index::Schema& schema) const; IFieldIndex *getFieldIndex(uint32_t fieldId) const { return _fieldIndexes[fieldId].get(); diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp index d550e068c8a5..16e7c43f2508 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp @@ -213,7 +213,7 @@ MemoryIndex::getMemoryUsage() const return usage; } -SearchableStats +IndexStats MemoryIndex::get_stats() const { auto stats = _fieldIndexes->get_stats(_schema); diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h index 45bd037e41d7..c95870d631d2 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -170,7 +170,7 @@ class MemoryIndex : public queryeval::Searchable { */ vespalib::MemoryUsage getMemoryUsage() const; - SearchableStats get_stats() const; + IndexStats 
get_stats() const; uint64_t getStaticMemoryFootprint() const { return _staticMemoryFootprint; } diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp index 4d862b9b76a4..04e8a7316c86 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp @@ -13,10 +13,12 @@ #include #include #include +#include #include #include +#include #include -#include +#include #include #include #include @@ -34,19 +36,47 @@ namespace search::tensor { inline namespace loader { +class Event { +private: + vespalib::JSONStringer jstr; +public: + Event(const TensorAttribute& attr) : jstr() { + jstr.beginObject(); + jstr.appendKey("name").appendString(attr.getName()); + } + Event& addKV(const char* key, const char* value) { + jstr.appendKey(key).appendString(value); + return *this; + } + Event& addKV(const char* key, double value) { + jstr.appendKey(key).appendDouble(value); + return *this; + } + void log(const char *eventName) { + jstr.endObject(); + EV_STATE(eventName, jstr.str().c_str()); + } +}; + constexpr uint32_t LOAD_COMMIT_INTERVAL = 256; const std::string tensorTypeTag("tensortype"); -bool -can_use_index_save_file(const search::attribute::Config &config, const AttributeHeader& header) +bool can_use_index_save_file(const std::string& attrName, + const search::attribute::Config &config, + const AttributeHeader& header) { if (!config.hnsw_index_params().has_value() || !header.get_hnsw_index_params().has_value()) { + LOG(warning, "Attribute %s cannot use saved HNSW index for ANN (missing parameters)", + attrName.c_str()); return false; } const auto &config_params = config.hnsw_index_params().value(); const auto &header_params = header.get_hnsw_index_params().value(); if ((config_params.max_links_per_node() != header_params.max_links_per_node()) || - (config_params.distance_metric() != 
header_params.distance_metric())) { + (config_params.distance_metric() != header_params.distance_metric())) + { + LOG(warning, "Attribute %s cannot use saved HNSW index for ANN, index parameters have changed", + attrName.c_str()); return false; } return true; @@ -258,16 +288,32 @@ TensorAttributeLoader::build_index(vespalib::Executor* executor, uint32_t docid_ std::unique_ptr builder; if (executor != nullptr) { builder = std::make_unique(_attr, _generation_handler, _store, *_index, *executor); + Event(_attr).addKV("execution", "multi-threaded").log("hnsw.index.rebuild.start"); } else { builder = std::make_unique(_attr, *_index); + Event(_attr).addKV("execution", "single-threaded").log("hnsw.index.rebuild.start"); } + constexpr vespalib::duration report_interval = 60s; + auto beforeStamp = vespalib::steady_clock::now(); + auto last_report = beforeStamp; for (uint32_t lid = 0; lid < docid_limit; ++lid) { auto ref = _ref_vector[lid].load_relaxed(); if (ref.valid()) { builder->add(lid); + auto now = vespalib::steady_clock::now(); + if (last_report + report_interval < now) { + Event(_attr) + .addKV("percent", (lid * 100.0 / docid_limit)) + .log("hnsw.index.rebuild.progress"); + last_report = now; + } } } builder->wait_complete(); + vespalib::duration elapsedTime = vespalib::steady_clock::now() - beforeStamp; + Event(_attr) + .addKV("time.elapsed.ms", vespalib::count_ms(elapsedTime)) + .log("hnsw.index.rebuild.complete"); _attr.commit(); } @@ -330,7 +376,7 @@ TensorAttributeLoader::on_load(vespalib::Executor* executor) bool use_index_file = false; if (has_index_file(_attr)) { auto header = AttributeHeader::extractTags(reader.getDatHeader(), _attr.getBaseFileName()); - use_index_file = can_use_index_save_file(_attr.getConfig(), header); + use_index_file = can_use_index_save_file(_attr.getName(), _attr.getConfig(), header); } if (use_index_file) { if (!load_index()) { @@ -357,4 +403,4 @@ TensorAttributeLoader::check_consistency(uint32_t docid_limit) inconsistencies, 
_attr.getName().c_str(), elapsed); } -} +} // namespace search::tensor diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h index 89a07c03de5f..968a60fe9e99 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h @@ -44,4 +44,3 @@ class TensorAttributeLoader { }; } - diff --git a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp index 4a2df1976943..f1db6da8fe3f 100644 --- a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp +++ b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp @@ -120,7 +120,7 @@ TestDiskIndex::openIndex(const std::string &dir, bool directio, bool readmmap, b } std::shared_ptr posting_list_cache; if (use_posting_list_cache) { - posting_list_cache = std::make_shared(256_Ki); + posting_list_cache = std::make_shared(256_Ki, 256_Ki); } _index = std::make_unique(dir, posting_list_cache); bool ok(_index->setup(tuneFileRead)); diff --git a/searchlib/src/vespa/searchlib/util/CMakeLists.txt b/searchlib/src/vespa/searchlib/util/CMakeLists.txt index 9d192abad82d..8916565bd492 100644 --- a/searchlib/src/vespa/searchlib/util/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/util/CMakeLists.txt @@ -2,13 +2,13 @@ vespa_add_library(searchlib_util OBJECT SOURCES bufferwriter.cpp - cache_disk_io_stats.cpp comprbuffer.cpp comprfile.cpp data_buffer_writer.cpp disk_io_stats.cpp dirtraverse.cpp drainingbufferwriter.cpp + field_index_io_stats.cpp field_index_stats.cpp file_with_header.cpp filealign.cpp @@ -17,10 +17,10 @@ vespa_add_library(searchlib_util OBJECT filesizecalculator.cpp fileutil.cpp foldedstringcompare.cpp + index_stats.cpp linguisticsannotation.cpp logutil.cpp rawbuf.cpp - searchable_stats.cpp slime_output_raw_buf_adapter.cpp state_explorer_utils.cpp token_extractor.cpp diff 
--git a/searchlib/src/vespa/searchlib/util/cache_disk_io_stats.cpp b/searchlib/src/vespa/searchlib/util/field_index_io_stats.cpp similarity index 69% rename from searchlib/src/vespa/searchlib/util/cache_disk_io_stats.cpp rename to searchlib/src/vespa/searchlib/util/field_index_io_stats.cpp index 8ec55719f3ad..64f111c08d5e 100644 --- a/searchlib/src/vespa/searchlib/util/cache_disk_io_stats.cpp +++ b/searchlib/src/vespa/searchlib/util/field_index_io_stats.cpp @@ -1,11 +1,11 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "cache_disk_io_stats.h" +#include "field_index_io_stats.h" #include namespace search { -std::ostream& operator<<(std::ostream& os, const CacheDiskIoStats& stats) { +std::ostream& operator<<(std::ostream& os, const FieldIndexIoStats& stats) { os << "{read: " << stats.read() << ", cached_read: " << stats.cached_read() << "}"; return os; } diff --git a/searchlib/src/vespa/searchlib/util/cache_disk_io_stats.h b/searchlib/src/vespa/searchlib/util/field_index_io_stats.h similarity index 57% rename from searchlib/src/vespa/searchlib/util/cache_disk_io_stats.h rename to searchlib/src/vespa/searchlib/util/field_index_io_stats.h index cd6f6d891853..5b65d3c163e8 100644 --- a/searchlib/src/vespa/searchlib/util/cache_disk_io_stats.h +++ b/searchlib/src/vespa/searchlib/util/field_index_io_stats.h @@ -6,33 +6,39 @@ namespace search { /* - * Class tracking disk io when using a cache. + * Class tracking disk io for a single field. 
*/ -class CacheDiskIoStats { +class FieldIndexIoStats { DiskIoStats _read; // cache miss DiskIoStats _cached_read; // cache hit public: - CacheDiskIoStats() noexcept + FieldIndexIoStats() noexcept : _read(), _cached_read() { } - CacheDiskIoStats& read(const DiskIoStats& value) { _read = value; return *this; } - CacheDiskIoStats& cached_read(DiskIoStats& value) { _cached_read = value; return *this; } + FieldIndexIoStats& read(const DiskIoStats& value) { _read = value; return *this; } + FieldIndexIoStats& cached_read(DiskIoStats& value) { _cached_read = value; return *this; } const DiskIoStats& read() const noexcept { return _read; } const DiskIoStats& cached_read() const noexcept { return _cached_read; } - void merge(const CacheDiskIoStats& rhs) noexcept { + void merge(const FieldIndexIoStats& rhs) noexcept { _read.merge(rhs.read()); _cached_read.merge(rhs.cached_read()); } - bool operator==(const CacheDiskIoStats &rhs) const noexcept { + bool operator==(const FieldIndexIoStats &rhs) const noexcept { return _read == rhs.read() && _cached_read == rhs.cached_read(); } - CacheDiskIoStats read_and_clear() noexcept { auto result = *this; clear(); return result; } + FieldIndexIoStats read_and_maybe_clear(bool clear_disk_io_stats) noexcept { + auto result = *this; + if (clear_disk_io_stats) { + clear(); + } + return result; + } void clear() noexcept { _read.clear(); _cached_read.clear(); @@ -41,6 +47,6 @@ class CacheDiskIoStats { void add_cached_read_operation(uint64_t bytes) noexcept { _cached_read.add_read_operation(bytes); } }; -std::ostream& operator<<(std::ostream& os, const CacheDiskIoStats& stats); +std::ostream& operator<<(std::ostream& os, const FieldIndexIoStats& stats); } diff --git a/searchlib/src/vespa/searchlib/util/field_index_stats.cpp b/searchlib/src/vespa/searchlib/util/field_index_stats.cpp index 96f57bae2b30..c28363ce7585 100644 --- a/searchlib/src/vespa/searchlib/util/field_index_stats.cpp +++ b/searchlib/src/vespa/searchlib/util/field_index_stats.cpp 
@@ -7,7 +7,7 @@ namespace search { std::ostream& operator<<(std::ostream& os, const FieldIndexStats& stats) { os << "{memory: " << stats.memory_usage() << ", disk: " << stats.size_on_disk() << - ", diskio: " << stats.cache_disk_io_stats() << "}"; + ", diskio: " << stats.io_stats() << "}"; return os; } diff --git a/searchlib/src/vespa/searchlib/util/field_index_stats.h b/searchlib/src/vespa/searchlib/util/field_index_stats.h index 9c153bcf4ca1..8b14de204514 100644 --- a/searchlib/src/vespa/searchlib/util/field_index_stats.h +++ b/searchlib/src/vespa/searchlib/util/field_index_stats.h @@ -1,7 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include "cache_disk_io_stats.h" +#include "field_index_io_stats.h" #include #include @@ -15,13 +15,13 @@ class FieldIndexStats private: vespalib::MemoryUsage _memory_usage; size_t _size_on_disk; // in bytes - CacheDiskIoStats _cache_disk_io_stats; + FieldIndexIoStats _io_stats; public: FieldIndexStats() noexcept : _memory_usage(), _size_on_disk(0), - _cache_disk_io_stats() + _io_stats() {} FieldIndexStats &memory_usage(const vespalib::MemoryUsage &usage) noexcept { _memory_usage = usage; @@ -34,19 +34,19 @@ class FieldIndexStats } size_t size_on_disk() const noexcept { return _size_on_disk; } - FieldIndexStats& cache_disk_io_stats(const CacheDiskIoStats& stats) { _cache_disk_io_stats = stats; return *this; } - const CacheDiskIoStats& cache_disk_io_stats() const noexcept { return _cache_disk_io_stats; } + FieldIndexStats& io_stats(const FieldIndexIoStats& stats) { _io_stats = stats; return *this; } + const FieldIndexIoStats& io_stats() const noexcept { return _io_stats; } void merge(const FieldIndexStats &rhs) noexcept { _memory_usage.merge(rhs._memory_usage); _size_on_disk += rhs._size_on_disk; - _cache_disk_io_stats.merge(rhs._cache_disk_io_stats); + _io_stats.merge(rhs._io_stats); } bool operator==(const FieldIndexStats& rhs) const noexcept { 
return _memory_usage == rhs._memory_usage && - _size_on_disk == rhs._size_on_disk && - _cache_disk_io_stats == rhs._cache_disk_io_stats; + _size_on_disk == rhs._size_on_disk && + _io_stats == rhs._io_stats; } }; diff --git a/searchlib/src/vespa/searchlib/util/searchable_stats.cpp b/searchlib/src/vespa/searchlib/util/index_stats.cpp similarity index 75% rename from searchlib/src/vespa/searchlib/util/searchable_stats.cpp rename to searchlib/src/vespa/searchlib/util/index_stats.cpp index d82e1ccf3e3f..2fbc11dfa10c 100644 --- a/searchlib/src/vespa/searchlib/util/searchable_stats.cpp +++ b/searchlib/src/vespa/searchlib/util/index_stats.cpp @@ -1,11 +1,11 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "searchable_stats.h" +#include "index_stats.h" #include namespace search { -SearchableStats::SearchableStats() +IndexStats::IndexStats() : _memoryUsage(), _docsInMemory(0), _sizeOnDisk(0), @@ -14,10 +14,10 @@ SearchableStats::SearchableStats() { } -SearchableStats::~SearchableStats() = default; +IndexStats::~IndexStats() = default; -SearchableStats& -SearchableStats::merge(const SearchableStats &rhs) { +IndexStats& +IndexStats::merge(const IndexStats &rhs) { _memoryUsage.merge(rhs._memoryUsage); _docsInMemory += rhs._docsInMemory; _sizeOnDisk += rhs._sizeOnDisk; @@ -29,7 +29,7 @@ SearchableStats::merge(const SearchableStats &rhs) { } bool -SearchableStats::operator==(const SearchableStats& rhs) const noexcept +IndexStats::operator==(const IndexStats& rhs) const noexcept { return _memoryUsage == rhs._memoryUsage && _docsInMemory == rhs._docsInMemory && @@ -38,14 +38,14 @@ SearchableStats::operator==(const SearchableStats& rhs) const noexcept _field_stats == rhs._field_stats; } -SearchableStats& -SearchableStats::add_field_stats(const std::string& name, const FieldIndexStats& stats) +IndexStats& +IndexStats::add_field_stats(const std::string& name, const FieldIndexStats& stats) { 
_field_stats[name].merge(stats); return *this; } -std::ostream& operator<<(std::ostream& os, const SearchableStats& stats) { +std::ostream& operator<<(std::ostream& os, const IndexStats& stats) { os << "{memory: " << stats.memoryUsage() << ", docsInMemory: " << stats.docsInMemory() << ", disk: " << stats.sizeOnDisk() << ", fusion_size_on_disk: " << stats.fusion_size_on_disk() << ", "; os << "fields: {"; diff --git a/searchlib/src/vespa/searchlib/util/searchable_stats.h b/searchlib/src/vespa/searchlib/util/index_stats.h similarity index 62% rename from searchlib/src/vespa/searchlib/util/searchable_stats.h rename to searchlib/src/vespa/searchlib/util/index_stats.h index 1cbd7645bbdd..b8ee7786efc6 100644 --- a/searchlib/src/vespa/searchlib/util/searchable_stats.h +++ b/searchlib/src/vespa/searchlib/util/index_stats.h @@ -7,11 +7,11 @@ namespace search { /** - * Simple statistics for a single Searchable component or multiple components that are merged together. + * Simple statistics for a single index or for multiple indexes (merged stats). * * E.g. used for internal aggregation before inserting numbers into the metrics framework. 
**/ -class SearchableStats +class IndexStats { private: vespalib::MemoryUsage _memoryUsage; @@ -21,35 +21,35 @@ class SearchableStats std::map _field_stats; public: - SearchableStats(); - ~SearchableStats(); - SearchableStats &memoryUsage(const vespalib::MemoryUsage &usage) { + IndexStats(); + ~IndexStats(); + IndexStats &memoryUsage(const vespalib::MemoryUsage &usage) { _memoryUsage = usage; return *this; } const vespalib::MemoryUsage &memoryUsage() const { return _memoryUsage; } - SearchableStats &docsInMemory(size_t value) { + IndexStats &docsInMemory(size_t value) { _docsInMemory = value; return *this; } size_t docsInMemory() const { return _docsInMemory; } - SearchableStats &sizeOnDisk(size_t value) { + IndexStats &sizeOnDisk(size_t value) { _sizeOnDisk = value; return *this; } size_t sizeOnDisk() const { return _sizeOnDisk; } - SearchableStats& fusion_size_on_disk(size_t value) { + IndexStats& fusion_size_on_disk(size_t value) { _fusion_size_on_disk = value; return *this; } size_t fusion_size_on_disk() const { return _fusion_size_on_disk; } - SearchableStats& merge(const SearchableStats& rhs); - bool operator==(const SearchableStats& rhs) const noexcept; - SearchableStats& add_field_stats(const std::string& name, const FieldIndexStats& stats); + IndexStats& merge(const IndexStats& rhs); + bool operator==(const IndexStats& rhs) const noexcept; + IndexStats& add_field_stats(const std::string& name, const FieldIndexStats& stats); const std::map& get_field_stats() const noexcept { return _field_stats; } }; -std::ostream& operator<<(std::ostream& os, const SearchableStats& stats); +std::ostream& operator<<(std::ostream& os, const IndexStats& stats); } diff --git a/security-utils/src/main/java/com/yahoo/security/KeyAlgorithm.java b/security-utils/src/main/java/com/yahoo/security/KeyAlgorithm.java index 0cfc988249e9..5ed207eac614 100644 --- a/security-utils/src/main/java/com/yahoo/security/KeyAlgorithm.java +++ 
b/security-utils/src/main/java/com/yahoo/security/KeyAlgorithm.java @@ -9,8 +9,10 @@ * @author bjorncs */ public enum KeyAlgorithm { + RSA("RSA", null), - EC("EC", new ECGenParameterSpec("prime256v1")); // TODO Make curve configurable + EC("EC", new ECGenParameterSpec("prime256v1")), + XDH("XDH", new ECGenParameterSpec("X25519")); final String algorithmName; private final AlgorithmParameterSpec spec; @@ -25,4 +27,18 @@ String getAlgorithmName() { } Optional getSpec() { return Optional.ofNullable(spec); } + + public static KeyAlgorithm from(String name) { + for (var algorithm : values()) { + if (name.equals(algorithm.getAlgorithmName())) { + return algorithm; + } else if (algorithm == XDH && name.equals("X25519")) { + // "XDH" is the name used by the JDK for elliptic curve keys using Curve25519, while BouncyCastle uses + // "X25519" + return algorithm; + } + } + throw new IllegalArgumentException("Unknown key algorithm '" + name + "'"); + } + } diff --git a/security-utils/src/main/java/com/yahoo/security/KeyUtils.java b/security-utils/src/main/java/com/yahoo/security/KeyUtils.java index 0cccd05121dc..d22e0269b4c0 100644 --- a/security-utils/src/main/java/com/yahoo/security/KeyUtils.java +++ b/security-utils/src/main/java/com/yahoo/security/KeyUtils.java @@ -3,6 +3,7 @@ import org.bouncycastle.asn1.ASN1Encodable; import org.bouncycastle.asn1.ASN1Primitive; +import org.bouncycastle.asn1.edec.EdECObjectIdentifiers; import org.bouncycastle.asn1.pkcs.PKCSObjectIdentifiers; import org.bouncycastle.asn1.pkcs.PrivateKeyInfo; import org.bouncycastle.asn1.x509.AlgorithmIdentifier; @@ -50,6 +51,7 @@ import static com.yahoo.security.KeyAlgorithm.EC; import static com.yahoo.security.KeyAlgorithm.RSA; +import static com.yahoo.security.KeyAlgorithm.XDH; /** * @author bjorncs @@ -78,23 +80,30 @@ public static KeyPair generateKeypair(KeyAlgorithm algorithm) { } public static PublicKey extractPublicKey(PrivateKey privateKey) { - String algorithm = privateKey.getAlgorithm(); + 
KeyAlgorithm keyAlgorithm = KeyAlgorithm.from(privateKey.getAlgorithm()); try { - if (algorithm.equals(RSA.getAlgorithmName())) { - KeyFactory keyFactory = createKeyFactory(RSA); - RSAPrivateCrtKey rsaPrivateCrtKey = (RSAPrivateCrtKey) privateKey; - RSAPublicKeySpec keySpec = new RSAPublicKeySpec(rsaPrivateCrtKey.getModulus(), rsaPrivateCrtKey.getPublicExponent()); - return keyFactory.generatePublic(keySpec); - } else if (algorithm.equals(EC.getAlgorithmName())) { - KeyFactory keyFactory = createKeyFactory(EC); - BCECPrivateKey ecPrivateKey = (BCECPrivateKey) privateKey; - ECParameterSpec ecParameterSpec = ecPrivateKey.getParameters(); - ECPoint ecPoint = new FixedPointCombMultiplier().multiply(ecParameterSpec.getG(), ecPrivateKey.getD()); - ECPublicKeySpec keySpec = new ECPublicKeySpec(ecPoint, ecParameterSpec); - return keyFactory.generatePublic(keySpec); - } else { - throw new IllegalArgumentException("Unexpected key algorithm: " + algorithm); - } + return switch (keyAlgorithm) { + case RSA -> { + KeyFactory keyFactory = createKeyFactory(RSA); + RSAPrivateCrtKey rsaPrivateCrtKey = (RSAPrivateCrtKey) privateKey; + RSAPublicKeySpec keySpec = new RSAPublicKeySpec(rsaPrivateCrtKey.getModulus(), rsaPrivateCrtKey.getPublicExponent()); + yield keyFactory.generatePublic(keySpec); + } + case EC -> { + KeyFactory keyFactory = createKeyFactory(EC); + BCECPrivateKey ecPrivateKey = (BCECPrivateKey) privateKey; + ECParameterSpec ecParameterSpec = ecPrivateKey.getParameters(); + ECPoint ecPoint = new FixedPointCombMultiplier().multiply(ecParameterSpec.getG(), ecPrivateKey.getD()); + ECPublicKeySpec keySpec = new ECPublicKeySpec(ecPoint, ecParameterSpec); + yield keyFactory.generatePublic(keySpec); + } + case XDH -> { + byte[] privScalar = toRawX25519PrivateKeyBytes((XECPrivateKey) privateKey); + byte[] pubPoint = new byte[X25519.POINT_SIZE]; + X25519.generatePublicKey(privScalar, 0, pubPoint, 0); // scalarMultBase => public key point + yield fromRawX25519PublicKey(pubPoint); + 
} + }; } catch (GeneralSecurityException e) { throw new RuntimeException(e); } @@ -127,7 +136,7 @@ public static PrivateKey fromPemEncodedPrivateKey(String pem) { unknownObjects.add(pemObject); } } - throw new IllegalArgumentException("Expected a private key, but found " + unknownObjects.toString()); + throw new IllegalArgumentException("Expected a private key, but found " + unknownObjects); } catch (IOException e) { throw new UncheckedIOException(e); } catch (GeneralSecurityException e) { @@ -168,14 +177,10 @@ public static String toPem(PrivateKey privateKey) { } public static String toPem(PrivateKey privateKey, KeyFormat format) { - switch (format) { - case PKCS1: - return toPkcs1Pem(privateKey); - case PKCS8: - return toPkcs8Pem(privateKey); - default: - throw new IllegalArgumentException("Unknown format: " + format); - } + return switch (format) { + case PKCS1 -> toPkcs1Pem(privateKey); + case PKCS8 -> toPkcs8Pem(privateKey); + }; } public static String toPem(PublicKey publicKey) { @@ -190,15 +195,12 @@ public static String toPem(PublicKey publicKey) { private static String toPkcs1Pem(PrivateKey privateKey) { try (StringWriter stringWriter = new StringWriter(); JcaPEMWriter pemWriter = new JcaPEMWriter(stringWriter)) { - String algorithm = privateKey.getAlgorithm(); - String type; - if (algorithm.equals(RSA.getAlgorithmName())) { - type = "RSA PRIVATE KEY"; - } else if (algorithm.equals(EC.getAlgorithmName())) { - type = "EC PRIVATE KEY"; - } else { - throw new IllegalArgumentException("Unexpected key algorithm: " + algorithm); - } + KeyAlgorithm keyAlgorithm = KeyAlgorithm.from(privateKey.getAlgorithm()); + String type = switch (keyAlgorithm) { + case RSA -> "RSA PRIVATE KEY"; + case EC -> "EC PRIVATE KEY"; + case XDH -> throw new IllegalArgumentException("Cannot use PKCS#1 for X25519 key"); + }; pemWriter.writeObject(new PemObject(type, getPkcs1Bytes(privateKey))); pemWriter.flush(); return stringWriter.toString(); @@ -227,9 +229,11 @@ private static byte[] 
getPkcs1Bytes(PrivateKey privateKey) throws IOException{ private static KeyFactory createKeyFactory(AlgorithmIdentifier algorithm) throws NoSuchAlgorithmException { if (X9ObjectIdentifiers.id_ecPublicKey.equals(algorithm.getAlgorithm())) { - return createKeyFactory(KeyAlgorithm.EC); + return createKeyFactory(EC); } else if (PKCSObjectIdentifiers.rsaEncryption.equals(algorithm.getAlgorithm())) { - return createKeyFactory(KeyAlgorithm.RSA); + return createKeyFactory(RSA); + } else if (EdECObjectIdentifiers.id_X25519.equals(algorithm.getAlgorithm())) { + return createKeyFactory(XDH); } else { throw new IllegalArgumentException("Unknown key algorithm: " + algorithm); } @@ -338,21 +342,14 @@ public static String toBase58EncodedX25519PrivateKey(XECPrivateKey privateKey) { return Base58.codec().encode(toRawX25519PrivateKeyBytes(privateKey)); } - // TODO unify with generateKeypair()? + // TODO: In-line and remove public static KeyPair generateX25519KeyPair() { - try { - return KeyPairGenerator.getInstance("X25519").generateKeyPair(); - } catch (NoSuchAlgorithmException e) { - throw new RuntimeException(e); - } + return generateKeypair(XDH); } - // TODO unify with extractPublicKey() + // TODO: In-line and remove public static XECPublicKey extractX25519PublicKey(XECPrivateKey privateKey) { - byte[] privScalar = toRawX25519PrivateKeyBytes(privateKey); - byte[] pubPoint = new byte[X25519.POINT_SIZE]; - X25519.generatePublicKey(privScalar, 0, pubPoint, 0); // scalarMultBase => public key point - return fromRawX25519PublicKey(pubPoint); + return (XECPublicKey) extractPublicKey(privateKey); } /** diff --git a/security-utils/src/test/java/com/yahoo/security/KeyUtilsTest.java b/security-utils/src/test/java/com/yahoo/security/KeyUtilsTest.java index aa1e9861a67f..8bd150d08e75 100644 --- a/security-utils/src/test/java/com/yahoo/security/KeyUtilsTest.java +++ b/security-utils/src/test/java/com/yahoo/security/KeyUtilsTest.java @@ -13,6 +13,7 @@ import static 
com.yahoo.security.ArrayUtils.unhex; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -55,26 +56,34 @@ void can_serialize_and_deserialize_ec_privatekey_using_pkcs8_pem_format() { testPrivateKeySerialization(KeyAlgorithm.EC, KeyFormat.PKCS8, "PRIVATE KEY"); } + @Test + void can_serialize_and_deserialize_x25519_private_key_using_pkcs8_pem_format() { + testPrivateKeySerialization(KeyAlgorithm.XDH, KeyFormat.PKCS8, "PRIVATE KEY"); + } + @Test void can_serialize_and_deserialize_rsa_publickey_using_pem_format() { - KeyPair keyPair = KeyUtils.generateKeypair(KeyAlgorithm.RSA); - String pem = KeyUtils.toPem(keyPair.getPublic()); - assertTrue(pem.contains("BEGIN PUBLIC KEY")); - assertTrue(pem.contains("END PUBLIC KEY")); - PublicKey deserializedKey = KeyUtils.fromPemEncodedPublicKey(pem); - assertEquals(keyPair.getPublic(), deserializedKey); - assertEquals(KeyAlgorithm.RSA.getAlgorithmName(), deserializedKey.getAlgorithm()); + testPublicKeySerialization(KeyAlgorithm.RSA); } @Test void can_serialize_and_deserialize_ec_publickey_using_pem_format() { - KeyPair keyPair = KeyUtils.generateKeypair(KeyAlgorithm.EC); + testPublicKeySerialization(KeyAlgorithm.EC); + } + + @Test + void can_serialize_and_deserialize_x25519_publickey_using_pem_format() { + testPublicKeySerialization(KeyAlgorithm.XDH); + } + + private static void testPublicKeySerialization(KeyAlgorithm keyAlgorithm) { + KeyPair keyPair = KeyUtils.generateKeypair(keyAlgorithm); String pem = KeyUtils.toPem(keyPair.getPublic()); assertTrue(pem.contains("BEGIN PUBLIC KEY")); assertTrue(pem.contains("END PUBLIC KEY")); PublicKey deserializedKey = KeyUtils.fromPemEncodedPublicKey(pem); assertEquals(keyPair.getPublic(), deserializedKey); - 
assertEquals(KeyAlgorithm.EC.getAlgorithmName(), deserializedKey.getAlgorithm()); + assertSame(keyAlgorithm, KeyAlgorithm.from(deserializedKey.getAlgorithm())); } private static void testPrivateKeySerialization(KeyAlgorithm keyAlgorithm, KeyFormat keyFormat, String pemLabel) { @@ -84,7 +93,7 @@ private static void testPrivateKeySerialization(KeyAlgorithm keyAlgorithm, KeyFo assertTrue(pem.contains("END " + pemLabel)); PrivateKey deserializedKey = KeyUtils.fromPemEncodedPrivateKey(pem); assertEquals(keyPair.getPrivate(), deserializedKey); - assertEquals(keyAlgorithm.getAlgorithmName(), deserializedKey.getAlgorithm()); + assertSame(keyAlgorithm, KeyAlgorithm.from(deserializedKey.getAlgorithm())); } private static XECPrivateKey xecPrivFromHex(String hex) { diff --git a/valgrind-suppressions.txt b/valgrind-suppressions.txt index 36cc61701b4a..c769e6e068f6 100644 --- a/valgrind-suppressions.txt +++ b/valgrind-suppressions.txt @@ -617,3 +617,47 @@ fun:setenv ... } +{ + Protobuf 5.26.1 suppression 7 + Memcheck:Leak + match-leak-kinds: reachable + fun:_Znwm + ... + fun:_ZN6google8protobuf14DescriptorPool24InternalAddGeneratedFileEPKvi + fun:_ZN6google8protobuf8internal14AddDescriptorsEPKNS1_15DescriptorTableE + ... +} +{ + Protobuf 5.26.1 suppression 8 + Memcheck:Leak + match-leak-kinds: reachable + fun:_Znwm + ... + fun:_ZN6google8protobuf8internal13OnShutdownRunEPFvPKvES3_ + fun:_ZN6google8protobuf8internal24InitProtobufDefaultsSlowEv + ... +} +{ + Protobuf 5.26.1 suppression 9 + Memcheck:Leak + match-leak-kinds: reachable + fun:_Znwm + fun:_ZN6google8protobuf12_GLOBAL__N_123GeneratedMessageFactory9singletonEv + fun:_ZN6google8protobuf14MessageFactory29InternalRegisterGeneratedFileEPKNS0_8internal15DescriptorTableE + ... + fun:call_init + ... 
+} +{ + Protobuf 5.26.1 suppression 10 + Memcheck:Leak + match-leak-kinds: reachable + fun:_Znwm + fun:_ZN4absl12lts_2024011618container_internal19HashSetResizeHelper15InitializeSlotsISaIcELm8ELb1ELm8EEEbRNS1_12CommonFieldsEPvT_.isra.0 + fun:_ZN4absl12lts_2024011618container_internal12raw_hash_setINS1_17FlatHashSetPolicyIPKN6google8protobuf8internal15DescriptorTableEEENS5_12_GLOBAL__N_123GeneratedMessageFactory20DescriptorByNameHashENSC_18DescriptorByNameEqESaIS9_EE6resizeEm + fun:_ZN4absl12lts_2024011618container_internal12raw_hash_setINS1_17FlatHashSetPolicyIPKN6google8protobuf8internal15DescriptorTableEEENS5_12_GLOBAL__N_123GeneratedMessageFactory20DescriptorByNameHashENSC_18DescriptorByNameEqESaIS9_EE14prepare_insertEm + fun:_ZN6google8protobuf14MessageFactory29InternalRegisterGeneratedFileEPKNS0_8internal15DescriptorTableE + ... + fun:call_init + ... +} diff --git a/vespalib/src/tests/util/CMakeLists.txt b/vespalib/src/tests/util/CMakeLists.txt index fef048cc99c7..c750aeb5c129 100644 --- a/vespalib/src/tests/util/CMakeLists.txt +++ b/vespalib/src/tests/util/CMakeLists.txt @@ -25,6 +25,7 @@ vespa_add_executable(vespalib_util_gtest_runner_test_app TEST random_test.cpp rcuvector_test.cpp ref_counted_test.cpp + relative_frequency_sketch_test.cpp require_test.cpp size_literals_test.cpp small_vector_test.cpp diff --git a/vespalib/src/tests/util/relative_frequency_sketch_test.cpp b/vespalib/src/tests/util/relative_frequency_sketch_test.cpp new file mode 100644 index 000000000000..83613356acc3 --- /dev/null +++ b/vespalib/src/tests/util/relative_frequency_sketch_test.cpp @@ -0,0 +1,98 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include + +namespace vespalib { + +using namespace ::testing; + +namespace { + +struct IdentityHash { + template + constexpr size_t operator()(T v) const noexcept { return v; } +}; + +} + +struct RelativeFrequencySketchTest : Test { + // Note: although the sketch is inherently _probabilistic_, the below tests are fully + // deterministic as long as the underlying hash function remains the same. This is also why + // we explicitly do _not_ use std::hash here, but defer entirely to (deterministic) XXH3. + using U32FrequencySketch = RelativeFrequencySketch; +}; + +TEST_F(RelativeFrequencySketchTest, frequency_estimates_are_initially_zero) { + U32FrequencySketch sketch(2); + EXPECT_EQ(sketch.count_min(0), 0); + EXPECT_EQ(sketch.count_min(12345), 0); + EXPECT_EQ(sketch.estimate_relative_frequency(123, 456), std::weak_ordering::equivalent); +} + +TEST_F(RelativeFrequencySketchTest, frequency_is_counted_up_to_and_saturated_at_15) { + U32FrequencySketch sketch(1); + for (uint32_t i = 1; i <= 20; ++i) { + sketch.add(7); + // With only one entry we're guaranteed to be exact up to the saturation point + if (i < 15) { + EXPECT_EQ(sketch.count_min(7), i); + } else { + EXPECT_EQ(sketch.count_min(7), 15); + } + } +} + +TEST_F(RelativeFrequencySketchTest, add_and_count_returns_min_count_after_add) { + U32FrequencySketch sketch(2); + EXPECT_EQ(sketch.add_and_count(123), 1); + EXPECT_EQ(sketch.add_and_count(123), 2); + EXPECT_EQ(sketch.add_and_count(123), 3); + EXPECT_EQ(sketch.add_and_count(456), 1); +} + +TEST_F(RelativeFrequencySketchTest, can_track_frequency_of_multiple_elements) { + U32FrequencySketch sketch(3); + sketch.add(100); + sketch.add(200); + sketch.add(300); + sketch.add(200); + + EXPECT_EQ(sketch.count_min(100), 1); + EXPECT_EQ(sketch.count_min(200), 2); + EXPECT_EQ(sketch.count_min(300), 1); + EXPECT_EQ(sketch.count_min(400), 0); + + EXPECT_EQ(sketch.estimate_relative_frequency(0, 100), std::weak_ordering::less); + 
EXPECT_EQ(sketch.estimate_relative_frequency(100, 0), std::weak_ordering::greater); + EXPECT_EQ(sketch.estimate_relative_frequency(100, 100), std::weak_ordering::equivalent); + EXPECT_EQ(sketch.estimate_relative_frequency(100, 300), std::weak_ordering::equivalent); + EXPECT_EQ(sketch.estimate_relative_frequency(300, 100), std::weak_ordering::equivalent); + EXPECT_EQ(sketch.estimate_relative_frequency(100, 200), std::weak_ordering::less); + EXPECT_EQ(sketch.estimate_relative_frequency(200, 100), std::weak_ordering::greater); +} + +TEST_F(RelativeFrequencySketchTest, counters_are_divided_by_2_once_window_size_reached) { + U32FrequencySketch sketch(8); + const auto ws = sketch.window_size(); + std::vector truth(8); + ASSERT_GT(ws, 0); + for (size_t i = 0; i < ws - 1; ++i) { // don't trigger decay just yet + uint32_t elem = i % 8; + sketch.add(elem); + truth[elem]++; + } + std::vector c_before(8); + for (uint32_t i = 0; i < 8; ++i) { + c_before[i] = sketch.count_min(i); + EXPECT_GE(c_before[i], truth[i]); + // No counters should be saturated yet + EXPECT_LT(c_before[i], 15); + } + // Edge triggered sample ==> should divide all counters + sketch.add(9); + for (uint32_t i = 0; i < 8; ++i) { + EXPECT_EQ(sketch.count_min(i), c_before[i] / 2); + } +} + +} diff --git a/vespalib/src/vespa/fastos/file.cpp b/vespalib/src/vespa/fastos/file.cpp index e3e7b385fa2f..a4d9d1ee5c95 100644 --- a/vespalib/src/vespa/fastos/file.cpp +++ b/vespalib/src/vespa/fastos/file.cpp @@ -165,10 +165,9 @@ FastOS_FileInterface::DirectIOPadding(int64_t offset, void * -FastOS_FileInterface::allocateGenericDirectIOBuffer(size_t byteSize, void *&realPtr) +FastOS_FileInterface::allocateIOBuffer(size_t byteSize) { - realPtr = malloc(byteSize); // Default - use malloc allignment - return realPtr; + return malloc(byteSize); // Default - use malloc allignment } size_t @@ -178,9 +177,9 @@ FastOS_FileInterface::getMaxDirectIOMemAlign() } void * -FastOS_FileInterface::AllocateDirectIOBuffer(size_t byteSize, void 
*&realPtr) +FastOS_FileInterface::AllocateDirectIOBuffer(size_t byteSize) { - return allocateGenericDirectIOBuffer(byteSize, realPtr); + return allocateIOBuffer(byteSize); } void diff --git a/vespalib/src/vespa/fastos/file.h b/vespalib/src/vespa/fastos/file.h index ec2def0adc87..9ab562e7204b 100644 --- a/vespalib/src/vespa/fastos/file.h +++ b/vespalib/src/vespa/fastos/file.h @@ -393,16 +393,12 @@ class FastOS_FileInterface size_t &padAfter); /** - * Allocate a buffer properly alligned with regards to direct io - * access restrictions. + * Allocate a buffer for normal io. * @param byteSize Number of bytes to be allocated - * @param realPtr Reference where the actual pointer returned - * from malloc will be saved. Use free() with - * this pointer to deallocate the buffer. - * This value is always set. - * @return Alligned pointer value or nullptr if out of memory + * @return pointer value or nullptr if out of memory + * Use free() with this pointer to deallocate the buffer. */ - static void *allocateGenericDirectIOBuffer(size_t byteSize, void *&realPtr); + static void *allocateIOBuffer(size_t byteSize); /** * Get maximum memory alignment for directio buffers. @@ -411,16 +407,13 @@ class FastOS_FileInterface static size_t getMaxDirectIOMemAlign(); /** - * Allocate a buffer properly alligned with regards to direct io + * Allocate a buffer properly aligned with regards to direct io * access restrictions. * @param byteSize Number of bytes to be allocated - * @param realPtr Reference where the actual pointer returned - * from malloc will be saved. Use free() with - * this pointer to deallocate the buffer. - * This value is always set. - * @return Alligned pointer value or nullptr if out of memory + * @return Aligned pointer value or nullptr if out of memory. + * Use free() with this pointer to deallocate the buffer. 
*/ - virtual void *AllocateDirectIOBuffer(size_t byteSize, void *&realPtr); + virtual void *AllocateDirectIOBuffer(size_t byteSize); /** * Enable mapping of complete file contents into the address space of the diff --git a/vespalib/src/vespa/fastos/linux_file.cpp b/vespalib/src/vespa/fastos/linux_file.cpp index 0f32aa953a81..3a69f6c5c17b 100644 --- a/vespalib/src/vespa/fastos/linux_file.cpp +++ b/vespalib/src/vespa/fastos/linux_file.cpp @@ -10,14 +10,15 @@ #ifdef __linux__ #include "file.h" #include "file_rw_ops.h" -#include #include #include #include +#include +#include #include #include +#include #include -#include using fastos::File_RW_Ops; @@ -282,24 +283,23 @@ FastOS_Linux_File::SetSize(int64_t newSize) return rc; } - -namespace { - void * align(void * p, size_t alignment) { - const size_t alignMask(alignment-1); - return reinterpret_cast((reinterpret_cast(p) + alignMask) & ~alignMask); - } -} - void * -FastOS_Linux_File::AllocateDirectIOBuffer (size_t byteSize, void *&realPtr) +FastOS_Linux_File::AllocateDirectIOBuffer (size_t byteSize) { size_t dummy1, dummy2; size_t memoryAlignment; + void* ptr = nullptr; GetDirectIORestrictions(memoryAlignment, dummy1, dummy2); - - realPtr = malloc(byteSize + memoryAlignment - 1); - return align(realPtr, memoryAlignment); + memoryAlignment = std::max(memoryAlignment, sizeof(void*)); + int result = posix_memalign(&ptr, memoryAlignment, byteSize); + if (result != 0) { + std::ostringstream os; + os << "posix_memalign(&ptr, " << memoryAlignment << ", " << byteSize << ") failed with code " << result << + " : " << getErrorString(result); + throw std::runtime_error(os.str()); + } + return ptr; } size_t diff --git a/vespalib/src/vespa/fastos/linux_file.h b/vespalib/src/vespa/fastos/linux_file.h index af6e6af51af2..a9239f9c331d 100644 --- a/vespalib/src/vespa/fastos/linux_file.h +++ b/vespalib/src/vespa/fastos/linux_file.h @@ -35,7 +35,7 @@ class FastOS_Linux_File final : public FastOS_UNIX_File int64_t getPosition() const 
override; bool SetSize(int64_t newSize) override; void ReadBuf(void *buffer, size_t length, int64_t readOffset) override; - void *AllocateDirectIOBuffer(size_t byteSize, void *&realPtr) override; + void *AllocateDirectIOBuffer(size_t byteSize) override; [[nodiscard]] ssize_t Read(void *buffer, size_t len) override; diff --git a/vespalib/src/vespa/vespalib/util/CMakeLists.txt b/vespalib/src/vespa/vespalib/util/CMakeLists.txt index 406ea68a08a1..98b0ba3ca437 100644 --- a/vespalib/src/vespa/vespalib/util/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/util/CMakeLists.txt @@ -68,6 +68,7 @@ vespa_add_library(vespalib_vespalib_util OBJECT rcuvector.cpp ref_counted.cpp regexp.cpp + relative_frequency_sketch.cpp require.cpp resource_limits.cpp round_up_to_page_size.cpp diff --git a/vespalib/src/vespa/vespalib/util/alloc.cpp b/vespalib/src/vespa/vespalib/util/alloc.cpp index a2f124c6aa96..fd2f3f669513 100644 --- a/vespalib/src/vespa/vespalib/util/alloc.cpp +++ b/vespalib/src/vespa/vespalib/util/alloc.cpp @@ -315,7 +315,8 @@ AlignedHeapAllocator::alloc(size_t sz) const { void* ptr; int result = posix_memalign(&ptr, _alignment, sz); if (result != 0) { - throw IllegalArgumentException(make_string("posix_memalign(%zu, %zu) failed with code %d", sz, _alignment, result)); + throw IllegalArgumentException(make_string("posix_memalign(&ptr, %zu, %zu) failed with code %d : %s", + _alignment, sz, result, getErrorString(result).c_str())); } return PtrAndSize(ptr, sz); } diff --git a/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp new file mode 100644 index 000000000000..fe826b24c746 --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.cpp @@ -0,0 +1,170 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "relative_frequency_sketch.h" +#include +#include +#include + +namespace vespalib { + +/** + * Cf. 
the description of the Caffeine sketch in [2][3] we use 8 bytes per cache entry and + * a sample (window) size W that is 10x the cache size (C). It is not immediately clear why + * W/C = 10 rather than 16 since we use 4 bits and log2(10) = 3.321..., but surely the + * underlying reason must be very exciting. + * + * Note: `Alloc` currently does not support < 512 byte alignment, which is suboptimal if + * the allocation is small enough to end up on the heap (FIXME). + */ +RawRelativeFrequencySketch::RawRelativeFrequencySketch(size_t count) + : _buf(alloc::Alloc::alloc_aligned(roundUp2inN(std::max(size_t(64U), count * 8)), 512)), + _estimated_sample_count(0), + _window_size((_buf.size() / 8) * 10), + _block_mask_bits(_buf.size() > 64 ? Optimized::msbIdx(_buf.size() / 64) : 0) +{ + assert(_block_mask_bits <= 44); // Will always be the case in practice, but it's an invariant... + memset(_buf.get(), 0, _buf.size()); +} + +RawRelativeFrequencySketch::~RawRelativeFrequencySketch() = default; + +/** + * Add an element by its hash. This involves incrementing 4 distinct counters based on the hash. + * + * Our sketch buffer is logically divided into buf_size/64 distinct 64-byte blocks. Each + * block is in turn logically divided into 4 rows x 32 4-bit counters, laid out sequentially. + * Each counter is saturated at 15, i.e. there is no overflow. + * + * We first select the block based on the B LSBs of the hash, where B is log2(buffer_size/64) + * and buffer_size is always a power of two. These B bits are considered consumed and are not + * used for anything else. + * + * Within the block we always update exactly 1 counter in each logical row. Use 5 distinct + * bits from the hash for each of the 4 row updates (4 bits to select a byte out of 16, 1 for + * selecting either the high or low in-byte nibble). + * + * Iff the estimated sample count reaches the window size threshold we implicitly divide all + * recorded 4-bit counters in half. 
+ */ +template +uint8_t RawRelativeFrequencySketch::add_by_hash_impl(uint64_t hash) noexcept { + const uint64_t block = hash & ((1ULL << _block_mask_bits) - 1); + hash >>= _block_mask_bits; + assert(block*64 + 64 <= _buf.size()); + auto* block_ptr = static_cast(_buf.get()) + (block * 64); + uint8_t new_counters[4]; + // The compiler will happily and easily unroll this loop. + for (uint8_t i = 0; i < 4; ++i) { + uint8_t h = hash >> (i*5); + uint8_t* vp = block_ptr + (i * 16) + (h & 0xf); // row #i byte select + const uint8_t v = *vp; + h >>= 4; + const uint8_t nib_shift = (h & 1) * 4; // High or low nibble shift factor (4 or 0) + const uint8_t nib_mask = 0xf << nib_shift; + const uint8_t nib_old = (v & nib_mask) >> nib_shift; + new_counters[i] = nib_old < 15 ? nib_old + 1 : 15; // Saturated add + const uint8_t nib_rem = v & ~nib_mask; // Untouched nibble that should be preserved + *vp = (new_counters[i] << nib_shift) | nib_rem; + } + if (++_estimated_sample_count >= _window_size) [[unlikely]] { + div_all_by_2(); + _estimated_sample_count /= 2; + } + if constexpr (ReturnMinCount) { + return std::min(std::min(new_counters[0], new_counters[1]), + std::min(new_counters[2], new_counters[3])); + } else { + return 0; + } +} + +void RawRelativeFrequencySketch::add_by_hash(uint64_t hash) noexcept { + (void)add_by_hash_impl(hash); +} + +uint8_t RawRelativeFrequencySketch::add_and_count_by_hash(uint64_t hash) noexcept { + return add_by_hash_impl(hash); +} + +/** + * Estimates the count associated with the given hash. This uses the exact same counter + * addressing as `add_by_hash()`, so refer to that function for a description on the + * semantics. As the name Count-Min implies we take the _minimum_ of the observed counters + * and return this value to the caller. + * + * This will over-estimate the true frequency iff _all_ counters overlap with at least one + * other element, but it will never under-estimate (here casually ignoring the effects of + * counter decaying). 
+ */ +uint8_t RawRelativeFrequencySketch::count_min_by_hash(uint64_t hash) const noexcept { + const uint64_t block = hash & ((1ULL << _block_mask_bits) - 1); + hash >>= _block_mask_bits; + const uint8_t* block_ptr = static_cast(_buf.get()) + (block * 64); + uint8_t cm[4]; + for (uint8_t i = 0; i < 4; ++i) { + uint8_t h = hash >> (i*5); + const uint8_t* vp = block_ptr + (i * 16) + (h & 0xf); // row #i byte select + h >>= 4; + const uint8_t nib_shift = (h & 1) * 4; // 4 or 0 + const uint8_t nib_mask = 0xf << nib_shift; + cm[i] = (*vp & nib_mask) >> nib_shift; + } + return std::min(std::min(cm[0], cm[1]), std::min(cm[2], cm[3])); +} + +std::strong_ordering +RawRelativeFrequencySketch::estimate_relative_frequency_by_hash(uint64_t lhs_hash, uint64_t rhs_hash) const noexcept { + return count_min_by_hash(lhs_hash) <=> count_min_by_hash(rhs_hash); +} + +/** + * Divides all the 4-bit counters in the sketch by 2. Since this integral division, we + * inherently lose some precision for odd-numbered counter values. + * + * We speed up the division by treating each 64-byte block as 8x u64 values that can + * logically be processed in parallel. The compiler will unroll and auto-vectorize the u64 + * fixed-count inner-loop as expected (verified via Godbolt). + * + * Each u64 value is right-shifted by 1. This shifts the LSB of all 16 4-bit nibbles (except + * the last one) into the MSB of the next nibble. We want the semantics as-if each nibble + * were in its own register, which would mean shifting in a zero bit in the MSB instead. + * We emulate this by explicitly clearing all nibble MSBs. This effectively divides all + * nibbles by 2. This should be entirely endian-agnostic. + * + * Example: + * + * Before: + * nibble#: [ 15 ][ 14 ][ 13 ][ 12 ][ ... + * bits: 1111 0011 0000 1100 ... + * value: 15 3 0 12 ... + * + * After shift (_uncorrected_ prior to masking) + * nibble#: [ 15 ][ 14 ][ 13 ][ 12 ][ ... + * bits: 0111 1001 1000 0110 0... + * value: 7 9 8 6 ... 
+ * + * We will then apply the following per-nibble mask: + * mask: 0111 0111 0111 0111 0... + * + * After shift (corrected by masking off nibble MSBs) + * nibble#: [ 15 ][ 14 ][ 13 ][ 12 ][ ... + * bits: 0111 0001 0000 0110 0... + * value: 7 1 0 6 ... + */ +void RawRelativeFrequencySketch::div_all_by_2() noexcept { + const uint64_t n_blocks = _buf.size() / 64; + auto* block_ptr = static_cast(_buf.get()); + for (uint64_t i = 0; i < n_blocks; ++i) { + for (uint32_t j = 0; j < 8; ++j) { + uint64_t chunk; + static_assert(sizeof(chunk)*8 == 64); + // Compiler will optimize away memcpys (avoids aliasing). + memcpy(&chunk, block_ptr + (8 * j), 8); + chunk >>= 1; + chunk &= 0x7777'7777'7777'7777ULL; // nibble ~MSB mask + memcpy(block_ptr + (8 * j), &chunk, 8); + } + block_ptr += 64; + } +} + +} // vespalib diff --git a/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h new file mode 100644 index 000000000000..1df6c8cee1a9 --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/relative_frequency_sketch.h @@ -0,0 +1,147 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "alloc.h" +#include +#include + +namespace vespalib { + +/** + * Adds an implementation of a probabilistic frequency sketch that allows for estimating the + * relative frequency of of elements from a stream of events. That is, the sketch does not + * capture the _absolute_ frequency of a given element over time. + * + * To reduce the requirement for the number of bits used for the sketch's underlying counters, + * this sketch uses automatic decaying of counter values once the number of recorded samples + * reaches a certain point (relative to the sketch's size). Decaying divides all counters by 2. + * + * The underlying data structure is a Count-Min sketch [0][1] with automatic decaying of + * counters based on TinyLFU [2]. 
+ * + * This implementation has certain changes from a "textbook" CM sketch, inspired by the + * approach used in [3]. In particular, instead of having `d` logical rows each with width `w` + * that are accessed with hash-derived indexes (and thus likely triggering `d` cache misses + * for large values of `w`) we subdivide into w/64 blocks each with fixed number d=4 rows of + * 32 4-bit counters, i.e. each block is exactly 64 bytes. Counter updates or reads always + * happen within the scope of a single block. We also ensure the block array is allocated with + * at least a 64-byte alignment. This ensures that a given sketch access will touch exactly 1 + * cache line of the underlying sketch buffer (not counting cache lines occupied by the sketch + * object itself, as we assume these are already present in the cache). + * Similarly, comparing the frequency of two elements will always touch at most 2 cache lines. + * + * Unlike [3] we use byte-wise counter accesses and only using a single hash computation per + * distinct sketch lookup instead of explicitly re-mixing hash bits. We also always divide the + * decay counter by 2 instead of subtracting the number of odd counters found (TODO reconsider?). + * + * The Count-Min sketch (and its cousin, the Counting Bloom Filter) using `k` counters is + * usually described as requiring k pairwise independent hash functions. This implementation + * assumes this requirement is unnecessary assuming a hash function with good entropy; we + * instead extract non-overlapping subsets of bits of a single hash value and use these as + * indices into our data structure components. + * + * Important: this frequency sketch _requires_ a good hash function, i.e. high entropy. + * Use `RelativeFrequencySketch` with HasGoodEntropyHash=false (default) if this is not the + * case for the type being counted, as it implicitly mixes the hash bits using XXH3. + * + * Thread safety: as thread safe as a std::vector. 
+ * + * References: + * [0]: The Count-Min Sketch and its Applications (2003) + * [1]: https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch + * [2]: TinyLFU: A Highly Efficient Cache Admission Policy (2015) + * [3]: https://github.com/ben-manes/caffeine/blob/master/caffeine/ + * src/main/java/com/github/benmanes/caffeine/cache/FrequencySketch.java + */ +class RawRelativeFrequencySketch { + alloc::Alloc _buf; + size_t _estimated_sample_count; + size_t _window_size; + uint32_t _block_mask_bits; +public: + explicit RawRelativeFrequencySketch(size_t count); + ~RawRelativeFrequencySketch(); + + void add_by_hash(uint64_t hash) noexcept; + [[nodiscard]] uint8_t add_and_count_by_hash(uint64_t hash) noexcept; + // Note: since this compares _hashes_ rather than elements this has strong ordering semantics. + [[nodiscard]] std::strong_ordering estimate_relative_frequency_by_hash(uint64_t lhs_hash, uint64_t rhs_hash) const noexcept; + + // Gets the raw underlying counter value saturated in [0, 15] for a given hash. + [[nodiscard]] uint8_t count_min_by_hash(uint64_t hash) const noexcept; + + [[nodiscard]] size_t window_size() const noexcept { return _window_size; } +private: + void div_all_by_2() noexcept __attribute__((noinline)); + + template + uint8_t add_by_hash_impl(uint64_t hash) noexcept; +}; + +template +concept SketchHasher = requires(H h, T t) { + // Hashers should never throw. + { h(t) } noexcept; + // We need a 64-bit hash output (not using uint64_t since STL is standardized + // on returning size_t from hash functions). + { h(t) } -> std::same_as; +}; + +/** + * Wrapper of RawRelativeFrequencySketch for an arbitrary hashable type. + * + * Only set HasGoodEntropyHash=true if you know that the underlying hash function is + * of good quality. This _excludes_ std::hash<> hashes, especially those for integers, + * as the hash function for those is more often than not the identity function. + * + * See `RawRelativeFrequencySketch` for algorithm details. 
+ */ +template Hash = std::hash, bool HasGoodEntropyHash = false> +class RelativeFrequencySketch { + RawRelativeFrequencySketch _impl; + [[no_unique_address]] Hash _hash; +public: + // Initializes a sketch used for estimating frequencies for an underlying cache + // (or similar data structure) that can hold a maximum of `count` entries. + explicit RelativeFrequencySketch(size_t count, Hash hash = Hash{}) + : _impl(count), + _hash(hash) + {} + ~RelativeFrequencySketch() = default; +private: + [[nodiscard]] uint64_t hash_elem(const T& elem) const noexcept { + uint64_t hash = _hash(elem); + if constexpr (!HasGoodEntropyHash) { + hash = xxhash::xxh3_64(hash); // Mix it up! + } + return hash; + } +public: + // Increments the estimated frequency for the given element, identified by its hash. + // Frequency is saturated at 15. + void add(const T& elem) noexcept { + _impl.add_by_hash(hash_elem(elem)); + } + // Same as `add` but returns Count-Min estimate from _after_ `elem` has been added. + [[nodiscard]] uint8_t add_and_count(const T& elem) noexcept { + return _impl.add_and_count_by_hash(hash_elem(elem)); + } + // Returns a frequency estimate for the given element, saturated at 15. Since this is + // a probabilistic sketch, the frequency may be overestimated. Note that automatic counter + // decaying will over time reduce the reported frequency of elements that are no longer + // added to the sketch. + [[nodiscard]] uint8_t count_min(const T& elem) const noexcept { + return _impl.count_min_by_hash(hash_elem(elem)); + } + [[nodiscard]] std::weak_ordering estimate_relative_frequency(const T& lhs, const T& rhs) const noexcept { + const uint64_t lhs_hash = hash_elem(lhs); + const uint64_t rhs_hash = hash_elem(rhs); + return _impl.estimate_relative_frequency_by_hash(lhs_hash, rhs_hash); + } + // Sample count required before all counters are automatically divided by 2. 
+ // Note that invoking `add(v)` for an element `v` whose counters are _all_ fully + // saturated prior to the invocation will _not_ count towards the sample count. + [[nodiscard]] size_t window_size() const noexcept { return _impl.window_size(); } +}; + +} // vespalib diff --git a/vespalib/src/vespa/vespalib/util/round_up_to_page_size.cpp b/vespalib/src/vespa/vespalib/util/round_up_to_page_size.cpp index 80b28d7e0274..86b13a253c17 100644 --- a/vespalib/src/vespa/vespalib/util/round_up_to_page_size.cpp +++ b/vespalib/src/vespa/vespalib/util/round_up_to_page_size.cpp @@ -11,6 +11,11 @@ const size_t page_size = getpagesize(); } +uint64_t round_down_to_page_boundary(uint64_t offset) +{ + return (offset & ~static_cast(page_size - 1)); +} + size_t round_up_to_page_size(size_t size) { return ((size + (page_size - 1)) & ~(page_size - 1)); diff --git a/vespalib/src/vespa/vespalib/util/round_up_to_page_size.h b/vespalib/src/vespa/vespalib/util/round_up_to_page_size.h index 4923d96d94e1..a06f7053a4a5 100644 --- a/vespalib/src/vespa/vespalib/util/round_up_to_page_size.h +++ b/vespalib/src/vespa/vespalib/util/round_up_to_page_size.h @@ -3,9 +3,15 @@ #pragma once #include +#include namespace vespalib { +/* + * Return offset rounded down to a page boundary. + */ +uint64_t round_down_to_page_boundary(uint64_t offset); + /* * Return sz rounded up to a multiple of page size. */
    NameDescriptionUnitSuffixes
    NameUnitSuffixesDescription