From 045e9c003dedd0d0a17dcc580e3b1f18a2462fcf Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Fri, 30 Aug 2024 13:36:46 +0000 Subject: [PATCH] add feature flag for tuning memory on logserver nodes --- config-model-api/abi-spec.json | 1 + .../yahoo/config/model/api/ModelContext.java | 1 + .../validation/QuotaValidator.java | 4 ++- .../config/provision/CapacityPolicies.java | 26 ++++++++++++++----- .../server/deploy/ModelContextImpl.java | 3 +++ .../com/yahoo/vespa/flags/PermanentFlags.java | 6 +++++ .../hosted/provision/NodeRepository.java | 8 +++++- 7 files changed, 41 insertions(+), 8 deletions(-) diff --git a/config-model-api/abi-spec.json b/config-model-api/abi-spec.json index fe1648f6bb5b..2e06c1a994da 100644 --- a/config-model-api/abi-spec.json +++ b/config-model-api/abi-spec.json @@ -1346,6 +1346,7 @@ "public boolean logserverOtelCol()", "public com.yahoo.config.provision.SharedHosts sharedHosts()", "public com.yahoo.config.provision.NodeResources$Architecture adminClusterArchitecture()", + "public double logserverNodeMemory()", "public boolean symmetricPutAndActivateReplicaSelection()", "public boolean enforceStrictlyIncreasingClusterStateVersions()", "public boolean distributionConfigFromClusterController()", diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java index 63d767c5d442..547b7623d887 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java @@ -119,6 +119,7 @@ interface FeatureFlags { @ModelFeatureFlag(owners = {"olaa"}) default boolean logserverOtelCol() { return false; } @ModelFeatureFlag(owners = {"bratseth"}) default SharedHosts sharedHosts() { return SharedHosts.empty(); } @ModelFeatureFlag(owners = {"bratseth"}) default Architecture adminClusterArchitecture() { return Architecture.x86_64; } + @ModelFeatureFlag(owners = {"arnej"}) default double logserverNodeMemory() { return 0.0; } @ModelFeatureFlag(owners = {"vekterli"}) default boolean symmetricPutAndActivateReplicaSelection() { return false; } @ModelFeatureFlag(owners = {"vekterli"}) default boolean enforceStrictlyIncreasingClusterStateVersions() { return false; } @ModelFeatureFlag(owners = {"vekterli"}) default boolean distributionConfigFromClusterController() { return false; } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java index ea579aaf5d15..b375c835d2c4 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java @@ -34,8 +34,10 @@ public class QuotaValidator implements Validator { public void validate(Context context) { var zone = context.deployState().zone(); var exclusivity = new Exclusivity(zone, context.deployState().featureFlags().sharedHosts()); + var tuning = new CapacityPolicies.Tuning(context.deployState().featureFlags().adminClusterArchitecture(), + context.deployState().featureFlags().logserverNodeMemory()); var capacityPolicies = new CapacityPolicies(zone, exclusivity, context.model().applicationPackage().getApplicationId(), - context.deployState().featureFlags().adminClusterArchitecture()); + tuning); var quota = context.deployState().getProperties().quota(); quota.maxClusterSize().ifPresent(maxClusterSize -> validateMaxClusterSize(maxClusterSize, context.model())); quota.budgetAsDecimal().ifPresent(budget -> validateBudget(budget, context, capacityPolicies)); diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java b/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java index 7d44c4bb8e1c..85699b155263 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java @@ -17,16 +17,29 @@ */ public class CapacityPolicies { + public record Tuning(Architecture adminClusterArchitecture, + double logserverMemoryGiB) + { + double logserverMem(double v) { + double override = logserverMemoryGiB(); + return (override > 0) ? override : v; + } + } + private final Zone zone; private final Exclusivity exclusivity; private final ApplicationId applicationId; - private final Architecture adminClusterArchitecture; + private final Tuning tuning; public CapacityPolicies(Zone zone, Exclusivity exclusivity, ApplicationId applicationId, Architecture adminClusterArchitecture) { + this(zone, exclusivity, applicationId, new Tuning(adminClusterArchitecture, 0.0)); + } + + public CapacityPolicies(Zone zone, Exclusivity exclusivity, ApplicationId applicationId, Tuning tuning) { this.zone = zone; this.exclusivity = exclusivity; this.applicationId = applicationId; - this.adminClusterArchitecture = adminClusterArchitecture; + this.tuning = tuning; } public Capacity applyOn(Capacity capacity, boolean exclusive) { @@ -92,6 +105,7 @@ public NodeResources specifyFully(NodeResources resources, ClusterSpec clusterSp } private NodeResources defaultResources(ClusterSpec clusterSpec) { + var adminClusterArchitecture = tuning.adminClusterArchitecture(); if (clusterSpec.type() == ClusterSpec.Type.admin) { if (exclusivity.allocation(clusterSpec)) { return smallestExclusiveResources().with(adminClusterArchitecture); @@ -134,14 +148,14 @@ private NodeResources clusterControllerResources(ClusterSpec clusterSpec, Archit private NodeResources logserverResources(Architecture architecture) { if (zone.cloud().name() == CloudName.AZURE) - return new NodeResources(2, 4, 50, 0.3); + return new NodeResources(2, tuning.logserverMem(4.0), 50, 0.3); if (zone.cloud().name() == CloudName.GCP) - return new NodeResources(1, 4, 50, 0.3); + return new NodeResources(1, tuning.logserverMem(4.0), 50, 0.3); return architecture == Architecture.arm64 - ? new NodeResources(0.5, 2.5, 50, 0.3) - : new NodeResources(0.5, 2, 50, 0.3); + ? new NodeResources(0.5, tuning.logserverMem(2.5), 50, 0.3) + : new NodeResources(0.5, tuning.logserverMem(2.0), 50, 0.3); } // The lowest amount of resources that can be exclusive allocated (i.e. a matching host flavor for this exists) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java index 715085c2dcd1..1143724128a9 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java @@ -208,6 +208,7 @@ public static class FeatureFlags implements ModelContext.FeatureFlags { private final boolean logserverOtelCol; private final SharedHosts sharedHosts; private final Architecture adminClusterArchitecture; + private final double logserverNodeMemory; private final boolean symmetricPutAndActivateReplicaSelection; private final boolean enforceStrictlyIncreasingClusterStateVersions; private final boolean launchApplicationAthenzService; @@ -258,6 +259,7 @@ public FeatureFlags(FlagSource source, ApplicationId appId, Version version) { this.logserverOtelCol = Flags.LOGSERVER_OTELCOL_AGENT.bindTo(source).with(appId).with(version).value(); this.sharedHosts = PermanentFlags.SHARED_HOST.bindTo(source).with( appId).with(version).value(); this.adminClusterArchitecture = Architecture.valueOf(PermanentFlags.ADMIN_CLUSTER_NODE_ARCHITECTURE.bindTo(source).with(appId).with(version).value()); + this.logserverNodeMemory = PermanentFlags.LOGSERVER_NODE_MEMORY.bindTo(source).with(appId).with(version).value(); this.symmetricPutAndActivateReplicaSelection = Flags.SYMMETRIC_PUT_AND_ACTIVATE_REPLICA_SELECTION.bindTo(source).with(appId).with(version).value(); this.enforceStrictlyIncreasingClusterStateVersions = Flags.ENFORCE_STRICTLY_INCREASING_CLUSTER_STATE_VERSIONS.bindTo(source).with(appId).with(version).value(); this.launchApplicationAthenzService = Flags.LAUNCH_APPLICATION_ATHENZ_SERVICE.bindTo(source).with(appId).with(version).value(); @@ -314,6 +316,7 @@ public FeatureFlags(FlagSource source, ApplicationId appId, Version version) { @Override public boolean logserverOtelCol() { return logserverOtelCol; } @Override public SharedHosts sharedHosts() { return sharedHosts; } @Override public Architecture adminClusterArchitecture() { return adminClusterArchitecture; } + @Override public double logserverNodeMemory() { return logserverNodeMemory; } @Override public boolean symmetricPutAndActivateReplicaSelection() { return symmetricPutAndActivateReplicaSelection; } @Override public boolean enforceStrictlyIncreasingClusterStateVersions() { return enforceStrictlyIncreasingClusterStateVersions; } @Override public boolean distributionConfigFromClusterController() { return distributionConfigFromClusterController; } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java index a79c90877c11..cab679c14e48 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java @@ -329,6 +329,12 @@ public class PermanentFlags { value -> Set.of("any", "arm64", "x86_64").contains(value), INSTANCE_ID); + public static final UnboundDoubleFlag LOGSERVER_NODE_MEMORY = defineDoubleFlag( + "logserver-node-memory", 0.0, + "Amount of memory (in GiB) to allocate for logserver nodes", + "Takes effect on allocation from node repository", + INSTANCE_ID); + public static final UnboundListFlag CLOUD_ACCOUNTS = defineListFlag( "cloud-accounts", List.of(), String.class, "A list of 12-digit AWS account IDs that are valid for the given tenant", diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 0778e21fcd84..350b6d2e9fa6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -215,7 +215,13 @@ public CapacityPolicies capacityPoliciesFor(ApplicationId applicationId) { .bindTo(flagSource) .with(INSTANCE_ID, applicationId.serializedForm()) .value(); - return new CapacityPolicies(zone, exclusivity(), applicationId, Architecture.valueOf(adminClusterNodeArchitecture)); + double logserverMemory = PermanentFlags.LOGSERVER_NODE_MEMORY + .bindTo(flagSource) + .with(INSTANCE_ID, applicationId.serializedForm()) + .value(); + var tuning = new CapacityPolicies.Tuning(Architecture.valueOf(adminClusterNodeArchitecture), + logserverMemory); + return new CapacityPolicies(zone, exclusivity(), applicationId, tuning); } /**