Skip to content

Commit

Permalink
Merge branch 'master' into bratseth/autoscaling-completion
Browse files Browse the repository at this point in the history
  • Loading branch information
bratseth authored Mar 10, 2023
2 parents 315c1fe + 88b39d0 commit 7ddc151
Show file tree
Hide file tree
Showing 54 changed files with 452 additions and 526 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import com.yahoo.searchlib.rankingexpression.transform.ExpressionTransformer;

import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;

/**
Expand All @@ -23,6 +24,7 @@
public class InputRecorder extends ExpressionTransformer<RankProfileTransformContext> {

private final Set<String> neededInputs;
private final Set<String> handled = new HashSet<>();

public InputRecorder(Set<String> target) {
this.neededInputs = target;
Expand Down Expand Up @@ -52,9 +54,13 @@ private void handle(ReferenceNode feature, RankProfileTransformContext context)
simpleFunctionOrIdentifier = true;
}
if (simpleFunctionOrIdentifier) {
if (handled.contains(name)) {
return;
}
var f = context.rankProfile().getFunctions().get(name);
if (f != null && f.function().arguments().size() == 0) {
transform(f.function().getBody().getRoot(), context);
handled.add(name);
return;
}
neededInputs.add(feature.toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import com.yahoo.component.annotation.Inject;
import com.yahoo.container.handler.Timing;
import com.yahoo.container.logging.AccessLog;
import com.yahoo.container.logging.AccessLogEntry;
import com.yahoo.jdisc.Metric;
import com.yahoo.jdisc.Response;
Expand Down Expand Up @@ -102,54 +101,54 @@ private static String getClientIP(com.yahoo.jdisc.http.HttpRequest httpRequest)

private void logTimes(long startTime, String sourceIP,
long renderStartTime, long commitStartTime, long endTime,
String req, String normalizedQuery, Timing t) {
String req, ExtendedResponse response) {

// note: intentionally only taking time since request was received
long totalTime = endTime - startTime;

long timeoutInterval = Long.MAX_VALUE;
long requestOverhead = 0;
long summaryStartTime = 0;
long timeoutInterval;
long requestOverhead;
long summaryStartTime;
Timing t = response.getTiming();
if (t != null) {
timeoutInterval = t.getTimeout();
long queryStartTime = t.getQueryStartTime();
if (queryStartTime > 0) {
requestOverhead = queryStartTime - startTime;
}
requestOverhead = (queryStartTime > 0) ? queryStartTime - startTime : 0;
summaryStartTime = t.getSummaryStartTime();
}

if (totalTime <= timeoutInterval) {
return;
}

StringBuilder b = new StringBuilder();
b.append(normalizedQuery);
b.append(" from ").append(sourceIP).append(". ");

if (requestOverhead > 0) {
b.append("Time from HTTP connection open to request reception ");
b.append(requestOverhead).append(" ms. ");
}
if (summaryStartTime != 0) {
b.append("Request time: ");
b.append(summaryStartTime - startTime).append(" ms. ");
b.append("Summary fetch time: ");
b.append(renderStartTime - summaryStartTime).append(" ms. ");
} else {
long spentSearching = renderStartTime - startTime;
b.append("Processing time: ").append(spentSearching).append(" ms. ");
requestOverhead = 0;
summaryStartTime = 0;
timeoutInterval = Long.MAX_VALUE;
}

b.append("Result rendering/transfer: ");
b.append(commitStartTime - renderStartTime).append(" ms. ");
b.append("End transaction: ");
b.append(endTime - commitStartTime).append(" ms. ");
b.append("Total: ").append(totalTime).append(" ms. ");
b.append("Timeout: ").append(timeoutInterval).append(" ms. ");
b.append("Request string: ").append(req);
if (totalTime <= timeoutInterval) return;

log.log(Level.WARNING, "Slow execution. " + b);
log.log(Level.FINE, () -> {
StringBuilder b = new StringBuilder();
b.append(response.getParsedQuery());
b.append(" from ").append(sourceIP).append(". ");
if (requestOverhead > 0) {
b.append("Time from HTTP connection open to request reception ");
b.append(requestOverhead).append(" ms. ");
}
if (summaryStartTime != 0) {
b.append("Request time: ");
b.append(summaryStartTime - startTime).append(" ms. ");
b.append("Summary fetch time: ");
b.append(renderStartTime - summaryStartTime).append(" ms. ");
} else {
long spentSearching = renderStartTime - startTime;
b.append("Processing time: ").append(spentSearching).append(" ms. ");
}
b.append("Result rendering/transfer: ");
b.append(commitStartTime - renderStartTime).append(" ms. ");
b.append("End transaction: ");
b.append(endTime - commitStartTime).append(" ms. ");
b.append("Total: ").append(totalTime).append(" ms. ");
b.append("Timeout: ").append(timeoutInterval).append(" ms. ");
b.append("Request string: ").append(req);
return b.toString();
});
}

private static class NullResponse extends ExtendedResponse {
Expand Down Expand Up @@ -224,8 +223,7 @@ private void writeToLogs(long endTime) {
commitStartTime,
endTime,
getUri(jdiscRequest),
extendedResponse.getParsedQuery(),
extendedResponse.getTiming());
extendedResponse);

Optional<AccessLogEntry> jdiscRequestAccessLogEntry =
AccessLoggingRequestHandler.getAccessLogEntry(jdiscRequest);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,12 +180,25 @@ static TensorType toVespaType(ValueInfo valueInfo) {
}

static private TensorType.Value toVespaValueType(TensorInfo.OnnxTensorType onnxType) {
// NOTE:
// should match best_cell_type in onnx_wrapper.cpp
switch (onnxType) {
case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: return TensorType.Value.INT8;
case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16: return TensorType.Value.BFLOAT16;
case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: return TensorType.Value.FLOAT;
case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE: return TensorType.Value.DOUBLE;
}
case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:
return TensorType.Value.INT8;

case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16:
return TensorType.Value.BFLOAT16;

case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
return TensorType.Value.FLOAT;

case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
return TensorType.Value.DOUBLE;
}
return TensorType.Value.DOUBLE;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,21 +56,27 @@ static OrderedTensorType typeFrom(Onnx.TensorProto tensor) {
tensor.getDimsList());
}

private static TensorType.Value toValueType(Onnx.TensorProto.DataType dataType) {
switch (dataType) {
case FLOAT: return TensorType.Value.FLOAT;
case DOUBLE: return TensorType.Value.DOUBLE;
// Imperfect conversion, for now:
case BOOL: return TensorType.Value.FLOAT;
case INT8: return TensorType.Value.FLOAT;
case INT16: return TensorType.Value.FLOAT;
case INT32: return TensorType.Value.FLOAT;
case INT64: return TensorType.Value.FLOAT;
case UINT8: return TensorType.Value.FLOAT;
case UINT16: return TensorType.Value.FLOAT;
case UINT32: return TensorType.Value.FLOAT;
case UINT64: return TensorType.Value.FLOAT;
default: throw new IllegalArgumentException("A ONNX tensor with data type " + dataType +
private static TensorType.Value toValueType(Onnx.TensorProto.DataType onnxType) {
// NOTE:
// should match best_cell_type in onnx_wrapper.cpp
switch (onnxType) {
case BOOL: // Imperfect conversion fallthrough
case INT8:
return TensorType.Value.INT8;
case BFLOAT16:
return TensorType.Value.BFLOAT16;
case UINT8: // Imperfect conversion fallthrough
case INT16: // Imperfect conversion fallthrough
case UINT16: // Imperfect conversion fallthrough
case FLOAT:
return TensorType.Value.FLOAT;
case INT32: // Imperfect conversion fallthrough
case INT64: // Imperfect conversion fallthrough
case UINT32: // Imperfect conversion fallthrough
case UINT64: // Imperfect conversion fallthrough
case DOUBLE:
return TensorType.Value.DOUBLE;
default: throw new IllegalArgumentException("A ONNX tensor with data type " + onnxType +
" cannot be converted to a Vespa tensor type");
}
}
Expand Down
6 changes: 5 additions & 1 deletion model-integration/src/main/protobuf/onnx.proto
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,10 @@ message TensorProto {
UINT64 = 13;
COMPLEX64 = 14; // complex with float32 real and imaginary components
COMPLEX128 = 15; // complex with float64 real and imaginary components
// Non-IEEE floating-point format based on IEEE754 single-precision
// floating-point number truncated to 16 bits.
// This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
BFLOAT16 = 16;
// Future extensions go here.
}

Expand Down Expand Up @@ -461,4 +465,4 @@ message OperatorSetIdProto {
// The version of the operator set being identified.
// This field MUST be present in this version of the IR.
optional int64 version = 2;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,9 @@ public Mutex lock(ApplicationId application, Duration timeout) {
return db.lock(application, timeout);
}

/** Create a lock which provides exclusive rights to perform a maintenance deployment */
public Mutex lockMaintenance(ApplicationId application) {
return db.lockMaintenance(application);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,18 @@ protected final double maintain() {
return 1.0;
}

protected final Deployer deployer() { return deployer; }

/** Returns the number of deployments that are pending execution */
public int pendingDeployments() {
return pendingDeployments.size();
}

/** Returns whether given application should be deployed at this moment in time */
protected boolean canDeployNow(ApplicationId application) {
return true;
}
protected abstract boolean canDeployNow(ApplicationId application);

/** Returns the applications that should be maintained by this now. */
protected abstract Map<ApplicationId, String> applicationsNeedingMaintenance();

/**
* Redeploy this application.
Expand All @@ -64,19 +67,14 @@ protected boolean canDeployNow(ApplicationId application) {
*/
protected void deploy(ApplicationId application, String reason) {
if (pendingDeployments.addIfAbsent(application)) { // Avoid queuing multiple deployments for same application
deploymentExecutor.execute(() -> deployWithLock(application, reason));
deploymentExecutor.execute(() -> deployNow(application, reason));
}
}

protected Deployer deployer() { return deployer; }

/** Returns the applications that should be maintained by this now. */
protected abstract Map<ApplicationId, String> applicationsNeedingMaintenance();

/**
* Redeploy this application. A lock will be taken for the duration of the deployment activation
*/
protected final void deployWithLock(ApplicationId application, String reason) {
protected final void deployNow(ApplicationId application, String reason) {
try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) {
if ( ! deployment.isValid()) return; // this will be done at another config server
if ( ! canDeployNow(application)) return; // redeployment is no longer needed
Expand All @@ -97,15 +95,17 @@ protected final Instant getLastDeployTime(ApplicationId application) {
@Override
public void shutdown() {
super.shutdown();
this.deploymentExecutor.shutdownNow();
deploymentExecutor.shutdownNow();
}

@Override
public void awaitShutdown() {
super.awaitShutdown();
try {
// Give deployments in progress some time to complete
this.deploymentExecutor.awaitTermination(1, TimeUnit.MINUTES);
if (!deploymentExecutor.awaitTermination(1, TimeUnit.MINUTES)) {
log.log(Level.WARNING, "Failed to shut down deployment executor within deadline");
}
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.Environment;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
Expand All @@ -19,6 +18,7 @@
import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling;
import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricSnapshot;
import com.yahoo.vespa.hosted.provision.node.History;

import java.time.Duration;
import java.time.Instant;
import java.util.Map;
Expand Down Expand Up @@ -68,6 +68,7 @@ protected double maintain() {
* @return true if an autoscaling decision was made or nothing should be done, false if there was an error
*/
private boolean autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId) {
boolean redeploy = false;
try (var lock = nodeRepository().applications().lock(applicationId)) {
Optional<Application> application = nodeRepository().applications().get(applicationId);
if (application.isEmpty()) return true;
Expand All @@ -93,23 +94,24 @@ private boolean autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId)
applications().put(application.get().with(cluster), lock);

// Attempt to perform the autoscaling immediately, and log it regardless
if (autoscaling != null
&& autoscaling.resources().isPresent()
&& !current.equals(autoscaling.resources().get())) {
try (MaintenanceDeployment deployment = new MaintenanceDeployment(applicationId, deployer, metric, nodeRepository())) {
if (deployment.isValid())
deployment.activate();
logAutoscaling(current, autoscaling.resources().get(), applicationId, clusterNodes.not().retired());
}
if (autoscaling != null && autoscaling.resources().isPresent() && !current.equals(autoscaling.resources().get())) {
redeploy = true;
logAutoscaling(current, autoscaling.resources().get(), applicationId, clusterNodes.not().retired());
}
return true;
}
catch (ApplicationLockException e) {
return false;
}
catch (IllegalArgumentException e) {
throw new IllegalArgumentException("Illegal arguments for " + applicationId + " cluster " + clusterId, e);
}
if (redeploy) {
try (MaintenanceDeployment deployment = new MaintenanceDeployment(applicationId, deployer, metric, nodeRepository())) {
if (deployment.isValid())
deployment.activate();
}
}
return true;
}

private Applications applications() {
Expand Down
Loading

0 comments on commit 7ddc151

Please sign in to comment.