Skip to content

Commit

Permalink
Merge pull request #33048 from vespa-engine/hmusum/warn-about-all-fie…
Browse files Browse the repository at this point in the history
…lds-with-inconsistent-settings-when-using-fieldset

Try to warn about all fields having inconsistent settings when they a…
  • Loading branch information
arnej27959 authored Dec 19, 2024
2 parents fad9d3a + 7f396b8 commit 268f85e
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,18 @@ public void process(boolean validate, boolean documentsOnly) {
}

private void checkFieldNames(Schema schema, FieldSet fieldSet) {
for (String field : fieldSet.getFieldNames()) {
if (schema.getField(field) == null)
throw new IllegalArgumentException("For " + schema + ": Field '" + field + "' in " +
fieldSet + " does not exist.");
}
var invalidFieldNames = fieldSet.getFieldNames().stream()
.filter(f -> schema.getField(f) == null)
.map(f -> "'" + f + "'")
.toList();
if (invalidFieldNames.isEmpty()) return;

var message = "For " + schema + ": ";
if (invalidFieldNames.size() == 1)
message = message + "Field " + invalidFieldNames.get(0) + " in " + fieldSet + " does not exist.";
else
message = message + "Fields " + String.join(",", invalidFieldNames) + " in " + fieldSet + " do not exist.";
throw new IllegalArgumentException(message);
}

private void checkMatching(Schema schema, FieldSet fieldSet) {
Expand All @@ -64,10 +71,28 @@ private void checkMatching(Schema schema, FieldSet fieldSet) {
matching = fieldMatching;
} else {
if ( ! matching.equals(fieldMatching)) {
warn(schema, field.asField(),
"The matching settings for the fields in " + fieldSet + " are inconsistent " +
"(explicitly or because of field type). This may lead to recall and ranking issues. " +
"See " + fieldSetDocUrl);
final var buf = new StringBuilder();
buf.append("For schema '").append(schema.getName()).append("': ");
buf.append("The matching settings in ").append(fieldSet);
buf.append(" are inconsistent (explicitly or because of field type). ");
buf.append("This may lead to recall and ranking issues. ");
Matching original = fieldSet.getMatching();
if (original == null) {
buf.append("The fieldset will use matching TEXT. ");
} else {
buf.append("The fieldset will use matching ").append(original.getType()).append(". ");
}
var list = fieldSet.getFieldNames().stream()
.map(name -> schema.getField(name))
.filter(f -> (f != null))
.filter(f -> (f.getMatching() != null))
.map(f -> " Field '" + f.asField().getName() + "' has matching " + f.getMatching().getType())
.toList();
buf.append(list);
buf.append(" See ").append(fieldSetDocUrl);
deployLogger.logApplicationPackage(Level.WARNING, buf.toString());
// TODO: Remove (see FieldSetSettingsTestCase#inconsistentMatchingShouldStillSetMatchingForFieldSet)
// but when doing so matching for a fieldset might change
return;
}
}
Expand All @@ -88,7 +113,6 @@ private void checkNormalization(Schema schema, FieldSet fieldSet) {
"The normalization settings for the fields in " + fieldSet + " are inconsistent " +
"(explicitly or because of field type). This may lead to recall and ranking issues. " +
"See " + fieldSetDocUrl);
return;
}
}
}
Expand All @@ -108,7 +132,6 @@ private void checkStemming(Schema schema, FieldSet fieldSet) {
"' are inconsistent (explicitly or because of field type). " +
"This may lead to recall and ranking issues. " +
"See " + fieldSetDocUrl);
return;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,19 @@
package com.yahoo.schema.processing;

import com.yahoo.config.model.application.provider.BaseDeployLogger;
import com.yahoo.schema.ApplicationBuilder;
import com.yahoo.schema.derived.TestableDeployLogger;
import com.yahoo.schema.document.MatchType;
import com.yahoo.schema.parser.ParseException;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

import static com.yahoo.schema.ApplicationBuilder.createFromStrings;
import static com.yahoo.schema.document.MatchType.EXACT;
import static com.yahoo.schema.document.MatchType.WORD;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;

public class FieldSetSettingsTestCase {

Expand All @@ -22,9 +29,11 @@ public void warnableFieldTypeMix() {
var logger = new TestableDeployLogger();
assertDoesNotThrow(() -> createFromStrings(logger, childSd("fieldset default { fields: ci,ps }"), parentSd()));
assertArrayEquals(new String[]{
"For schema 'child', field 'ps': " +
"The matching settings for the fields in fieldset 'default' are inconsistent (explicitly or because of field type). " +
"This may lead to recall and ranking issues. See https://docs.vespa.ai/en/reference/schema-reference.html#fieldset",
"For schema 'child': " +
"The matching settings in fieldset 'default' are inconsistent (explicitly or because of field type). " +
"This may lead to recall and ranking issues. The fieldset will use matching TEXT. " +
"[ Field 'ci' has matching TEXT, Field 'ps' has matching WORD] " +
"See https://docs.vespa.ai/en/reference/schema-reference.html#fieldset",
"For schema 'child', field 'ps': " +
"The normalization settings for the fields in fieldset 'default' are inconsistent (explicitly or because of field type). " +
"This may lead to recall and ranking issues. See https://docs.vespa.ai/en/reference/schema-reference.html#fieldset"},
Expand All @@ -39,6 +48,33 @@ public void illegalFieldTypeMix() {
"See https://docs.vespa.ai/en/reference/schema-reference.html#fieldset"}, logger.warnings.toArray());
}

@Test
@Disabled
// Test that match setting for a field will be a match settings one of the fields
// in the set has, not the default match setting for a field
// TODO: This now fails because setting match setting for a fieldset is done after
// checking if there are inconsistencies in match settings for fields in a fieldset,
// but code today return if it finds such an inconsistency WITHOUT setting match
// setting for the fieldset, which means it will end up being the default match setting
// (TEXT). As shown in this test, it should be either WORD or EXACT (fields are
// processed in lexical order of fioeld name, so the first field will determine which match
// setting is used.
public void inconsistentMatchingShouldStillSetMatchingForFieldSet() throws ParseException {
var logger = new TestableDeployLogger();

// a is field with word mathcing => word matching for fieldset
var builder = createFromStrings(logger, schemaWithMatchSettings("fieldset default { fields: a, b }", "a", "b"));
assertMatchType(builder, WORD);

// a is field with exact mathcing => exact matchong for fieldset
builder = createFromStrings(logger, schemaWithMatchSettings("fieldset default { fields: a, b }", "b", "a"));
assertMatchType(builder, EXACT);
}

private static void assertMatchType(ApplicationBuilder builder, MatchType matchType) {
var fieldSet = builder.getSchema().fieldSets().userFieldSets().values().iterator().next();
assertEquals(matchType, fieldSet.getMatching().getType());
}

private static String childSd(String fieldSet) {
return """
Expand All @@ -64,6 +100,7 @@ field ct type tensor(x[2]) {
}
""";
}

private static String parentSd() {
return """
schema parent {
Expand All @@ -81,4 +118,23 @@ field pt type tensor(x[2]) {
}
""";
}

private static String schemaWithMatchSettings(String fieldSet, String fieldNameWithWordMatching, String fieldNameWithExactMatching) {
return """
schema index_variants {
document index_variants {
field %s type string {
indexing: index
match: word
}
field %s type string {
indexing: index
match: exact
}
}
%s
}
""".formatted(fieldNameWithWordMatching, fieldNameWithExactMatching, fieldSet);
}

}

0 comments on commit 268f85e

Please sign in to comment.