Skip to content

Commit

Permalink
Try to warn about all fields having inconsistent settings when they a…
Browse files Browse the repository at this point in the history
…re used in a field set

Note that this is not done for match settings, as that will change behavior,
but we should probably do that change, as it might be wrong now
(e.g. you can get 'text' matching when the 2 fields in a fieldset have
matching 'exact' and 'word')
  • Loading branch information
hmusum committed Dec 17, 2024
1 parent 0d9e9ae commit 0dc4bfa
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,18 @@ public void process(boolean validate, boolean documentsOnly) {
}

private void checkFieldNames(Schema schema, FieldSet fieldSet) {
for (String field : fieldSet.getFieldNames()) {
if (schema.getField(field) == null)
throw new IllegalArgumentException("For " + schema + ": Field '" + field + "' in " +
fieldSet + " does not exist.");
}
var invalidFieldNames = fieldSet.getFieldNames().stream()
.filter(f -> schema.getField(f) == null)
.map(f -> "'" + f + "'")
.toList();
if (invalidFieldNames.isEmpty()) return;

var message = "For " + schema + ": ";
if (invalidFieldNames.size() == 1)
message = message + "Field " + invalidFieldNames.get(0) + " in " + fieldSet + " does not exist.";
else
message = message + "Fields " + String.join(",", invalidFieldNames) + " in " + fieldSet + " do not exist.";
throw new IllegalArgumentException(message);
}

private void checkMatching(Schema schema, FieldSet fieldSet) {
Expand All @@ -67,7 +74,10 @@ private void checkMatching(Schema schema, FieldSet fieldSet) {
warn(schema, field.asField(),
"The matching settings for the fields in " + fieldSet + " are inconsistent " +
"(explicitly or because of field type). This may lead to recall and ranking issues. " +
"The matching setting that will be used for this fieldset is " + matching.getType() + ". " +
"See " + fieldSetDocUrl);
// TODO: Remove (see FieldSetSettingsTestCase#inconsistentMatchingShouldStillSetMatchingForFieldSet)
// but when doing so matching for a fieldset might change
return;
}
}
Expand All @@ -88,7 +98,6 @@ private void checkNormalization(Schema schema, FieldSet fieldSet) {
"The normalization settings for the fields in " + fieldSet + " are inconsistent " +
"(explicitly or because of field type). This may lead to recall and ranking issues. " +
"See " + fieldSetDocUrl);
return;
}
}
}
Expand All @@ -108,7 +117,6 @@ private void checkStemming(Schema schema, FieldSet fieldSet) {
"' are inconsistent (explicitly or because of field type). " +
"This may lead to recall and ranking issues. " +
"See " + fieldSetDocUrl);
return;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,19 @@
package com.yahoo.schema.processing;

import com.yahoo.config.model.application.provider.BaseDeployLogger;
import com.yahoo.schema.ApplicationBuilder;
import com.yahoo.schema.derived.TestableDeployLogger;
import com.yahoo.schema.document.MatchType;
import com.yahoo.schema.parser.ParseException;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

import static com.yahoo.schema.ApplicationBuilder.createFromStrings;
import static com.yahoo.schema.document.MatchType.EXACT;
import static com.yahoo.schema.document.MatchType.WORD;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;

public class FieldSetSettingsTestCase {

Expand All @@ -24,7 +31,8 @@ public void warnableFieldTypeMix() {
assertArrayEquals(new String[]{
"For schema 'child', field 'ps': " +
"The matching settings for the fields in fieldset 'default' are inconsistent (explicitly or because of field type). " +
"This may lead to recall and ranking issues. See https://docs.vespa.ai/en/reference/schema-reference.html#fieldset",
"This may lead to recall and ranking issues. The matching setting that will be used for this fieldset is TEXT. " +
"See https://docs.vespa.ai/en/reference/schema-reference.html#fieldset",
"For schema 'child', field 'ps': " +
"The normalization settings for the fields in fieldset 'default' are inconsistent (explicitly or because of field type). " +
"This may lead to recall and ranking issues. See https://docs.vespa.ai/en/reference/schema-reference.html#fieldset"},
Expand All @@ -39,6 +47,33 @@ public void illegalFieldTypeMix() {
"See https://docs.vespa.ai/en/reference/schema-reference.html#fieldset"}, logger.warnings.toArray());
}

@Test
@Disabled
// Test that match setting for a field will be a match settings one of the fields
// in the set has, not the default match setting for a field
// TODO: This now fails because setting match setting for a fieldset is done after
// checking if there are inconsistencies in match settings for fields in a fieldset,
// but code today return if it finds such an inconsistency WITHOUT setting match
// setting for the fieldset, which means it will end up being the default match setting
// (TEXT). As shown in this test, it should be either WORD or EXACT (fields are
// processed in lexical order of fioeld name, so the first field will determine which match
// setting is used.
public void inconsistentMatchingShouldStillSetMatchingForFieldSet() throws ParseException {
var logger = new TestableDeployLogger();

// a is field with word mathcing => word matching for fieldset
var builder = createFromStrings(logger, schemaWithMatchSettings("fieldset default { fields: a, b }", "a", "b"));
assertMatchType(builder, WORD);

// a is field with exact mathcing => exact matchong for fieldset
builder = createFromStrings(logger, schemaWithMatchSettings("fieldset default { fields: a, b }", "b", "a"));
assertMatchType(builder, EXACT);
}

private static void assertMatchType(ApplicationBuilder builder, MatchType matchType) {
var fieldSet = builder.getSchema().fieldSets().userFieldSets().values().iterator().next();
assertEquals(matchType, fieldSet.getMatching().getType());
}

private static String childSd(String fieldSet) {
return """
Expand All @@ -64,6 +99,7 @@ field ct type tensor(x[2]) {
}
""";
}

private static String parentSd() {
return """
schema parent {
Expand All @@ -81,4 +117,23 @@ field pt type tensor(x[2]) {
}
""";
}

private static String schemaWithMatchSettings(String fieldSet, String fieldNameWithWordMatching, String fieldNameWithExactMatching) {
return """
schema index_variants {
document index_variants {
field %s type string {
indexing: index
match: word
}
field %s type string {
indexing: index
match: exact
}
}
%s
}
""".formatted(fieldNameWithWordMatching, fieldNameWithExactMatching, fieldSet);
}

}

0 comments on commit 0dc4bfa

Please sign in to comment.