Skip to content

Commit

Permalink
Merge pull request #21160 from vespa-engine/arnej/process-all-documen…
Browse files Browse the repository at this point in the history
…t-summaries

Arnej/process all document summaries
  • Loading branch information
bratseth authored Feb 12, 2022
2 parents 94ac841 + 473d4f4 commit 77954df
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,28 @@ public class AddExtraFieldsToDocument extends Processor {
super(schema, deployLogger, rankProfileRegistry, queryProfiles);
}

//TODO This is a temporary hack to avoid producing illegal code for fields not wanted anyway.
/**
 * Decides whether a field name passes the name filter.
 *
 * @param fieldName the field name to inspect
 * @return {@code false} if the name contains a {@code '.'} or is exactly
 *         {@code "rankfeatures"} or {@code "summaryfeatures"}; {@code true} otherwise
 */
private boolean dirtyLegalFieldNameCheck(String fieldName) {
    // Names containing a dot are rejected outright.
    if (fieldName.contains(".")) {
        return false;
    }
    // The two feature-data names are rejected as well.
    if ("rankfeatures".equals(fieldName)) {
        return false;
    }
    return !"summaryfeatures".equals(fieldName);
}

@Override
public void process(boolean validate, boolean documentsOnly) {
SDDocumentType document = schema.getDocument();
if (document != null) {
for (SDField field : schema.extraFieldList()) {
addSdField(schema, document, field, validate);
}
//TODO Vespa 8 or sooner we should avoid the dirty addition of fields from dirty 'default' summary to document at all
for (SummaryField field : schema.getSummary("default").getSummaryFields().values()) {
if (dirtyLegalFieldNameCheck(field.getName())) {
addSummaryField(schema, document, field, validate);
for (var docsum : schema.getSummaries().values()) {
for (var summaryField : docsum.getSummaryFields().values()) {
switch (summaryField.getTransform()) {
case NONE:
case BOLDED:
case DYNAMICBOLDED:
case DYNAMICTEASER:
case TEXTEXTRACTOR:
addSummaryField(schema, document, summaryField, validate);
break;
default:
// skip: generated from attribute or similar,
// so does not need to be included as an extra
// field in the document type
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@ ilscript[].docfield[7] "f"
ilscript[].docfield[8] "g"
ilscript[].docfield[9] "h"
ilscript[].docfield[10] "loc"
ilscript[].docfield[11] "mytags"
ilscript[].content[0] "clear_state | guard { input loc | to_pos | zcurve | attribute loc_pos_zcurve; }"
ilscript[].content[1] "clear_state | guard { input a | tokenize normalize stem:\"BEST\" | summary abolded2 | summary aboldeddynamic | summary adynamic2 | attribute a; }"
ilscript[].content[2] "clear_state | guard { input adynamic | tokenize normalize stem:\"BEST\" | summary adynamic | attribute adynamic; }"
ilscript[].content[3] "clear_state | guard { input abolded | tokenize normalize stem:\"BEST\" | summary abolded | attribute abolded; }"
ilscript[].content[4] "clear_state | guard { input b | summary b; }"
ilscript[].content[4] "clear_state | guard { input b | summary anotherb | summary b; }"
ilscript[].content[5] "clear_state | guard { input c | summary c | attribute c; }"
ilscript[].content[6] "clear_state | guard { input d | tokenize normalize stem:\"BEST\" | summary d; }"
ilscript[].content[7] "clear_state | guard { input e | tokenize normalize stem:\"BEST\" | summary dynamice | summary e; }"
ilscript[].content[8] "clear_state | guard { input f | summary f; }"
ilscript[].content[9] "clear_state | guard { input g | summary g; }"
ilscript[].content[10] "clear_state | guard { input h | summary h; }"
ilscript[].content[11] "clear_state | guard { input mytags | for_each { tokenize normalize stem:\"BEST\" } | index mytags; }"
24 changes: 24 additions & 0 deletions config-model/src/test/derived/multiplesummaries/index-info.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,20 @@ indexinfo[].command[].indexname "loc"
indexinfo[].command[].command "index"
indexinfo[].command[].indexname "loc"
indexinfo[].command[].command "type string"
indexinfo[].command[].indexname "mytags"
indexinfo[].command[].command "index"
indexinfo[].command[].indexname "mytags"
indexinfo[].command[].command "lowercase"
indexinfo[].command[].indexname "mytags"
indexinfo[].command[].command "multivalue"
indexinfo[].command[].indexname "mytags"
indexinfo[].command[].command "stem:BEST"
indexinfo[].command[].indexname "mytags"
indexinfo[].command[].command "normalize"
indexinfo[].command[].indexname "mytags"
indexinfo[].command[].command "plain-tokens"
indexinfo[].command[].indexname "mytags"
indexinfo[].command[].command "type Array<string>"
indexinfo[].command[].indexname "abolded2"
indexinfo[].command[].command "index"
indexinfo[].command[].indexname "abolded2"
Expand All @@ -83,6 +97,16 @@ indexinfo[].command[].indexname "adynamic2"
indexinfo[].command[].command "index"
indexinfo[].command[].indexname "adynamic2"
indexinfo[].command[].command "type string"
indexinfo[].command[].indexname "alltags"
indexinfo[].command[].command "index"
indexinfo[].command[].indexname "alltags"
indexinfo[].command[].command "multivalue"
indexinfo[].command[].indexname "alltags"
indexinfo[].command[].command "type Array<string>"
indexinfo[].command[].indexname "anotherb"
indexinfo[].command[].command "index"
indexinfo[].command[].indexname "anotherb"
indexinfo[].command[].command "type string"
indexinfo[].command[].indexname "dynamice"
indexinfo[].command[].command "index"
indexinfo[].command[].indexname "dynamice"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ search multiplesummaries {
field loc type string {

}

field mytags type array<string> {
indexing: index
}
}

field loc_pos type position {
Expand Down Expand Up @@ -120,7 +124,7 @@ search multiplesummaries {
}

# Since a here is a dynamic summary, it will be fetched from disk
document-summary notattributesonly2 {
document-summary anothernotattributesonly2 {

summary adynamic2 type string { # Should still be dynamic here
source: a
Expand All @@ -130,6 +134,19 @@ search multiplesummaries {
summary c type string {
}

summary alltags type array<string> {
source: mytags
}
summary sometags type array<string> {
source: mytags
matched-elements-only
}
summary anothera type string {
source: a
}
summary anotherb type string {
source: b
}
}

# Not attributes only because d is bolded
Expand Down
22 changes: 18 additions & 4 deletions config-model/src/test/derived/multiplesummaries/summary.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
defaultsummaryid 2038247029
defaultsummaryid 456145241
usev8geopositions false
classes[].id 2038247029
classes[].id 456145241
classes[].name "default"
classes[].omitsummaryfeatures false
classes[].fields[].name "loc_pos"
Expand Down Expand Up @@ -37,6 +37,12 @@ classes[].fields[].name "e"
classes[].fields[].type "longstring"
classes[].fields[].name "adynamic2"
classes[].fields[].type "longstring"
classes[].fields[].name "alltags"
classes[].fields[].type "jsonstring"
classes[].fields[].name "sometags"
classes[].fields[].type "jsonstring"
classes[].fields[].name "anotherb"
classes[].fields[].type "longstring"
classes[].fields[].name "abolded2"
classes[].fields[].type "longstring"
classes[].fields[].name "aboldeddynamic"
Expand Down Expand Up @@ -86,13 +92,21 @@ classes[].fields[].name "rankfeatures"
classes[].fields[].type "featuredata"
classes[].fields[].name "summaryfeatures"
classes[].fields[].type "featuredata"
classes[].id 1527097108
classes[].name "notattributesonly2"
classes[].id 1609068631
classes[].name "anothernotattributesonly2"
classes[].omitsummaryfeatures false
classes[].fields[].name "adynamic2"
classes[].fields[].type "longstring"
classes[].fields[].name "c"
classes[].fields[].type "longstring"
classes[].fields[].name "alltags"
classes[].fields[].type "jsonstring"
classes[].fields[].name "sometags"
classes[].fields[].type "jsonstring"
classes[].fields[].name "anothera"
classes[].fields[].type "longstring"
classes[].fields[].name "anotherb"
classes[].fields[].type "longstring"
classes[].fields[].name "rankfeatures"
classes[].fields[].type "featuredata"
classes[].fields[].name "summaryfeatures"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ override[].arguments "c"
override[].field "adynamic2"
override[].command "dynamicteaser"
override[].arguments "adynamic2"
override[].field "sometags"
override[].command "matchedelementsfilter"
override[].arguments "mytags"
override[].field "anothera"
override[].command "attribute"
override[].arguments "a"
override[].field "anotdynamic"
override[].command "attribute"
override[].arguments "adynamic"
Expand Down
10 changes: 10 additions & 0 deletions config-model/src/test/examples/nextgen/summaryfield.sd
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,21 @@ search summaryfield {
indexing: index | summary
summary bar: full
}
field mytags type array<string> {
indexing: index
}
}
document-summary baz {
summary cox type string {
source: bar
}
summary alltags type array<string> {
source: mytags
}
summary sometags type array<string> {
source: mytags
matched-elements-only
}
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ public void testRequireThatSummaryFieldsAreIncluded() throws IOException, ParseE
assertNotNull(docType.getField("foo"));
assertNotNull(docType.getField("bar"));
assertNotNull(docType.getField("cox"));
assertEquals(3, docType.getFieldCount());
assertNotNull(docType.getField("mytags"));
assertNotNull(docType.getField("alltags"));
assertEquals(5, docType.getFieldCount());
}

@Test
Expand Down

0 comments on commit 77954df

Please sign in to comment.