Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Glebashnik/feed field generator #32842

Draft
wants to merge 15 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import com.yahoo.documentmodel.TemporaryUnknownType;
import com.yahoo.language.Linguistics;
import com.yahoo.language.process.Embedder;
import com.yahoo.language.process.Generator;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.schema.Index;
import com.yahoo.schema.Schema;
Expand Down Expand Up @@ -399,12 +400,13 @@ public boolean hasSingleAttribute() {

/** Parse an indexing expression which will use the simple linguistics implementation suitable for testing */
public void parseIndexingScript(String schemaName, String script) {
parseIndexingScript(schemaName, script, new SimpleLinguistics(), Embedder.throwsOnUse.asMap());
parseIndexingScript(schemaName, script, new SimpleLinguistics(), Embedder.throwsOnUse.asMap(), Generator.throwsOnUse.asMap());
}

public void parseIndexingScript(String schemaName, String script, Linguistics linguistics, Map<String, Embedder> embedders) {
public void parseIndexingScript(String schemaName, String script, Linguistics linguistics,
Map<String, Embedder> embedders, Map<String, Generator> generators) {
try {
ScriptParserContext config = new ScriptParserContext(linguistics, embedders);
ScriptParserContext config = new ScriptParserContext(linguistics, embedders, generators);
config.setInputStream(new IndexingInput(script));
setIndexingScript(schemaName, ScriptExpression.newInstance(config));
} catch (ParseException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import com.yahoo.language.Linguistics;
import com.yahoo.language.process.Embedder;
import com.yahoo.language.process.Generator;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.schema.document.SDField;
import com.yahoo.schema.parser.ParseException;
Expand Down Expand Up @@ -34,13 +35,14 @@ public void apply(String schemaName, SDField field) {

/** Creates an indexing operation which will use the simple linguistics implementation suitable for testing */
public static IndexingOperation fromStream(SimpleCharStream input, boolean multiLine) throws ParseException {
return fromStream(input, multiLine, new SimpleLinguistics(), Embedder.throwsOnUse.asMap());
return fromStream(input, multiLine, new SimpleLinguistics(), Embedder.throwsOnUse.asMap(),
Generator.throwsOnUse.asMap());
}

public static IndexingOperation fromStream(SimpleCharStream input, boolean multiLine,
Linguistics linguistics, Map<String, Embedder> embedders)
throws ParseException {
ScriptParserContext config = new ScriptParserContext(linguistics, embedders);
public static IndexingOperation fromStream(
SimpleCharStream input, boolean multiLine, Linguistics linguistics, Map<String, Embedder> embedders,
Map<String, Generator> generators) throws ParseException {
ScriptParserContext config = new ScriptParserContext(linguistics, embedders, generators);
config.setAnnotatorConfig(new AnnotatorConfig());
config.setInputStream(input);
ScriptExpression exp;
Expand Down
7 changes: 5 additions & 2 deletions config-model/src/main/javacc/SchemaParser.jj
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.config.model.api.ModelContext;
import com.yahoo.language.Linguistics;
import com.yahoo.language.process.Embedder;
import com.yahoo.language.process.Generator;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.search.query.ranking.Diversity;
import com.yahoo.schema.DistributableResource;
Expand Down Expand Up @@ -91,13 +92,15 @@ public class SchemaParser {
* @param multiline Whether or not to allow multi-line expressions.
* @param linguistics What to use for tokenizing.
*/
private IndexingOperation newIndexingOperation(boolean multiline, Linguistics linguistics, Map<String, Embedder> embedders) throws ParseException {
private IndexingOperation newIndexingOperation(
boolean multiline, Linguistics linguistics, Map<String, Embedder> embedders,
Map<String, Generator> generators) throws ParseException {
SimpleCharStream input = (SimpleCharStream)token_source.input_stream;
if (token.next != null) {
input.backup(token.next.image.length());
}
try {
return IndexingOperation.fromStream(input, multiline, linguistics, embedders);
return IndexingOperation.fromStream(input, multiline, linguistics, embedders, generators);
} finally {
token.next = null;
jj_ntk = -1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ private static <T extends Expression> T parse(ScriptParserContext context, Parse
parser.setDefaultFieldName(context.getDefaultFieldName());
parser.setLinguistics(context.getLinguistcs());
parser.setEmbedders(context.getEmbedders());
parser.setGenerators(context.getGenerators());

try {
return method.call(parser);
} catch (ParseException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import com.yahoo.language.Linguistics;
import com.yahoo.language.process.Embedder;
import com.yahoo.language.process.Generator;
import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig;
import com.yahoo.vespa.indexinglanguage.parser.CharStream;

Expand All @@ -17,12 +18,14 @@ public class ScriptParserContext {
private AnnotatorConfig annotatorConfig = new AnnotatorConfig();
private Linguistics linguistics;
private final Map<String, Embedder> embedders;
private final Map<String, Generator> generators;
private String defaultFieldName = null;
private CharStream inputStream = null;

public ScriptParserContext(Linguistics linguistics, Map<String, Embedder> embedders) {
public ScriptParserContext(Linguistics linguistics, Map<String, Embedder> embedders, Map<String, Generator> generators) {
this.linguistics = linguistics;
this.embedders = embedders;
this.generators = generators;
}

public AnnotatorConfig getAnnotatorConfig() {
Expand All @@ -46,6 +49,9 @@ public ScriptParserContext setLinguistics(Linguistics linguistics) {
/** Returns a read-only view of the embedders map held by this context. */
public Map<String, Embedder> getEmbedders() {
    Map<String, Embedder> readOnlyView = Collections.unmodifiableMap(embedders);
    return readOnlyView;
}

/** Returns a read-only view of the generators map held by this context. */
public Map<String, Generator> getGenerators() {
    Map<String, Generator> readOnlyView = Collections.unmodifiableMap(generators);
    return readOnlyView;
}

public String getDefaultFieldName() {
return defaultFieldName;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import com.yahoo.document.datatypes.FieldValue;
import com.yahoo.language.Linguistics;
import com.yahoo.language.process.Embedder;
import com.yahoo.language.process.Generator;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.vespa.indexinglanguage.*;
import com.yahoo.vespa.indexinglanguage.parser.IndexingInput;
Expand Down Expand Up @@ -273,7 +274,11 @@ public static Expression fromString(String expression) throws ParseException {
}

public static Expression fromString(String expression, Linguistics linguistics, Map<String, Embedder> embedders) throws ParseException {
return newInstance(new ScriptParserContext(linguistics, embedders).setInputStream(new IndexingInput(expression)));
return newInstance(new ScriptParserContext(linguistics, embedders, Map.of()).setInputStream(new IndexingInput(expression)));
}

/**
 * Parses the given expression string with the supplied linguistics, embedders and generators.
 *
 * @param expression  the text to parse
 * @param linguistics the linguistics handed to the parser context
 * @param embedders   the embedders handed to the parser context
 * @param generators  the generators handed to the parser context
 * @return the parsed expression
 * @throws ParseException if parsing fails
 */
public static Expression fromString(String expression, Linguistics linguistics, Map<String, Embedder> embedders,
                                    Map<String, Generator> generators) throws ParseException {
    ScriptParserContext parserContext =
            new ScriptParserContext(linguistics, embedders, generators).setInputStream(new IndexingInput(expression));
    return newInstance(parserContext);
}

public static Expression newInstance(ScriptParserContext context) throws ParseException {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
package com.yahoo.vespa.indexinglanguage.expressions;

import com.yahoo.document.DataType;
import com.yahoo.document.DocumentType;
import com.yahoo.document.Field;
import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.language.Linguistics;
import com.yahoo.language.process.Generator;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class GenerateExpression extends Expression {
lesters marked this conversation as resolved.
Show resolved Hide resolved
private final Linguistics linguistics;
private final Generator generator;
private final String generatorId;
private final List<String> generatorArguments;

/** The destination the generated value will be written to in the form [schema name].[field name] */
private String destination;

/** The target type we are generating into. */
private DataType targetType;

public GenerateExpression(
Linguistics linguistics,
Map<String, Generator> generators,
String generatorId,
List<String> generatorArguments
) {
super(null);
lesters marked this conversation as resolved.
Show resolved Hide resolved
this.linguistics = linguistics;
this.generatorId = generatorId;
this.generatorArguments = List.copyOf(generatorArguments);

boolean generatorIdProvided = generatorId != null && !generatorId.isEmpty();

if (generators.isEmpty()) {
throw new IllegalStateException("No generators provided"); // should never happen
}
else if (generators.size() == 1 && ! generatorIdProvided) {
this.generator = generators.entrySet().stream().findFirst().get().getValue();
}
else if (generators.size() > 1 && ! generatorIdProvided) {
this.generator = new Generator.FailingGenerator(
"Multiple generators are provided but no generator id is given. " +
"Valid generators are " + validGenerators(generators));
}
else if ( ! generators.containsKey(generatorId)) {
this.generator = new Generator.FailingGenerator("Can't find generator '" + generatorId + "'. " +
"Valid generators are " + validGenerators(generators));
} else {
this.generator = generators.get(generatorId);
}
}

@Override
public DataType setInputType(DataType type, VerificationContext context) {
// TODO: Not sure if this implementation of the methods is correct, needs careful review.
super.setInputType(type, context);

if (type == DataType.STRING)
throw new IllegalArgumentException("generate requires a string input type, but got " + type);

return DataType.STRING;
}
lesters marked this conversation as resolved.
Show resolved Hide resolved

@Override
public DataType setOutputType(DataType type, VerificationContext context) {
// TODO: Not sure if this implementation of the methods is correct, needs careful review.
super.setOutputType(type, type, context);

if (type != DataType.STRING)
throw new IllegalArgumentException("generate requires a string input type, but got " + type);

return DataType.STRING;
}
lesters marked this conversation as resolved.
Show resolved Hide resolved

@Override
public void setStatementOutput(DocumentType documentType, Field field) {
targetType = field.getDataType();
destination = documentType.getName() + "." + field.getName();
}

@Override
protected void doVerify(VerificationContext context) {
targetType = getOutputType(context);

if (!validTarget(targetType))
throw new VerificationException(this, "The generate target field must be a String");

lesters marked this conversation as resolved.
Show resolved Hide resolved
context.setCurrentType(createdOutputType());
}

@Override
protected void doExecute(ExecutionContext context) {
if (context.getCurrentValue() == null) return;

String output;
if (context.getCurrentValue().getDataType() == DataType.STRING) {
output = generateSingleValue(context);
}
else {
throw new IllegalArgumentException("Generate can only be done on string fields, not " +
context.getCurrentValue().getDataType());
}

context.setCurrentValue(new StringFieldValue(output));
}

private String generateSingleValue(ExecutionContext context) {
StringFieldValue input = (StringFieldValue)context.getCurrentValue();
return generate(input.getString(), targetType, context);
}

private String generate(String input, DataType targetType, ExecutionContext context) {
return generator.generate(
input,
new Generator.Context(destination, context.getCache())
.setLanguage(context.resolveLanguage(linguistics))
.setGeneratorId(generatorId),
targetType
);
}

@Override
public DataType createdOutputType() {
return targetType;
}

private boolean validTarget(DataType target) {
return target == DataType.STRING;
}

@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("generate");
if (this.generatorId != null && !this.generatorId.isEmpty())
sb.append(" ").append(this.generatorId);
generatorArguments.forEach(arg -> sb.append(" ").append(arg));
return sb.toString();
}

@Override
public int hashCode() { return GenerateExpression.class.hashCode(); }

@Override
public boolean equals(Object o) {
return o instanceof GenerateExpression;
}

private static String validGenerators(Map<String, Generator> generators) {
List<String> generatorIds = new ArrayList<>();
generators.forEach((key, value) -> generatorIds.add(key));
generatorIds.sort(null);
return String.join(", ", generatorIds);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import com.yahoo.document.datatypes.FieldValue;
import com.yahoo.language.Linguistics;
import com.yahoo.language.process.Embedder;
import com.yahoo.language.process.Generator;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.vespa.indexinglanguage.ExpressionConverter;
import com.yahoo.vespa.indexinglanguage.ScriptParser;
Expand Down Expand Up @@ -116,7 +117,13 @@ public static ScriptExpression fromString(String expression) throws ParseExcepti
}

public static ScriptExpression fromString(String expression, Linguistics linguistics, Map<String, Embedder> embedders) throws ParseException {
return newInstance(new ScriptParserContext(linguistics, embedders).setInputStream(new IndexingInput(expression)));
return newInstance(new ScriptParserContext(linguistics, embedders, Map.of()).setInputStream(new IndexingInput(expression)));
}

/**
 * Parses the given script string with the supplied linguistics, embedders and generators.
 *
 * Declared to return ScriptExpression, which is what newInstance produces and what the
 * overload without generators returns, so callers do not need to cast the result.
 *
 * @param expression  the text to parse
 * @param linguistics the linguistics handed to the parser context
 * @param embedders   the embedders handed to the parser context
 * @param generators  the generators handed to the parser context
 * @return the parsed script expression
 * @throws ParseException if parsing fails
 */
public static ScriptExpression fromString(
        String expression, Linguistics linguistics, Map<String, Embedder> embedders,
        Map<String, Generator> generators) throws ParseException {
    return newInstance(new ScriptParserContext(linguistics, embedders, generators).setInputStream(new IndexingInput(expression)));
}

public static ScriptExpression newInstance(ScriptParserContext config) throws ParseException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import com.yahoo.document.DataType;
import com.yahoo.language.Linguistics;
import com.yahoo.language.process.Embedder;
import com.yahoo.language.process.Generator;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.vespa.indexinglanguage.ExpressionConverter;
import com.yahoo.vespa.indexinglanguage.ScriptParser;
Expand Down Expand Up @@ -137,7 +138,13 @@ public static StatementExpression fromString(String expression) throws ParseExce
}

public static StatementExpression fromString(String expression, Linguistics linguistics, Map<String, Embedder> embedders) throws ParseException {
return newInstance(new ScriptParserContext(linguistics, embedders).setInputStream(new IndexingInput(expression)));
return newInstance(new ScriptParserContext(linguistics, embedders, Map.of()).setInputStream(new IndexingInput(expression)));
}

/**
 * Parses the given statement string with the supplied linguistics, embedders and generators.
 *
 * @param expression  the text to parse
 * @param linguistics the linguistics handed to the parser context
 * @param embedders   the embedders handed to the parser context
 * @param generators  the generators handed to the parser context
 * @return the parsed statement expression
 * @throws ParseException if parsing fails
 */
public static StatementExpression fromString(
        String expression, Linguistics linguistics, Map<String, Embedder> embedders,
        Map<String, Generator> generators) throws ParseException {
    ScriptParserContext parserContext =
            new ScriptParserContext(linguistics, embedders, generators).setInputStream(new IndexingInput(expression));
    return newInstance(parserContext);
}

public static StatementExpression newInstance(ScriptParserContext config) throws ParseException {
Expand Down
Loading