Skip to content

Commit

Permalink
Integrate Lucene Vector field with native engines, to use KNNVectorFo…
Browse files Browse the repository at this point in the history
…rmat during segment creation

Signed-off-by: Navneet Verma <[email protected]>
  • Loading branch information
navneet1v committed Aug 9, 2024
1 parent 56698f7 commit 5cb2739
Show file tree
Hide file tree
Showing 12 changed files with 313 additions and 59 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased 2.x](https://github.com/opensearch-project/k-NN/compare/2.16...2.x)
### Features
* Integrate Lucene Vector field with native engines to use KNNVectorsFormat during segment creation. [#1945](https://github.com/opensearch-project/k-NN/pull/1945)
### Enhancements
### Bug Fixes
* Corrected search logic for scenario with non-existent fields in filter [#1874](https://github.com/opensearch-project/k-NN/pull/1874)
Expand All @@ -31,4 +32,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* Refactor method structure and definitions [#1920](https://github.com/opensearch-project/k-NN/pull/1920)
* Refactor KNNVectorFieldType from KNNVectorFieldMapper to a separate class for better readability. [#1931](https://github.com/opensearch-project/k-NN/pull/1931)
* Generalize lib interface to return context objects [#1925](https://github.com/opensearch-project/k-NN/pull/1925)
* Move k search k-NN query to re-write phase of vector search query for Native Engines [#1877](https://github.com/opensearch-project/k-NN/pull/1877)
* Move k search k-NN query to re-write phase of vector search query for Native Engines [#1877](https://github.com/opensearch-project/k-NN/pull/1877)
33 changes: 32 additions & 1 deletion src/main/java/org/opensearch/knn/index/KNNSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ public class KNNSettings {
public static final String MODEL_CACHE_SIZE_LIMIT = "knn.model.cache.size.limit";
public static final String ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD = "index.knn.advanced.filtered_exact_search_threshold";
public static final String KNN_FAISS_AVX2_DISABLED = "knn.faiss.avx2.disabled";
/**
 * Key of the temporary setting that gates the use of KNNVectorsFormat for native engines.
 *
 * TODO: This setting exists only so that the main branch of the k-NN plugin does not break while the remaining
 * parts of the KNNVectorsFormat integration for native engines are still being added. It will be removed once all
 * of those changes are in.
 */
public static final String KNN_USE_LUCENE_VECTOR_FORMAT_ENABLED = "knn.use.format.enabled";

/**
* Default setting values
Expand Down Expand Up @@ -255,6 +261,17 @@ public class KNNSettings {
NodeScope
);

/**
 * Node-scoped boolean setting, {@code false} by default, registered under
 * {@link #KNN_USE_LUCENE_VECTOR_FORMAT_ENABLED}.
 *
 * TODO: This setting exists only so that the main branch of the k-NN plugin does not break while the remaining
 * parts of the KNNVectorsFormat integration for native engines are still being added. It will be removed once all
 * of those changes are in.
 */
public static final Setting<Boolean> KNN_USE_LUCENE_VECTOR_FORMAT_ENABLED_SETTING = Setting.boolSetting(
KNN_USE_LUCENE_VECTOR_FORMAT_ENABLED,
false,
NodeScope
);

/**
* Dynamic settings
*/
Expand Down Expand Up @@ -379,6 +396,10 @@ private Setting<?> getSetting(String key) {
return KNN_VECTOR_STREAMING_MEMORY_LIMIT_PCT_SETTING;
}

if (KNN_USE_LUCENE_VECTOR_FORMAT_ENABLED.equals(key)) {
return KNN_USE_LUCENE_VECTOR_FORMAT_ENABLED_SETTING;
}

throw new IllegalArgumentException("Cannot find setting by key [" + key + "]");
}

Expand All @@ -397,7 +418,8 @@ public List<Setting<?>> getSettings() {
MODEL_CACHE_SIZE_LIMIT_SETTING,
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING,
KNN_FAISS_AVX2_DISABLED_SETTING,
KNN_VECTOR_STREAMING_MEMORY_LIMIT_PCT_SETTING
KNN_VECTOR_STREAMING_MEMORY_LIMIT_PCT_SETTING,
KNN_USE_LUCENE_VECTOR_FORMAT_ENABLED_SETTING
);
return Stream.concat(settings.stream(), Stream.concat(getFeatureFlags().stream(), dynamicCacheSettings.values().stream()))
.collect(Collectors.toList());
Expand Down Expand Up @@ -443,6 +465,15 @@ public static Integer getFilteredExactSearchThreshold(final String indexName) {
.getAsInt(ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE);
}

/**
 * Reads the value stored under {@link KNNSettings#KNN_USE_LUCENE_VECTOR_FORMAT_ENABLED} from the shared
 * {@link KNNSettings} state.
 *
 * TODO: The underlying setting exists only so that the main branch of the k-NN plugin does not break while the
 * remaining parts of the KNNVectorsFormat integration for native engines are still being added. It will be removed
 * once all of those changes are in.
 *
 * @return the setting value as returned by {@code getSettingValue}
 */
public static boolean getIsLuceneVectorFormatEnabled() {
    final boolean luceneVectorFormatEnabled = KNNSettings.state()
        .getSettingValue(KNNSettings.KNN_USE_LUCENE_VECTOR_FORMAT_ENABLED);
    return luceneVectorFormatEnabled;
}

public void initialize(Client client, ClusterService clusterService) {
this.client = client;
this.clusterService = clusterService;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@
import lombok.extern.log4j.Log4j2;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.KnnByteVectorField;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.VectorEncoding;
import org.opensearch.Version;
import org.opensearch.common.Explicit;
import org.opensearch.common.ValidationException;
Expand Down Expand Up @@ -172,10 +175,13 @@ public static class Builder extends ParametrizedFieldMapper.Builder {

protected Version indexCreatedVersion;

public Builder(String name, ModelDao modelDao, Version indexCreatedVersion) {
protected boolean isIndexKNN;

public Builder(String name, ModelDao modelDao, Version indexCreatedVersion, boolean isIndexKNN) {
super(name);
this.modelDao = modelDao;
this.indexCreatedVersion = indexCreatedVersion;
this.isIndexKNN = isIndexKNN;
}

/**
Expand All @@ -187,12 +193,13 @@ public Builder(String name, ModelDao modelDao, Version indexCreatedVersion) {
* @param m m value of field
* @param efConstruction efConstruction value of field
*/
public Builder(String name, String spaceType, String m, String efConstruction, Version indexCreatedVersion) {
public Builder(String name, String spaceType, String m, String efConstruction, Version indexCreatedVersion, boolean isIndexKNN) {
super(name);
this.spaceType = spaceType;
this.m = m;
this.efConstruction = efConstruction;
this.indexCreatedVersion = indexCreatedVersion;
this.isIndexKNN = isIndexKNN;
}

@Override
Expand Down Expand Up @@ -253,6 +260,7 @@ public KNNVectorFieldMapper build(BuilderContext context) {
.hasDocValues(hasDocValues.get())
.vectorDataType(vectorDataType.getValue())
.knnMethodContext(knnMethodContext)
.isIndexKNN(isIndexKNN)
.build();
return new LuceneFieldMapper(createLuceneFieldMapperInput);
}
Expand All @@ -265,7 +273,8 @@ public KNNVectorFieldMapper build(BuilderContext context) {
ignoreMalformed,
stored.get(),
hasDocValues.get(),
knnMethodContext
knnMethodContext,
isIndexKNN
);
}

Expand All @@ -286,7 +295,8 @@ public KNNVectorFieldMapper build(BuilderContext context) {
hasDocValues.get(),
modelDao,
modelIdAsString,
indexCreatedVersion
indexCreatedVersion,
isIndexKNN
);
}

Expand Down Expand Up @@ -325,7 +335,8 @@ public KNNVectorFieldMapper build(BuilderContext context) {
spaceType,
m,
efConstruction,
indexCreatedVersion
indexCreatedVersion,
isIndexKNN
);
}

Expand Down Expand Up @@ -430,7 +441,12 @@ public TypeParser(Supplier<ModelDao> modelDaoSupplier) {

@Override
public Mapper.Builder<?> parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
Builder builder = new KNNVectorFieldMapper.Builder(name, modelDaoSupplier.get(), parserContext.indexVersionCreated());
Builder builder = new KNNVectorFieldMapper.Builder(
name,
modelDaoSupplier.get(),
parserContext.indexVersionCreated(),
parserContext.getSettings().getAsBoolean(KNN_INDEX, false)
);
builder.parse(name, parserContext, node);

// All <a
Expand Down Expand Up @@ -464,6 +480,7 @@ public Mapper.Builder<?> parse(String name, Map<String, Object> node, ParserCont
// subclass (if it is unique).
protected KNNMethodContext knnMethod;
protected String modelId;
protected boolean isIndexKNN;

public KNNVectorFieldMapper(
String simpleName,
Expand All @@ -473,9 +490,11 @@ public KNNVectorFieldMapper(
Explicit<Boolean> ignoreMalformed,
boolean stored,
boolean hasDocValues,
Version indexCreatedVersion
Version indexCreatedVersion,
boolean isIndexKNN
) {
super(simpleName, mappedFieldType, multiFields, copyTo);
this.isIndexKNN = isIndexKNN;
this.ignoreMalformed = ignoreMalformed;
this.stored = stored;
this.hasDocValues = hasDocValues;
Expand Down Expand Up @@ -516,12 +535,11 @@ private MethodComponentContext getMethodComponentContext(KNNMethodContext knnMet
* Function returns a list of fields to be indexed when the vector is float type.
*
* @param array array of floats
* @param fieldType {@link FieldType}
* @return {@link List} of {@link Field}
*/
protected List<Field> getFieldsForFloatVector(final float[] array, final FieldType fieldType) {
protected List<Field> getFieldsForFloatVector(final float[] array) {
final List<Field> fields = new ArrayList<>();
fields.add(new VectorField(name(), array, fieldType));
fields.add(createVectorField(array, dimension));
if (this.stored) {
fields.add(createStoredFieldForFloatVector(name(), array));
}
Expand All @@ -532,18 +550,55 @@ protected List<Field> getFieldsForFloatVector(final float[] array, final FieldTy
* Function returns a list of fields to be indexed when the vector is byte type.
*
* @param array array of bytes
* @param fieldType {@link FieldType}
* @return {@link List} of {@link Field}
*/
protected List<Field> getFieldsForByteVector(final byte[] array, final FieldType fieldType) {
protected List<Field> getFieldsForByteVector(final byte[] array) {
final List<Field> fields = new ArrayList<>();
fields.add(new VectorField(name(), array, fieldType));
fields.add(createVectorField(array, dimension));
if (this.stored) {
fields.add(createStoredFieldForByteVector(name(), array));
}
return fields;
}

/**
 * Creates the field that carries a float vector for this mapper.
 * <p>
 * Works on a copy of the mapper's {@code fieldType}, so the shared instance is never modified. Depending on
 * {@link KNNVectorFieldMapperUtil#useLuceneKNNVectorsFormat}, the copy is configured either with vector attributes
 * (resulting in a {@link KnnFloatVectorField}) or with binary doc values (resulting in a {@link VectorField}).
 *
 * @param vectorValue the float vector to index
 * @param dimension dimension to register in the vector attributes
 * @return the {@link Field} to add to the document
 */
private Field createVectorField(float[] vectorValue, int dimension) {
    final FieldType vectorFieldType = new FieldType(fieldType);
    final boolean viaVectorsFormat = KNNVectorFieldMapperUtil.useLuceneKNNVectorsFormat(this.indexCreatedVersion, isIndexKNN);

    if (viaVectorsFormat == false) {
        // Fall back to the field that relies on the DocValuesFormat to create the vector data structures.
        vectorFieldType.setDocValuesType(DocValuesType.BINARY);
        vectorFieldType.freeze();
        return new VectorField(name(), vectorValue, vectorFieldType);
    }

    // The default space type is used on purpose so that the k-NN plugin can support more space types than Lucene
    // does. For native engines the actual space type is taken from the attributes map.
    vectorFieldType.setVectorAttributes(
        dimension,
        VectorEncoding.FLOAT32,
        SpaceType.DEFAULT.getKnnVectorSimilarityFunction().getVectorSimilarityFunction()
    );
    vectorFieldType.freeze();
    return new KnnFloatVectorField(name(), vectorValue, vectorFieldType);
}

/**
 * Creates the field that carries a byte vector for this mapper.
 * <p>
 * Works on a copy of the mapper's {@code fieldType}, so the shared instance is never modified. Depending on
 * {@link KNNVectorFieldMapperUtil#useLuceneKNNVectorsFormat}, the copy is configured either with vector attributes
 * (resulting in a {@link KnnByteVectorField}) or with binary doc values (resulting in a {@link VectorField}).
 *
 * @param vectorValue the byte vector to index
 * @param dimension configured dimension; divided by 8 when the vector data type is {@code BINARY}
 * @return the {@link Field} to add to the document
 */
private Field createVectorField(byte[] vectorValue, int dimension) {
    final FieldType vectorFieldType = new FieldType(fieldType);
    final boolean viaVectorsFormat = KNNVectorFieldMapperUtil.useLuceneKNNVectorsFormat(this.indexCreatedVersion, isIndexKNN);

    if (viaVectorsFormat == false) {
        // Fall back to the field that relies on the DocValuesFormat to create the vector data structures.
        vectorFieldType.setDocValuesType(DocValuesType.BINARY);
        vectorFieldType.freeze();
        return new VectorField(name(), vectorValue, vectorFieldType);
    }

    // For the BINARY data type the registered dimension is one eighth of the configured dimension.
    final int registeredDimension;
    if (this.vectorDataType == VectorDataType.BINARY) {
        registeredDimension = dimension / 8;
    } else {
        registeredDimension = dimension;
    }

    // The default space type is used on purpose so that the k-NN plugin can support more space types than Lucene
    // does. For native engines the actual space type is taken from the attributes map.
    vectorFieldType.setVectorAttributes(
        registeredDimension,
        VectorEncoding.BYTE,
        SpaceType.DEFAULT.getKnnVectorSimilarityFunction().getVectorSimilarityFunction()
    );
    vectorFieldType.freeze();
    return new KnnByteVectorField(name(), vectorValue, vectorFieldType);
}

protected void parseCreateField(
ParseContext context,
int dimension,
Expand All @@ -564,7 +619,7 @@ protected void parseCreateField(
}
final byte[] array = bytesArrayOptional.get();
spaceType.validateVector(array);
context.doc().addAll(getFieldsForByteVector(array, fieldType));
context.doc().addAll(getFieldsForByteVector(array));
} else if (VectorDataType.BYTE == vectorDataType) {
Optional<byte[]> bytesArrayOptional = getBytesFromContext(context, dimension, vectorDataType);

Expand All @@ -573,7 +628,7 @@ protected void parseCreateField(
}
final byte[] array = bytesArrayOptional.get();
spaceType.validateVector(array);
context.doc().addAll(getFieldsForByteVector(array, fieldType));
context.doc().addAll(getFieldsForByteVector(array));
} else if (VectorDataType.FLOAT == vectorDataType) {
Optional<float[]> floatsArrayOptional = getFloatsFromContext(context, dimension, methodComponentContext);

Expand All @@ -582,7 +637,7 @@ protected void parseCreateField(
}
final float[] array = floatsArrayOptional.get();
spaceType.validateVector(array);
context.doc().addAll(getFieldsForFloatVector(array, fieldType));
context.doc().addAll(getFieldsForFloatVector(array));
} else {
throw new IllegalArgumentException(
String.format(Locale.ROOT, "Cannot parse context for unsupported values provided for field [%s]", VECTOR_DATA_TYPE_FIELD)
Expand Down Expand Up @@ -746,7 +801,7 @@ Optional<float[]> getFloatsFromContext(ParseContext context, int dimension, Meth

@Override
public ParametrizedFieldMapper.Builder getMergeBuilder() {
return new KNNVectorFieldMapper.Builder(simpleName(), modelDao, indexCreatedVersion).init(this);
return new KNNVectorFieldMapper.Builder(simpleName(), modelDao, indexCreatedVersion, isIndexKNN).init(this);
}

@Override
Expand Down Expand Up @@ -783,7 +838,6 @@ public static class Defaults {
static {
FIELD_TYPE.setTokenized(false);
FIELD_TYPE.setIndexOptions(IndexOptions.NONE);
FIELD_TYPE.setDocValuesType(DocValuesType.BINARY);
FIELD_TYPE.putAttribute(KNN_FIELD, "true"); // This attribute helps to determine knn field type
FIELD_TYPE.freeze();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.util.BytesRef;
import org.opensearch.Version;
import org.opensearch.index.mapper.ParametrizedFieldMapper;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.engine.KNNMethodContext;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;
Expand Down Expand Up @@ -245,6 +247,22 @@ public static int getExpectedVectorLength(final KNNVectorFieldType knnVectorFiel
return VectorDataType.BINARY == knnVectorFieldType.getVectorDataType() ? expectedDimensions / 8 : expectedDimensions;
}

/**
 * Decides whether a vector field should be written through Lucene's KNNVectorsFormat. All of the following
 * conditions must hold:
 * <ol>
 * <li>The index was created with an OpenSearch version &gt;= 2.17.</li>
 * <li>The index.knn setting is true.</li>
 * <li>The setting read by {@link KNNSettings#getIsLuceneVectorFormatEnabled()} is enabled. This condition is
 * temporary and will be removed before release.</li>
 * </ol>
 * The conditions are checked in the order listed, and the settings lookup only happens when the first two hold.
 *
 * @param indexCreatedVersion {@link Version} the index was created with
 * @param isIndexKNN value of the index.knn setting
 * @return true if the vector field should use KNNVectorsFormat
 */
static boolean useLuceneKNNVectorsFormat(final Version indexCreatedVersion, final boolean isIndexKNN) {
    if (indexCreatedVersion.onOrAfter(Version.V_2_17_0) == false) {
        return false;
    }
    if (isIndexKNN == false) {
        return false;
    }
    return KNNSettings.getIsLuceneVectorFormatEnabled();
}

private static boolean isModelBasedIndex(int expectedDimensions) {
return expectedDimensions == -1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ public class LegacyFieldMapper extends KNNVectorFieldMapper {
String spaceType,
String m,
String efConstruction,
Version indexCreatedVersion
Version indexCreatedVersion,
boolean isIndexKNN
) {
super(simpleName, mappedFieldType, multiFields, copyTo, ignoreMalformed, stored, hasDocValues, indexCreatedVersion);

super(simpleName, mappedFieldType, multiFields, copyTo, ignoreMalformed, stored, hasDocValues, indexCreatedVersion, isIndexKNN);
this.spaceType = spaceType;
this.m = m;
this.efConstruction = efConstruction;
Expand All @@ -74,9 +74,14 @@ public class LegacyFieldMapper extends KNNVectorFieldMapper {

@Override
public ParametrizedFieldMapper.Builder getMergeBuilder() {
return new KNNVectorFieldMapper.Builder(simpleName(), this.spaceType, this.m, this.efConstruction, this.indexCreatedVersion).init(
this
);
return new KNNVectorFieldMapper.Builder(
simpleName(),
this.spaceType,
this.m,
this.efConstruction,
this.indexCreatedVersion,
this.isIndexKNN
).init(this);
}

static String getSpaceType(final Settings indexSettings, final VectorDataType vectorDataType) {
Expand Down
Loading

0 comments on commit 5cb2739

Please sign in to comment.