Skip to content

Commit

Permalink
Refactor FieldMapping logic
Browse files Browse the repository at this point in the history
Refactors FieldMapper logic. It removes the LegacyFieldMapper and
replaces it with a FlatFieldMapper. The FlatFieldMapper's role is to
create fields that do not build ANN indices.

Additionally, it puts dimension, model_id, and knn_method_context in a
new ANNConfig class and adds some safety checks around accessing them.
This should make calling logic easier to handle.

Lastly, it cleans up the parsing so that there isnt encoder parsing
directly in the KNNVectorFieldMapper.

Signed-off-by: John Mazanec <[email protected]>
  • Loading branch information
jmazanec15 committed Aug 8, 2024
1 parent 56698f7 commit 9de1292
Show file tree
Hide file tree
Showing 33 changed files with 1,454 additions and 918 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* Refactor method structure and definitions [#1920](https://github.com/opensearch-project/k-NN/pull/1920)
* Refactor KNNVectorFieldType from KNNVectorFieldMapper to a separate class for better readability. [#1931](https://github.com/opensearch-project/k-NN/pull/1931)
* Generalize lib interface to return context objects [#1925](https://github.com/opensearch-project/k-NN/pull/1925)
* Move k search k-NN query to re-write phase of vector search query for Native Engines [#1877](https://github.com/opensearch-project/k-NN/pull/1877)
* Move k search k-NN query to re-write phase of vector search query for Native Engines [#1877](https://github.com/opensearch-project/k-NN/pull/1877)
* Restructure mappers to better handle null cases and avoid branching in parsing [#1939](https://github.com/opensearch-project/k-NN/pull/1939)
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import org.opensearch.knn.index.codec.params.KNNScalarQuantizedVectorsFormatParams;
import org.opensearch.knn.index.codec.params.KNNVectorsFormatParams;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.index.engine.KNNMethodContext;
import org.opensearch.knn.index.mapper.KNNMappingConfig;
import org.opensearch.knn.index.mapper.KNNVectorFieldType;

import java.util.Optional;
Expand Down Expand Up @@ -66,16 +68,19 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) {
);
return defaultFormatSupplier.get();
}
var type = (KNNVectorFieldType) mapperService.orElseThrow(
KNNVectorFieldType mappedFieldType = (KNNVectorFieldType) mapperService.orElseThrow(
() -> new IllegalStateException(
String.format("Cannot read field type for field [%s] because mapper service is not available", field)
)
).fieldType(field);
var params = type.getKnnMethodContext().getMethodComponentContext().getParameters();

if (type.getKnnMethodContext().getKnnEngine() == KNNEngine.LUCENE
&& params != null
&& params.containsKey(METHOD_ENCODER_PARAMETER)) {
KNNMappingConfig knnMappingConfig = mappedFieldType.getKnnMappingConfig();
KNNMethodContext knnMethodContext = knnMappingConfig.getKnnMethodContext()
.orElseThrow(() -> new IllegalArgumentException("KNN method context cannot be empty"));

var params = knnMethodContext.getMethodComponentContext().getParameters();

if (knnMethodContext.getKnnEngine() == KNNEngine.LUCENE && params != null && params.containsKey(METHOD_ENCODER_PARAMETER)) {
KNNScalarQuantizedVectorsFormatParams knnScalarQuantizedVectorsFormatParams = new KNNScalarQuantizedVectorsFormatParams(
params,
defaultMaxConnections,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import org.opensearch.knn.index.codec.transfer.VectorTransferByte;
import org.opensearch.knn.index.codec.transfer.VectorTransferFloat;
import org.opensearch.knn.jni.JNIService;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.codec.util.KNNCodecUtil;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.indices.Model;
Expand Down Expand Up @@ -57,7 +56,6 @@

import static org.apache.lucene.codecs.CodecUtil.FOOTER_MAGIC;
import static org.opensearch.knn.common.KNNConstants.MODEL_ID;
import static org.opensearch.knn.common.KNNConstants.PARAMETERS;
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.buildEngineFileName;
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.calculateArraySize;
import static org.opensearch.knn.index.engine.faiss.Faiss.FAISS_BINARY_INDEX_DESCRIPTION_PREFIX;
Expand Down Expand Up @@ -214,35 +212,19 @@ private void createKNNIndexFromTemplate(Model model, KNNCodecUtil.Pair pair, KNN

private void createKNNIndexFromScratch(FieldInfo fieldInfo, KNNCodecUtil.Pair pair, KNNEngine knnEngine, String indexPath)
throws IOException {
Map<String, Object> parameters = new HashMap<>();
Map<String, String> fieldAttributes = fieldInfo.attributes();
String parametersString = fieldAttributes.get(KNNConstants.PARAMETERS);

// parametersString will be null when legacy mapper is used
if (parametersString == null) {
parameters.put(KNNConstants.SPACE_TYPE, fieldAttributes.getOrDefault(KNNConstants.SPACE_TYPE, SpaceType.DEFAULT.getValue()));

String efConstruction = fieldAttributes.get(KNNConstants.HNSW_ALGO_EF_CONSTRUCTION);
Map<String, Object> algoParams = new HashMap<>();
if (efConstruction != null) {
algoParams.put(KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION, Integer.parseInt(efConstruction));
}

String m = fieldAttributes.get(KNNConstants.HNSW_ALGO_M);
if (m != null) {
algoParams.put(KNNConstants.METHOD_PARAMETER_M, Integer.parseInt(m));
}
parameters.put(PARAMETERS, algoParams);
} else {
parameters.putAll(
XContentHelper.createParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
new BytesArray(parametersString),
MediaTypeRegistry.getDefaultMediaType()
).map()
);
throw new IllegalStateException("Parameter string is not set. Something is wrong");
}
Map<String, Object> parameters = new HashMap<>(
XContentHelper.createParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
new BytesArray(parametersString),
MediaTypeRegistry.getDefaultMediaType()
).map()
);

// Update index description of Faiss for binary data type
if (KNNEngine.FAISS == knnEngine
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import lombok.Getter;
import lombok.NonNull;
import lombok.Setter;
import org.opensearch.Version;
import org.opensearch.common.ValidationException;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
Expand All @@ -20,7 +19,6 @@
import org.opensearch.index.mapper.MapperParsingException;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;
Expand All @@ -29,7 +27,6 @@
import org.opensearch.knn.training.VectorSpaceInfo;

import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE;
import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_SPACE_TYPE;
import static org.opensearch.knn.common.KNNConstants.NAME;
import static org.opensearch.knn.common.KNNConstants.PARAMETERS;
Expand All @@ -42,21 +39,6 @@
@Getter
public class KNNMethodContext implements ToXContentFragment, Writeable {

private static KNNMethodContext defaultInstance = null;

/**
* This is used only for testing
* @return default KNNMethodContext for testing
*/
public static synchronized KNNMethodContext getDefault() {
if (defaultInstance == null) {
MethodComponentContext methodComponentContext = new MethodComponentContext(METHOD_HNSW, Collections.emptyMap());
methodComponentContext.setIndexVersion(Version.CURRENT);
defaultInstance = new KNNMethodContext(KNNEngine.DEFAULT, SpaceType.DEFAULT, methodComponentContext);
}
return defaultInstance;
}

@NonNull
private final KNNEngine knnEngine;
@NonNull
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.mapper;

import org.apache.lucene.document.FieldType;
import org.opensearch.Version;
import org.opensearch.common.Explicit;
import org.opensearch.knn.index.VectorDataType;

import java.util.Map;
import java.util.Optional;

/**
* Mapper used when you dont want to build an underlying KNN struct - you just want to
* store vectors as doc values
*/
public class FlatVectorFieldMapper extends KNNVectorFieldMapper {

private final PerDimensionValidator perDimensionValidator;

public static FlatVectorFieldMapper createFieldMapper(
String fullname,
String simpleName,
Map<String, String> metaValue,
VectorDataType vectorDataType,
Integer dimension,
MultiFields multiFields,
CopyTo copyTo,
Explicit<Boolean> ignoreMalformed,
boolean stored,
boolean hasDocValues,
Version indexCreatedVersion
) {
final KNNVectorFieldType mappedFieldType = new KNNVectorFieldType(fullname, metaValue, vectorDataType, new KNNMappingConfig() {
@Override
public Optional<Integer> getDimension() {
return Optional.of(dimension);
}
});
return new FlatVectorFieldMapper(
simpleName,
mappedFieldType,
multiFields,
copyTo,
ignoreMalformed,
stored,
hasDocValues,
indexCreatedVersion
);
}

private FlatVectorFieldMapper(
String simpleName,
KNNVectorFieldType mappedFieldType,
MultiFields multiFields,
CopyTo copyTo,
Explicit<Boolean> ignoreMalformed,
boolean stored,
boolean hasDocValues,
Version indexCreatedVersion
) {
super(simpleName, mappedFieldType, multiFields, copyTo, ignoreMalformed, stored, hasDocValues, indexCreatedVersion, null);
this.perDimensionValidator = selectPerDimensionValidator(vectorDataType);
this.fieldType = new FieldType(KNNVectorFieldMapper.Defaults.FIELD_TYPE);
this.fieldType.freeze();
}

private PerDimensionValidator selectPerDimensionValidator(VectorDataType vectorDataType) {
if (VectorDataType.BINARY == vectorDataType) {
return PerDimensionValidator.DEFAULT_BIT_VALIDATOR;
}

if (VectorDataType.BYTE == vectorDataType) {
return PerDimensionValidator.DEFAULT_BYTE_VALIDATOR;
}

return PerDimensionValidator.DEFAULT_FLOAT_VALIDATOR;
}

@Override
protected VectorValidator getVectorValidator() {
return VectorValidator.NOOP_VECTOR_VALIDATOR;
}

@Override
protected PerDimensionValidator getPerDimensionValidator() {
return perDimensionValidator;
}

@Override
protected PerDimensionProcessor getPerDimensionProcessor() {
return PerDimensionProcessor.NOOP_PROCESSOR;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.mapper;

import org.opensearch.knn.index.engine.KNNMethodContext;

import java.util.Optional;

/**
* Class holds information about how the ANN indices are created. The design of this class ensures that we do not
* accidentally configure an index that has multiple ways it can be created. This class is immutable.
*/
public interface KNNMappingConfig {
/**
*
* @return Optional containing the modelId if created from model, otherwise empty
*/
default Optional<String> getModelId() {
return Optional.empty();
}

/**
*
* @return Optional containing the KNNMethodContext if created from method, otherwise empty
*/
default Optional<KNNMethodContext> getKnnMethodContext() {
return Optional.empty();
}

/**
*
* @return the dimension of the index; for model based indices, it will be null
*/
default Optional<Integer> getDimension() {
return Optional.empty();
}
}
Loading

0 comments on commit 9de1292

Please sign in to comment.