Skip to content

Commit

Permalink
add support for builder constructor in neural query builder (#1047)
Browse files Browse the repository at this point in the history
* add support for builder constructor in neural query builder

Signed-off-by: will-hwang <[email protected]>

* create custom builder class to enforce valid neural query builder instantiation

Signed-off-by: will-hwang <[email protected]>

* refactor code to remove duplication

Signed-off-by: will-hwang <[email protected]>

* include new constructor in qa packages

Signed-off-by: will-hwang <[email protected]>

* refactor code to remove unnecessary code

Signed-off-by: will-hwang <[email protected]>

* fix bug in neural query builder instantiation

Signed-off-by: will-hwang <[email protected]>

---------

Signed-off-by: will-hwang <[email protected]>
  • Loading branch information
will-hwang authored Jan 7, 2025
1 parent 8803377 commit 2ecd32c
Show file tree
Hide file tree
Showing 23 changed files with 592 additions and 540 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Implement pruning for neural sparse ingestion pipeline and two phase search processor ([#988](https://github.com/opensearch-project/neural-search/pull/988))
- Support empty string for fields in text embedding processor ([#1041](https://github.com/opensearch-project/neural-search/pull/1041))
- Optimize ML inference connection retry logic ([#1054](https://github.com/opensearch-project/neural-search/pull/1054))
- Add support for builder constructor in Neural Query Builder ([#1047](https://github.com/opensearch-project/neural-search/pull/1047))
### Bug Fixes
- Address inconsistent scoring in hybrid query results ([#998](https://github.com/opensearch-project/neural-search/pull/998))
- Fix bug where ingested document has list of nested objects ([#1040](https://github.com/opensearch-project/neural-search/pull/1040))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@

import org.opensearch.index.query.MatchQueryBuilder;

import static org.opensearch.knn.index.query.KNNQueryBuilder.EXPAND_NESTED_FIELD;
import static org.opensearch.neuralsearch.common.MinClusterVersionUtil.isClusterOnOrAfterMinReqVersion;
import static org.opensearch.neuralsearch.util.TestUtils.getModelId;
import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.util.TestUtils.PARAM_NAME_WEIGHTS;
Expand Down Expand Up @@ -124,11 +122,12 @@ private HybridQueryBuilder getQueryBuilder(
final Map<String, ?> methodParameters,
final RescoreContext rescoreContext
) {
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder();
neuralQueryBuilder.fieldName("passage_embedding");
neuralQueryBuilder.modelId(modelId);
neuralQueryBuilder.queryText(QUERY);
neuralQueryBuilder.k(5);
NeuralQueryBuilder neuralQueryBuilder = NeuralQueryBuilder.builder()
.fieldName("passage_embedding")
.modelId(modelId)
.queryText(QUERY)
.k(5)
.build();
if (expandNestedDocs != null) {
neuralQueryBuilder.expandNested(expandNestedDocs);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,37 +51,25 @@ public void testKnnRadialSearch_E2EFlow() throws Exception {
}

private void validateIndexQuery(final String modelId) {
NeuralQueryBuilder neuralQueryBuilderWithMinScoreQuery = new NeuralQueryBuilder(
"passage_embedding",
TEXT,
TEST_IMAGE_TEXT,
modelId,
null,
null,
0.01f,
null,
null,
null,
null,
null
);
NeuralQueryBuilder neuralQueryBuilderWithMinScoreQuery = NeuralQueryBuilder.builder()
.fieldName("passage_embedding")
.queryText(TEXT)
.queryImage(TEST_IMAGE_TEXT)
.modelId(modelId)
.minScore(0.01f)
.build();

Map<String, Object> responseWithMinScoreQuery = search(getIndexNameForTest(), neuralQueryBuilderWithMinScoreQuery, 1);
assertNotNull(responseWithMinScoreQuery);

NeuralQueryBuilder neuralQueryBuilderWithMaxDistanceQuery = new NeuralQueryBuilder(
"passage_embedding",
TEXT,
TEST_IMAGE_TEXT,
modelId,
null,
100000f,
null,
null,
null,
null,
null,
null
);
NeuralQueryBuilder neuralQueryBuilderWithMaxDistanceQuery = NeuralQueryBuilder.builder()
.fieldName("passage_embedding")
.queryText(TEXT)
.queryImage(TEST_IMAGE_TEXT)
.modelId(modelId)
.maxDistance(100000f)
.build();

Map<String, Object> responseWithMaxDistanceQuery = search(getIndexNameForTest(), neuralQueryBuilderWithMaxDistanceQuery, 1);
assertNotNull(responseWithMaxDistanceQuery);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,13 @@ public void testTextImageEmbeddingProcessor_E2EFlow() throws Exception {
private void validateTestIndex(final String modelId) throws Exception {
int docCount = getDocCount(getIndexNameForTest());
assertEquals(2, docCount);
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(
"passage_embedding",
TEXT,
TEST_IMAGE_TEXT,
modelId,
1,
null,
null,
null,
null,
null,
null,
null
);
NeuralQueryBuilder neuralQueryBuilder = NeuralQueryBuilder.builder()
.fieldName("passage_embedding")
.queryText(TEXT)
.queryImage(TEST_IMAGE_TEXT)
.modelId(modelId)
.k(1)
.build();
Map<String, Object> response = search(getIndexNameForTest(), neuralQueryBuilder, 1);
assertNotNull(response);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,14 @@ public void testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow() throws

public void testNeuralQueryEnricherProcessor_NeuralSearch_E2EFlow() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
NeuralQueryBuilder neuralQueryBuilderWithoutModelId = new NeuralQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);
NeuralQueryBuilder neuralQueryBuilderWithModelId = new NeuralQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);
NeuralQueryBuilder neuralQueryBuilderWithoutModelId = NeuralQueryBuilder.builder()
.fieldName(TEST_ENCODING_FIELD)
.queryText(TEXT_1)
.build();
NeuralQueryBuilder neuralQueryBuilderWithModelId = NeuralQueryBuilder.builder()
.fieldName(TEST_ENCODING_FIELD)
.queryText(TEXT_1)
.build();

if (isRunningAgainstOldCluster()) {
String modelId = uploadTextEmbeddingModel();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,12 @@ public void testTextEmbeddingProcessor_E2EFlow() throws Exception {
private void validateTestIndex(final String modelId) throws Exception {
int docCount = getDocCount(getIndexNameForTest());
assertEquals(2, docCount);
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder();
neuralQueryBuilder.fieldName("passage_embedding");
neuralQueryBuilder.modelId(modelId);
neuralQueryBuilder.queryText(TEXT);
neuralQueryBuilder.k(1);
NeuralQueryBuilder neuralQueryBuilder = NeuralQueryBuilder.builder()
.fieldName("passage_embedding")
.queryText(TEXT)
.modelId(modelId)
.k(1)
.build();
Map<String, Object> response = search(getIndexNameForTest(), neuralQueryBuilder, 1);
assertNotNull(response);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,12 @@ private HybridQueryBuilder getQueryBuilder(
final Map<String, ?> methodParameters,
final RescoreContext rescoreContextForNeuralQuery
) {
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder();
neuralQueryBuilder.fieldName(VECTOR_EMBEDDING_FIELD);
neuralQueryBuilder.modelId(modelId);
neuralQueryBuilder.queryText(QUERY);
neuralQueryBuilder.k(5);
NeuralQueryBuilder neuralQueryBuilder = NeuralQueryBuilder.builder()
.fieldName(VECTOR_EMBEDDING_FIELD)
.modelId(modelId)
.queryText(QUERY)
.k(5)
.build();
if (expandNestedDocs != null) {
neuralQueryBuilder.expandNested(expandNestedDocs);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,37 +77,25 @@ private void validateIndexQueryOnUpgrade(final int numberOfDocs, final String mo
assertEquals(numberOfDocs, docCount);
loadModel(modelId);

NeuralQueryBuilder neuralQueryBuilderWithMinScoreQuery = new NeuralQueryBuilder(
"passage_embedding",
text,
imageText,
modelId,
null,
null,
0.01f,
null,
null,
null,
null,
null
);
NeuralQueryBuilder neuralQueryBuilderWithMinScoreQuery = NeuralQueryBuilder.builder()
.fieldName("passage_embedding")
.queryText(text)
.queryImage(imageText)
.modelId(modelId)
.minScore(0.01f)
.build();

Map<String, Object> responseWithMinScore = search(getIndexNameForTest(), neuralQueryBuilderWithMinScoreQuery, 1);
assertNotNull(responseWithMinScore);

NeuralQueryBuilder neuralQueryBuilderWithMaxDistanceQuery = new NeuralQueryBuilder(
"passage_embedding",
text,
imageText,
modelId,
null,
100000f,
null,
null,
null,
null,
null,
null
);
NeuralQueryBuilder neuralQueryBuilderWithMaxDistanceQuery = NeuralQueryBuilder.builder()
.fieldName("passage_embedding")
.queryText(text)
.queryImage(imageText)
.modelId(modelId)
.maxDistance(100000f)
.build();

Map<String, Object> responseWithMaxScore = search(getIndexNameForTest(), neuralQueryBuilderWithMaxDistanceQuery, 1);
assertNotNull(responseWithMaxScore);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,20 +76,14 @@ private void validateTestIndexOnUpgrade(final int numberOfDocs, final String mod
int docCount = getDocCount(getIndexNameForTest());
assertEquals(numberOfDocs, docCount);
loadModel(modelId);
NeuralQueryBuilder neuralQueryBuilderWithKQuery = new NeuralQueryBuilder(
"passage_embedding",
text,
imageText,
modelId,
1,
null,
null,
null,
null,
null,
null,
null
);
NeuralQueryBuilder neuralQueryBuilderWithKQuery = NeuralQueryBuilder.builder()
.fieldName("passage_embedding")
.queryText(text)
.queryImage(imageText)
.modelId(modelId)
.k(1)
.build();

Map<String, Object> responseWithKQuery = search(getIndexNameForTest(), neuralQueryBuilderWithKQuery, 1);
assertNotNull(responseWithKQuery);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,14 @@ public void testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow() throws
// the feature is available from 2.11 onwards
public void testNeuralQueryEnricherProcessor_NeuralSearch_E2EFlow() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
NeuralQueryBuilder neuralQueryBuilderWithoutModelId = new NeuralQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);
NeuralQueryBuilder neuralQueryBuilderWithModelId = new NeuralQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);
NeuralQueryBuilder neuralQueryBuilderWithoutModelId = NeuralQueryBuilder.builder()
.fieldName(TEST_ENCODING_FIELD)
.queryText(TEXT_1)
.build();
NeuralQueryBuilder neuralQueryBuilderWithModelId = NeuralQueryBuilder.builder()
.fieldName(TEST_ENCODING_FIELD)
.queryText(TEXT_1)
.build();

switch (getClusterType()) {
case OLD:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,12 @@ private void validateTestIndexOnUpgrade(final int numberOfDocs, final String mod
int docCount = getDocCount(getIndexNameForTest());
assertEquals(numberOfDocs, docCount);
loadModel(modelId);
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder();
neuralQueryBuilder.fieldName("passage_embedding");
neuralQueryBuilder.modelId(modelId);
neuralQueryBuilder.queryText(text);
neuralQueryBuilder.k(1);
NeuralQueryBuilder neuralQueryBuilder = NeuralQueryBuilder.builder()
.fieldName("passage_embedding")
.modelId(modelId)
.queryText(text)
.k(1)
.build();
Map<String, Object> response = search(getIndexNameForTest(), neuralQueryBuilder, 1);
assertNotNull(response);
}
Expand Down
Loading

0 comments on commit 2ecd32c

Please sign in to comment.