Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Query: Fixes (workaround) for query plan issue where placeholder index does not start at zero #4885

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

namespace Microsoft.Azure.Cosmos.Query.Core.Pipeline.CrossPartition.HybridSearch
{
using System;
using System.Collections.Generic;
using Microsoft.Azure.Cosmos.CosmosElements;

internal sealed class FullTextStatistics
Expand All @@ -14,7 +12,7 @@ internal sealed class FullTextStatistics

public long TotalWordCount { get; }

public ReadOnlyMemory<long> HitCounts => this.hitCounts;
public System.ReadOnlyMemory<long> HitCounts => this.hitCounts;

public FullTextStatistics(long totalWordCount, long[] hitCounts)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ private static TryCatch<List<IQueryPipelineStage>> CreateQueryPipelineStages(
List<QueryInfo> rewrittenQueryInfos = new List<QueryInfo>(queryInfos.Count);
foreach (QueryInfo queryInfo in queryInfos)
{
QueryInfo rewrittenQueryInfo = RewriteOrderByQueryInfo(queryInfo, statistics);
QueryInfo rewrittenQueryInfo = RewriteOrderByQueryInfo(queryInfo, statistics, queryInfos.Count);
rewrittenQueryInfos.Add(rewrittenQueryInfo);
}

Expand Down Expand Up @@ -608,19 +608,19 @@ private static void ComputeRrfScores(
}
}

private static QueryInfo RewriteOrderByQueryInfo(QueryInfo queryInfo, GlobalFullTextSearchStatistics statistics)
private static QueryInfo RewriteOrderByQueryInfo(QueryInfo queryInfo, GlobalFullTextSearchStatistics statistics, int componentCount)
{
Debug.Assert(queryInfo.HasOrderBy, "The component query should have an order by");
Debug.Assert(queryInfo.HasNonStreamingOrderBy, "The component query is a non streaming order by");

List<string> rewrittenOrderByExpressions = new List<string>(queryInfo.OrderByExpressions.Count);
foreach (string orderByExpression in queryInfo.OrderByExpressions)
{
string rewrittenOrderByExpression = FormatComponentQueryText(orderByExpression, statistics);
string rewrittenOrderByExpression = FormatComponentQueryTextWorkaround(orderByExpression, statistics, componentCount);
rewrittenOrderByExpressions.Add(rewrittenOrderByExpression);
}

string rewrittenQuery = FormatComponentQueryText(queryInfo.RewrittenQuery, statistics);
string rewrittenQuery = FormatComponentQueryTextWorkaround(queryInfo.RewrittenQuery, statistics, componentCount);

QueryInfo result = new QueryInfo()
{
Expand Down Expand Up @@ -648,6 +648,8 @@ private static QueryInfo RewriteOrderByQueryInfo(QueryInfo queryInfo, GlobalFull
return result;
}

// This method is unused currently, but we will switch back to using this
// once the gateway has been redeployed with the fix for placeholder indexes
private static string FormatComponentQueryText(string format, GlobalFullTextSearchStatistics statistics)
{
string query = format.Replace(Placeholders.TotalDocumentCount, statistics.DocumentCount.ToString());
Expand All @@ -665,6 +667,33 @@ private static string FormatComponentQueryText(string format, GlobalFullTextSear
return query;
}

/// <summary>
/// Substitutes the global full text search statistics into a component query (or order by expression)
/// whose per-component placeholders are not guaranteed to be numbered from zero.
/// </summary>
/// <remarks>
/// Every candidate placeholder index in [0, <paramref name="componentCount"/>) is probed. Indexes whose
/// total word count placeholder does not occur in the text are skipped, and the entries of
/// <c>statistics.FullTextStatistics</c> are consumed in order, one per index that is actually present.
/// Placeholder lookups are ordinal so they agree with <see cref="string.Replace(string, string)"/>, and all
/// numbers are rendered with the invariant culture because the result is machine-readable query text.
/// </remarks>
/// <param name="format">Query text containing the statistics placeholders.</param>
/// <param name="statistics">Global statistics whose values replace the placeholders.</param>
/// <param name="componentCount">Exclusive upper bound on the placeholder indexes to probe.</param>
/// <returns>The query text with every recognized placeholder replaced.</returns>
private static string FormatComponentQueryTextWorkaround(string format, GlobalFullTextSearchStatistics statistics, int componentCount)
{
    System.Globalization.CultureInfo invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    string query = format.Replace(Placeholders.TotalDocumentCount, statistics.DocumentCount.ToString(invariantCulture));

    // Index of the next unused statistics entry; it only advances when a placeholder index is found in the text.
    int statisticsIndex = 0;
    for (int componentIndex = 0; componentIndex < componentCount; ++componentIndex)
    {
        string totalWordCountPlaceholder = string.Format(invariantCulture, Placeholders.FormattableTotalWordCount, componentIndex);
        string hitCountsArrayPlaceholder = string.Format(invariantCulture, Placeholders.FormattableHitCountsArray, componentIndex);

        // Ordinal search: the default IndexOf(string) overload is culture-sensitive and can disagree with
        // the ordinal Replace calls below.
        if (query.IndexOf(totalWordCountPlaceholder, System.StringComparison.Ordinal) < 0)
        {
            continue;
        }

        FullTextStatistics fullTextStatistics = statistics.FullTextStatistics[statisticsIndex];
        query = query.Replace(totalWordCountPlaceholder, fullTextStatistics.TotalWordCount.ToString(invariantCulture));

        string[] hitCountTexts = System.Array.ConvertAll(
            fullTextStatistics.HitCounts.ToArray(),
            hitCount => hitCount.ToString(invariantCulture));
        string hitCountsArray = "[" + string.Join(",", hitCountTexts) + "]";
        query = query.Replace(hitCountsArrayPlaceholder, hitCountsArray);

        ++statisticsIndex;
    }

    return query;
}

private static async ValueTask<TryCatch<(GlobalFullTextSearchStatistics, QueryPage)>> GatherStatisticsAsync(
IQueryPipelineStage source,
ITrace trace,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,16 @@ ORDER BY RANK RRF(FullTextScore(c.title, ['John']), FullTextScore(c.text, ['Unit
FROM c
ORDER BY RANK RRF(FullTextScore(c.title, ['John']), FullTextScore(c.text, ['United States']), VectorDistance(c.vector, {SampleVector}))",
new List<List<int>>{new List<int>{ 21, 75, 37, 24, 26, 35, 49, 87, 55, 9 } }),
MakeSanityTest($@"
SELECT TOP 10 c.index AS Index, c.title AS Title, c.text AS Text
FROM c
ORDER BY RANK RRF(VectorDistance(c.vector, {SampleVector}), FullTextScore(c.title, ['John']), FullTextScore(c.text, ['United States']))",
new List<List<int>>{new List<int>{ 21, 75, 37, 24, 26, 35, 49, 87, 55, 9 } }),
MakeSanityTest($@"
SELECT TOP 10 c.index AS Index, c.title AS Title, c.text AS Text
FROM c
ORDER BY RANK RRF(VectorDistance(c.vector, {SampleVector}), FullTextScore(c.title, ['John']), VectorDistance(c.image, {SampleVector}), VectorDistance(c.backup_image, {SampleVector}), FullTextScore(c.text, ['United States']))",
new List<List<int>>{new List<int>{ 21, 75, 37, 24, 26, 35, 49, 87, 55, 9 } }),
};

foreach (SanityTestCase testCase in testCases)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,14 @@ public async Task HybridSearchTests()
skip: 7,
take: 10,
pageSize: 1),
MakeHybridSearchTest(
leafPageCount: 0,
backendPageSize: 10,
requiresGlobalStatistics: true,
skip: 0,
take: 10,
pageSize: 10,
returnEmptyGlobalStatistics: true),
};

foreach (HybridSearchTest testCase in testCases)
Expand Down Expand Up @@ -347,7 +355,8 @@ private static async Task RunHybridSearchTest(HybridSearchTest testCase)
PartitionedFeedMode.NonStreamingReversed,
componentCount: 2,
leafPageCount: testCase.LeafPageCount,
backendPageSize: testCase.BackendPageSize);
backendPageSize: testCase.BackendPageSize,
returnEmptyGlobalStatistics: testCase.ReturnEmptyGlobalStatistics);

(IReadOnlyList<CosmosElement> results, double requestCharge) = await CreateAndRunHybridSearchQueryPipelineStage(
documentContainer: nonStreamingDocumentContainer,
Expand Down Expand Up @@ -618,9 +627,16 @@ public TestCase(
}
}

private static HybridSearchTest MakeHybridSearchTest(int leafPageCount, int backendPageSize, bool requiresGlobalStatistics, int? skip, int? take, int pageSize)
private static HybridSearchTest MakeHybridSearchTest(
int leafPageCount,
int backendPageSize,
bool requiresGlobalStatistics,
int? skip,
int? take,
int pageSize,
bool returnEmptyGlobalStatistics = false)
{
return new HybridSearchTest(leafPageCount, backendPageSize, requiresGlobalStatistics, skip, take, pageSize);
return new HybridSearchTest(leafPageCount, backendPageSize, requiresGlobalStatistics, skip, take, pageSize, returnEmptyGlobalStatistics);
}

private class HybridSearchTest
Expand All @@ -637,14 +653,24 @@ private class HybridSearchTest

public int PageSize { get; }

public HybridSearchTest(int leafPageCount, int backendPageSize, bool requiresGlobalStatistics, int? skip, int? take, int pageSize)
public bool ReturnEmptyGlobalStatistics { get; }

public HybridSearchTest(
int leafPageCount,
int backendPageSize,
bool requiresGlobalStatistics,
int? skip,
int? take,
int pageSize,
bool returnEmptyGlobalStatistics)
{
this.LeafPageCount = leafPageCount;
this.BackendPageSize = backendPageSize;
this.RequiresGlobalStatistics = requiresGlobalStatistics;
this.Skip = skip;
this.Take = take;
this.PageSize = pageSize;
this.ReturnEmptyGlobalStatistics = returnEmptyGlobalStatistics;
}
}

Expand Down Expand Up @@ -1014,6 +1040,8 @@ private class MockDocumentContainer : IDocumentContainer

private readonly double totalRequestCharge;

private readonly bool returnEmptyGlobalStatistics;

private int statisticsQueryCount;

private int queryCount;
Expand Down Expand Up @@ -1041,7 +1069,8 @@ public static MockDocumentContainer Create(
PartitionedFeedMode feedMode,
int componentCount,
int leafPageCount,
int backendPageSize)
int backendPageSize,
bool returnEmptyGlobalStatistics)
{
IReadOnlyList<IReadOnlyDictionary<FeedRange, IReadOnlyList<IReadOnlyList<CosmosElement>>>> pages = CreateHybridSearchPartitionedFeed(
componentCount,
Expand All @@ -1055,7 +1084,8 @@ public static MockDocumentContainer Create(
streaming: !feedMode.HasFlag(PartitionedFeedMode.NonStreaming),
componentSelector: GetOrderByScoreKind,
isGlobalStatisticsQuery: IsGlobalStatisticsQuery,
totalRequestCharge: 0);
totalRequestCharge: 0,
returnEmptyGlobalStatistics: returnEmptyGlobalStatistics);
}

public static MockDocumentContainer Create(IReadOnlyList<FeedRangeEpk> feedRanges, PartitionedFeedMode feedMode, DocumentCreationMode documentCreationMode)
Expand All @@ -1073,21 +1103,24 @@ public static MockDocumentContainer Create(IReadOnlyList<FeedRangeEpk> feedRange
streaming: !feedMode.HasFlag(PartitionedFeedMode.NonStreaming),
componentSelector: _ => 0,
isGlobalStatisticsQuery: _ => false,
totalRequestCharge);
totalRequestCharge,
returnEmptyGlobalStatistics: false);
}

private MockDocumentContainer(
IReadOnlyList<IReadOnlyDictionary<FeedRange, IReadOnlyList<IReadOnlyList<CosmosElement>>>> pages,
bool streaming,
Func<SqlQuerySpec, int> componentSelector,
Func<SqlQuerySpec, bool> isGlobalStatisticsQuery,
double totalRequestCharge)
double totalRequestCharge,
bool returnEmptyGlobalStatistics)
{
this.pages = pages ?? throw new ArgumentNullException(nameof(pages));
this.streaming = streaming;
this.componentSelector = componentSelector;
this.isGlobalStatisticsQuery = isGlobalStatisticsQuery;
this.totalRequestCharge = totalRequestCharge;
this.returnEmptyGlobalStatistics = returnEmptyGlobalStatistics;
}

public Task<ChangeFeedPage> ChangeFeedAsync(FeedRangeState<ChangeFeedState> feedRangeState, ChangeFeedExecutionOptions changeFeedPaginationOptions, ITrace trace, CancellationToken cancellationToken)
Expand Down Expand Up @@ -1155,7 +1188,7 @@ public Task<TryCatch<QueryPage>> MonadicQueryAsync(SqlQuerySpec sqlQuerySpec, Fe
if (this.isGlobalStatisticsQuery(sqlQuerySpec))
{
QueryPage globalStatisticsPage = new QueryPage(
documents: new List<CosmosElement> { CreateHybridSearchGlobalStatistics() },
documents: new List<CosmosElement> { this.returnEmptyGlobalStatistics ? CreateEmptyHybridSearchGlobalStatistics() : CreateHybridSearchGlobalStatistics() },
requestCharge: GlobalStatisticsQueryCharge,
activityId: ActivityId,
cosmosQueryExecutionInfo: null,
Expand All @@ -1172,7 +1205,7 @@ public Task<TryCatch<QueryPage>> MonadicQueryAsync(SqlQuerySpec sqlQuerySpec, Fe
int componentIndex = this.componentSelector(sqlQuerySpec);
IReadOnlyList<IReadOnlyList<CosmosElement>> feedRangePages = this.pages[componentIndex][feedRangeState.FeedRange];
int index = feedRangeState.State == null ? 0 : int.Parse(((CosmosString)feedRangeState.State.Value).Value);
IReadOnlyList<CosmosElement> documents = feedRangePages[index];
IReadOnlyList<CosmosElement> documents = index < feedRangePages.Count ? feedRangePages[index] : Enumerable.Empty<CosmosElement>().ToList();

QueryState state = index < feedRangePages.Count - 1 ? new QueryState(CosmosString.Create((index + 1).ToString())) : null;
QueryPage queryPage = new QueryPage(
Expand Down Expand Up @@ -1379,6 +1412,38 @@ enum DocumentCreationMode
MultiItemSwapped = MultiItem | Swapped,
}

/// <summary>
/// Builds a global statistics document in which every value is zero: a document count of 0 and two
/// full text statistics entries, the first with two hit counts and the second with one.
/// </summary>
/// <returns>The all-zero global statistics object.</returns>
private static CosmosElement CreateEmptyHybridSearchGlobalStatistics()
{
    // First entry: zero total words, two zeroed hit counts.
    CosmosObject firstEntry = CosmosObject.Create(new Dictionary<string, CosmosElement>
    {
        [TotalWordCount] = CosmosNumber64.Create(0),
        [HitCounts] = CosmosArray.Create(new List<CosmosElement> { CosmosNumber64.Create(0), CosmosNumber64.Create(0) }),
    });

    // Second entry: zero total words, a single zeroed hit count.
    CosmosObject secondEntry = CosmosObject.Create(new Dictionary<string, CosmosElement>
    {
        [TotalWordCount] = CosmosNumber64.Create(0),
        [HitCounts] = CosmosArray.Create(new List<CosmosElement> { CosmosNumber64.Create(0) }),
    });

    List<CosmosElement> entries = new List<CosmosElement> { firstEntry, secondEntry };

    return CosmosObject.Create(new Dictionary<string, CosmosElement>
    {
        [DocumentCountPropertyName] = CosmosNumber64.Create(0),
        [FullTextStatistics] = CosmosArray.Create(entries),
    });
}

private static CosmosElement CreateHybridSearchGlobalStatistics()
{
List<CosmosElement> statistics = new List<CosmosElement>
Expand Down
Loading