apache · mjosephidou · Jan 4, 2018 · Aug 6, 2019 · Aug 12, 2019 · Aug 12, 2019
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java
@@ -132,17 +132,28 @@ public Collection<SearchGroup<T>> getTopGroups(int groupOffset) throws IOExcepti
     final Collection<SearchGroup<T>> result = new ArrayList<>();
     int upto = 0;
     final int sortFieldCount = comparators.length;
+    assert sortFieldCount > 0; // this must always be true because a Sort must contain at least one field
     for(CollectedSearchGroup<T> group : orderedGroups) {
       if (upto++ < groupOffset) {
         continue;
       }
       // System.out.println("  group=" + (group.groupValue == null ? "null" : group.groupValue.toString()));
       SearchGroup<T> searchGroup = new SearchGroup<>();
       searchGroup.groupValue = group.groupValue;
+      // Keep the shard-level Lucene doc id so that the corresponding Solr id can be looked up when the search group is serialized for the federator
+      searchGroup.topDocLuceneId = group.topDoc;
       searchGroup.sortValues = new Object[sortFieldCount];
       for(int sortFieldIDX=0;sortFieldIDX<sortFieldCount;sortFieldIDX++) {
         searchGroup.sortValues[sortFieldIDX] = comparators[sortFieldIDX].value(group.comparatorSlot);
       }
+      searchGroup.topDocScore = Float.NaN;
+      // If the relevance (score) comparator is among the comparators, return its value as the score; otherwise the score stays NaN
+      for (FieldComparator comparator: comparators){
+        if (comparator instanceof FieldComparator.RelevanceComparator){
+          searchGroup.topDocScore = (Float)comparator.value(group.comparatorSlot);
+        }
+      }
+
       result.add(searchGroup);
     }
     //System.out.println("  return " + result.size() + " groups");

diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java
@@ -48,9 +48,25 @@ public class SearchGroup<T> {
    * been passed to {@link FirstPassGroupingCollector#getTopGroups} */
   public Object[] sortValues;
 
+  /** The top doc of this group: we track the Lucene id,
+   * the Solr id and the score of the document */
+  public Object topDocSolrId;
+  public float topDocScore;
+
+  /** The topDocLuceneId is not meaningful at the federator level because it is unique only at the shard level
+   * (as a primitive int it can never be null). It is used by the shard to get the corresponding Solr id
+   * when serializing the search group to send to the federator. */
+  public int topDocLuceneId;
+
   @Override
   public String toString() {
-    return("SearchGroup(groupValue=" + groupValue + " sortValues=" + Arrays.toString(sortValues) + ")");
+    return "SearchGroup{" + // reports the top-doc fields in addition to groupValue and sortValues
+        "groupValue=" + groupValue +
+        ", sortValues=" + Arrays.toString(sortValues) +
+        ", topDocSolrId=" + topDocSolrId +
+        ", topDocScore=" + topDocScore +
+        ", topDocLuceneId=" + topDocLuceneId +
+        '}';
   }
 
   @Override
@@ -113,6 +129,11 @@ private static class MergedGroup<T> {
     public boolean processed;
     public boolean inQueue;
 
+    /** The top doc of this group:
+     * the Solr id and the score of the document */
+    public float topDocScore;
+    public Object topDocSolrId;
+
     public MergedGroup(T groupValue) {
       this.groupValue = groupValue;
     }
@@ -225,6 +246,8 @@ private void updateNextGroup(int topN, ShardIter<T> shard) {
           // Start a new group:
           //System.out.println("      new");
           mergedGroup = new MergedGroup<>(group.groupValue);
+          mergedGroup.topDocSolrId = group.topDocSolrId;
+          mergedGroup.topDocScore = group.topDocScore;
           mergedGroup.minShardIndex = shard.shardIndex;
           assert group.sortValues != null;
           mergedGroup.topValues = group.sortValues;
@@ -262,6 +285,8 @@ private void updateNextGroup(int topN, ShardIter<T> shard) {
             if (mergedGroup.inQueue) {
               queue.remove(mergedGroup);
             }
+            mergedGroup.topDocScore = group.topDocScore;
+            mergedGroup.topDocSolrId = group.topDocSolrId;
             mergedGroup.topValues = group.sortValues;
             mergedGroup.minShardIndex = shard.shardIndex;
             queue.add(mergedGroup);
@@ -308,6 +333,8 @@ public Collection<SearchGroup<T>> merge(List<Collection<SearchGroup<T>>> shards,
           final SearchGroup<T> newGroup = new SearchGroup<>();
           newGroup.groupValue = group.groupValue;
           newGroup.sortValues = group.topValues;
+          newGroup.topDocSolrId = group.topDocSolrId;
+          newGroup.topDocScore = group.topDocScore;
           newTopGroups.add(newGroup);
           if (newTopGroups.size() == topN) {
             break;

diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java
@@ -135,12 +135,12 @@ public static <T> TopGroups<T> merge(TopGroups<T>[] shardGroups, Sort groupSort,
     } else {
       shardTopDocs = new TopFieldDocs[shardGroups.length];
     }
-    float totalMaxScore = Float.MIN_VALUE;
+    float totalMaxScore = Float.NaN;
 
     for(int groupIDX=0;groupIDX<numGroups;groupIDX++) {
       final T groupValue = shardGroups[0].groups[groupIDX].groupValue;
       //System.out.println("  merge groupValue=" + groupValue + " sortValues=" + Arrays.toString(shardGroups[0].groups[groupIDX].groupSortValues));
-      float maxScore = Float.MIN_VALUE;
+      float maxScore = Float.NaN;
       int totalHits = 0;
       double scoreSum = 0.0;
       for(int shardIDX=0;shardIDX<shardGroups.length;shardIDX++) {
@@ -173,8 +173,12 @@ public static <T> TopGroups<T> merge(TopGroups<T>[] shardGroups, Sort groupSort,
         for (int i = 0; i < shardTopDocs[shardIDX].scoreDocs.length; i++) {
           shardTopDocs[shardIDX].scoreDocs[i].shardIndex = shardIDX;
         }
-
-        maxScore = Math.max(maxScore, shardGroupDocs.maxScore);
+        if (Float.isNaN(maxScore)) {
+          maxScore = shardGroupDocs.maxScore;
+        }
+        else if (! Float.isNaN(shardGroupDocs.maxScore)) {
+          maxScore = Math.max(maxScore, shardGroupDocs.maxScore);
+        }
         assert shardGroupDocs.totalHits.relation == Relation.EQUAL_TO;
         totalHits += shardGroupDocs.totalHits.value;
         scoreSum += shardGroupDocs.score;

diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -99,9 +99,11 @@
 import org.apache.solr.search.grouping.distributed.requestfactory.StoredFieldsShardRequestFactory;
 import org.apache.solr.search.grouping.distributed.requestfactory.TopGroupsShardRequestFactory;
 import org.apache.solr.search.grouping.distributed.responseprocessor.SearchGroupShardResponseProcessor;
+import org.apache.solr.search.grouping.distributed.responseprocessor.SkipSecondStepSearchGroupShardResponseProcessor;
 import org.apache.solr.search.grouping.distributed.responseprocessor.StoredFieldsShardResponseProcessor;
 import org.apache.solr.search.grouping.distributed.responseprocessor.TopGroupsShardResponseProcessor;
 import org.apache.solr.search.grouping.distributed.shardresultserializer.SearchGroupsResultTransformer;
+import org.apache.solr.search.grouping.distributed.shardresultserializer.SkipSecondStepSearchResultResultTransformer;
 import org.apache.solr.search.grouping.distributed.shardresultserializer.TopGroupsResultTransformer;
 import org.apache.solr.search.grouping.endresulttransformer.EndResultTransformer;
 import org.apache.solr.search.grouping.endresulttransformer.GroupedEndResultTransformer;
@@ -305,17 +307,18 @@ protected void prepareGrouping(ResponseBuilder rb) throws IOException {
     groupingSpec.setNeedScore((rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0);
     groupingSpec.setTruncateGroups(params.getBool(GroupParams.GROUP_TRUNCATE, false));
 
-    // when group.format=grouped then, validate group.offset
-    // for group.main=true and group.format=simple, start value is used instead of group.offset
-    // and start is already validate above for negative values
-    if (!(groupingSpec.isMain() || groupingSpec.getResponseFormat() == Grouping.Format.simple) &&
-        groupingSpec.getWithinGroupSortSpec().getOffset() < 0) {
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'group.offset' parameter cannot be negative");
+    if (params.getBool(GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND, GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND_DEFAULT)) {
+      // skip second step is enabled
+      groupingSpec.setSkipSecondGroupingStep(true);
+      // a rank query (reranking) cannot be combined with this option, so reject such requests
+      if (rb.getRankQuery() != null) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+            GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND+" does not support reranking parameter "+CommonParams.RQ);
+      }
     }
+    groupingSpec.validate();
   }
 
-
-
   /**
    * Actually run the query
    */
@@ -547,7 +550,9 @@ protected int groupedDistributedProcess(ResponseBuilder rb) {
     } else if (rb.stage < ResponseBuilder.STAGE_EXECUTE_QUERY) {
       nextStage = ResponseBuilder.STAGE_EXECUTE_QUERY;
     } else if (rb.stage == ResponseBuilder.STAGE_EXECUTE_QUERY) {
-      shardRequestFactory = new TopGroupsShardRequestFactory();
+      if (!rb.getGroupingSpec().isSkipSecondGroupingStep()) {
+        shardRequestFactory = new TopGroupsShardRequestFactory();
+      }
       nextStage = ResponseBuilder.STAGE_GET_FIELDS;
     } else if (rb.stage < ResponseBuilder.STAGE_GET_FIELDS) {
       nextStage = ResponseBuilder.STAGE_GET_FIELDS;
@@ -593,10 +598,18 @@ public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
     }
   }
 
+  protected SearchGroupShardResponseProcessor newSearchGroupShardResponseProcessor(ResponseBuilder rb) {
+    if (rb.getGroupingSpec().isSkipSecondGroupingStep()) { // the request skips the second grouping step
+      return new SkipSecondStepSearchGroupShardResponseProcessor();
+    } else { // otherwise use the standard processor
+      return new SearchGroupShardResponseProcessor();
+    }
+  }
+
   protected void handleGroupedResponses(ResponseBuilder rb, ShardRequest sreq) {
     ShardResponseProcessor responseProcessor = null;
     if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_GROUPS) != 0) {
-      responseProcessor = new SearchGroupShardResponseProcessor();
+      responseProcessor = newSearchGroupShardResponseProcessor(rb);
     } else if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
       responseProcessor = new TopGroupsShardResponseProcessor();
     } else if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) {
@@ -1286,6 +1299,14 @@ private boolean doProcessSearchByIds(ResponseBuilder rb) throws IOException {
     return true;
   }
 
+  protected SearchGroupsResultTransformer newSearchGroupsResultTransformer(ResponseBuilder rb, SolrIndexSearcher searcher) {
+    if (rb.getGroupingSpec().isSkipSecondGroupingStep()) { // the request skips the second grouping step
+      return new SkipSecondStepSearchResultResultTransformer(searcher);
+    } else { // otherwise use the standard transformer
+      return new SearchGroupsResultTransformer(searcher);
+    }
+  }
+
   private void doProcessGroupedDistributedSearchFirstPhase(ResponseBuilder rb, QueryCommand cmd, QueryResult result) throws IOException {
 
     GroupingSpecification groupingSpec = rb.getGroupingSpec();
@@ -1315,7 +1336,7 @@ private void doProcessGroupedDistributedSearchFirstPhase(ResponseBuilder rb, Que
 
     CommandHandler commandHandler = topsGroupsActionBuilder.build();
     commandHandler.execute();
-    SearchGroupsResultTransformer serializer = new SearchGroupsResultTransformer(searcher);
+    SearchGroupsResultTransformer serializer = newSearchGroupsResultTransformer(rb, searcher);
 
     rsp.add("firstPhase", commandHandler.processResult(result, serializer));
     rsp.add("totalHitCount", commandHandler.getTotalHitCount());

diff --git a/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java b/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java
@@ -16,6 +16,11 @@
  */
 package org.apache.solr.search.grouping;
 
+import java.util.Arrays;
+import java.util.Collections;
+import org.apache.lucene.search.SortField;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.GroupParams;
 import org.apache.solr.search.Grouping;
 import org.apache.solr.search.SortSpec;
 
@@ -36,6 +41,73 @@ public class GroupingSpecification {
   private Grouping.Format responseFormat;
   private boolean needScore;
   private boolean truncateGroups;
+  /* This is an optimization to skip the second grouping step when groupLimit is 1. The second
+  * grouping step retrieves the top K documents for each group. This is not necessary when only one
+  * document per group is required because in the first step every shard sends back the group score given
+  * by its top document.
+  */
+  private boolean skipSecondGroupingStep;
+
+  /**
+   * Validates this {@code GroupingSpecification}: the skip-second-step restrictions (when that option
+   * is set) and, unless group.main=true or group.format=simple, a non-negative group.offset.
+   * @throws SolrException (BAD_REQUEST) if the specification is not valid; otherwise returns normally
+   */
+  public void validate() throws SolrException {
+    if (skipSecondGroupingStep) {
+      validateSkipSecondGroupingStep();
+    }
+
+    // group.offset is only validated when group.format=grouped:
+    // for group.main=true and group.format=simple, the start value is used instead of group.offset
+    if (!(main || responseFormat == Grouping.Format.simple) &&
+       withinGroupSortSpec.getOffset() < 0) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'group.offset' parameter cannot be negative");
+    }
+  }
+
+  private void validateSkipSecondGroupingStep() {
+    // Skipping the second step is only possible if exactly one doc per group is requested
+    final int limit =  withinGroupSortSpec.getCount();
+    final int offset = withinGroupSortSpec.getOffset();
+    if (limit != 1) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+          GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND + " does not support " +
+          GroupParams.GROUP_LIMIT + " != 1 ("+GroupParams.GROUP_LIMIT+" is "+limit+")");
+    }
+
+    // grouping by function (GroupParams.GROUP_FUNC) is not supported
+    if (functions.length > 0){
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+              GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND + " does not support "+ GroupParams.GROUP_FUNC);
+    }
+    // grouping by query (GroupParams.GROUP_QUERY) is not supported
+    if (queries.length > 0){
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+              GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND + " does not support "+ GroupParams.GROUP_QUERY);
+    }
+
+    if (offset != 0) { // a non-zero within-group offset is not supported
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+          GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND + " does not support " + GroupParams.GROUP_OFFSET + " != 0 (" +
+              GroupParams.GROUP_OFFSET + " is "+offset + ")");
+    }
+
+    if (includeGroupCount) { // requesting the total group count is not supported
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+          GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND + " does not support " + GroupParams.GROUP_TOTAL_COUNT + " == true");
+    }
+
+    final SortField[] withinGroupSortFields = withinGroupSortSpec.getSort().getSort();
+    final SortField[] groupSortFields = groupSortSpec.getSort().getSort();
+
+    // The within-group sort must be a prefix of the group sort, because if we skip the second step no sorting within group
+    // will be done. indexOfSubList(...) == 0 means groupSortFields starts with withinGroupSortFields (also true when equal).
+    if (Collections.indexOfSubList(Arrays.asList(groupSortFields), Arrays.asList(withinGroupSortFields)) != 0) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+          GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND + " does not allow the given within/global sort group configuration");
+    }
+  }
 
   public String[] getFields() {
     return fields;
@@ -129,4 +201,12 @@ public void setWithinGroupSortSpec(SortSpec withinGroupSortSpec) {
     this.withinGroupSortSpec = withinGroupSortSpec;
   }
 
+  public boolean isSkipSecondGroupingStep() {
+    return skipSecondGroupingStep; // current value of the skip-second-grouping-step flag
+  }
+
+  public void setSkipSecondGroupingStep(boolean skipSecondGroupingStep) {
+    this.skipSecondGroupingStep = skipSecondGroupingStep; // only stores the flag; validate() enforces its restrictions
+  }
+
 }
diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/GroupConverter.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/GroupConverter.java
@@ -52,6 +52,8 @@ static Collection<SearchGroup<BytesRef>> fromMutable(SchemaField field, Collecti
     for (SearchGroup<MutableValue> original : values) {
       SearchGroup<BytesRef> converted = new SearchGroup<BytesRef>();
       converted.sortValues = original.sortValues;
+      converted.topDocLuceneId = original.topDocLuceneId;
+      converted.topDocScore = original.topDocScore;
       if (original.groupValue.exists) {
         BytesRefBuilder binary = new BytesRefBuilder();
         fieldType.readableToIndexed(original.groupValue.toString(), binary);