diff --git a/sensei-core/src/main/java/com/senseidb/search/node/broker/BrokerConfig.java b/sensei-core/src/main/java/com/senseidb/search/node/broker/BrokerConfig.java index 6a37edd5e..cbe73aa47 100644 --- a/sensei-core/src/main/java/com/senseidb/search/node/broker/BrokerConfig.java +++ b/sensei-core/src/main/java/com/senseidb/search/node/broker/BrokerConfig.java @@ -69,8 +69,8 @@ public BrokerConfig(Configuration senseiConf, zkurl = senseiConf.getString(SenseiConfigServletContextListener.SENSEI_CONF_ZKURL, zkurl); clusterName = senseiConf.getString(SenseiConfigServletContextListener.SENSEI_CONF_CLUSTER_NAME, clusterName); zkTimeout = senseiConf.getInt(SenseiConfigServletContextListener.SENSEI_CONF_ZKTIMEOUT, zkTimeout); - outlierMultiplier = senseiConf.getDouble(SenseiConfigServletContextListener.SENSEI_CONF_ZKTIMEOUT, 3.0); - outlierConstant = senseiConf.getDouble(SenseiConfigServletContextListener.SENSEI_CONF_ZKTIMEOUT, 150); + outlierMultiplier = senseiConf.getDouble(SenseiConfigServletContextListener.SENSEI_CONF_NC_OUTLIER_MULTIPLIER, 3.0); + outlierConstant = senseiConf.getDouble(SenseiConfigServletContextListener.SENSEI_CONF_NC_OUTLIER_CONSTANT, 150); connectTimeoutMillis = senseiConf.getInt(SenseiConfigServletContextListener.SENSEI_CONF_NC_CONN_TIMEOUT, 1000); writeTimeoutMillis = senseiConf.getInt(SenseiConfigServletContextListener.SENSEI_CONF_NC_WRITE_TIMEOUT, 150); diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/AndFilterConstructor.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/AndFilterConstructor.java index 84cc1aef1..733f3c73f 100644 --- a/sensei-core/src/main/java/com/senseidb/search/query/filters/AndFilterConstructor.java +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/AndFilterConstructor.java @@ -52,16 +52,16 @@ public AndFilterConstructor(QueryParser qparser) } @Override - protected Filter doConstructFilter(Object obj) throws Exception + protected SenseiFilter doConstructFilter(Object obj) throws Exception { JSONArray filterArray = (JSONArray)obj; - List filters = new ArrayList(filterArray.length()); + List filters = new ArrayList(filterArray.length()); for (int i=0; i andFilters = new ArrayList(); + List andFilters = new ArrayList(); if (obj != null) { if (obj instanceof JSONArray) @@ -90,29 +90,29 @@ else if (obj instanceof JSONObject) for (int i=0; i<((JSONArray)obj).length(); ++i) { andFilters.add( - new NotFilter(FilterConstructor.constructFilter(((JSONArray)obj).getJSONObject(i), + new SenseiNotFilter(FilterConstructor.constructFilter(((JSONArray)obj).getJSONObject(i), _qparser))); } } else if (obj instanceof JSONObject) { - andFilters.add(new NotFilter(FilterConstructor.constructFilter((JSONObject)obj, _qparser))); + andFilters.add(new SenseiNotFilter(FilterConstructor.constructFilter((JSONObject)obj, _qparser))); } } JSONArray array = json.optJSONArray(SHOULD_PARAM); if (array != null) { - List orFilters = new ArrayList(array.length()); + List orFilters = new ArrayList(array.length()); for (int i=0; i 0) - andFilters.add(new OrFilter(orFilters)); + andFilters.add(new SenseiOrFilter(orFilters)); } if (andFilters.size() > 0) - return new AndFilter(andFilters); + return new SenseiAndFilter(andFilters); else return null; } diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/ConstExpFilterConstructor.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/ConstExpFilterConstructor.java index 7a0aa5a84..658629d67 100644 --- a/sensei-core/src/main/java/com/senseidb/search/query/filters/ConstExpFilterConstructor.java +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/ConstExpFilterConstructor.java @@ -18,7 +18,10 @@ */ package com.senseidb.search.query.filters; +import com.senseidb.search.query.MatchNoneDocsQuery; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Filter; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryWrapperFilter; import org.json.JSONObject; @@ -27,17 +30,38 @@ import com.senseidb.util.JSONUtil.FastJSONArray; import com.senseidb.util.JSONUtil.FastJSONObject; +import java.io.IOException; + public class ConstExpFilterConstructor extends FilterConstructor { public static final String FILTER_TYPE = "const_exp"; @Override - protected Filter doConstructFilter(Object json) throws Exception + protected SenseiFilter doConstructFilter(Object json) throws Exception { Query q = QueryConstructor.constructQuery(new FastJSONObject().put(FILTER_TYPE, (JSONObject)json), null); if (q == null) return null; - return new QueryWrapperFilter(q); - } + + final QueryWrapperFilter filter = new QueryWrapperFilter(q); + + if(q instanceof MatchAllDocsQuery) { + return new SenseiFilter() { + @Override + public SenseiDocIdSet getSenseiDocIdSet(IndexReader reader) throws IOException { + return new SenseiDocIdSet(filter.getDocIdSet(reader), reader.maxDoc()); + } + }; + } else if(q instanceof MatchNoneDocsQuery) { + return new SenseiFilter() { + @Override + public SenseiDocIdSet getSenseiDocIdSet(IndexReader reader) throws IOException { + return new SenseiDocIdSet(filter.getDocIdSet(reader), 0); + } + }; + } else { + return SenseiFilter.buildDefault(filter); + } + } } diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/CustomFilterConstructor.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/CustomFilterConstructor.java index f97c54d3c..150006241 100644 --- a/sensei-core/src/main/java/com/senseidb/search/query/filters/CustomFilterConstructor.java +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/CustomFilterConstructor.java @@ -18,9 +18,12 @@ */ package com.senseidb.search.query.filters; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Filter; import org.json.JSONObject; +import java.io.IOException; + public class CustomFilterConstructor extends FilterConstructor { @@ -33,15 +36,20 @@ public class CustomFilterConstructor extends FilterConstructor // } @Override - protected Filter doConstructFilter(Object json) throws Exception + protected SenseiFilter doConstructFilter(Object json) throws Exception { try { String className = ((JSONObject)json).getString(CLASS_PARAM); Class filterClass = Class.forName(className); - Filter f = (Filter)filterClass.newInstance(); - return f; + final Filter f = (Filter)filterClass.newInstance(); + + if(f instanceof SenseiFilter) { + return (SenseiFilter) f; + } else { + return SenseiFilter.buildDefault(f); + } } catch(Throwable t) { diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/FacetSelectionFilterConstructor.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/FacetSelectionFilterConstructor.java index 8d870f345..ee7588235 100644 --- a/sensei-core/src/main/java/com/senseidb/search/query/filters/FacetSelectionFilterConstructor.java +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/FacetSelectionFilterConstructor.java @@ -19,10 +19,9 @@ package com.senseidb.search.query.filters; import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.Map; +import java.util.*; +import com.browseengine.bobo.facets.filter.RandomAccessFilter; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; @@ -63,17 +62,16 @@ public static BrowseSelection buildFacetSelection(String name,JSONObject json) t @Override - protected Filter doConstructFilter(Object obj) throws Exception { + protected SenseiFilter doConstructFilter(Object obj) throws Exception { final JSONObject json = (JSONObject)obj; - return new Filter(){ + return new SenseiFilter(){ @Override - public DocIdSet getDocIdSet(IndexReader reader) - throws IOException { + public SenseiDocIdSet getSenseiDocIdSet(IndexReader reader) throws IOException { if (reader instanceof BoboIndexReader){ BoboIndexReader boboReader = (BoboIndexReader)reader; Iterator iter = json.keys(); - ArrayList docSets = new ArrayList(); + ArrayList filters = new ArrayList(); while(iter.hasNext()){ String key = iter.next(); FacetHandler facetHandler = boboReader.getFacetHandler(key); @@ -81,7 +79,7 @@ public DocIdSet getDocIdSet(IndexReader reader) try{ JSONObject jsonObj = json.getJSONObject(key); BrowseSelection sel = buildFacetSelection(key, jsonObj); - docSets.add(facetHandler.buildFilter(sel).getDocIdSet(boboReader)); + filters.add(facetHandler.buildFilter(sel)); } catch(Exception e){ throw new IOException(e.getMessage()); @@ -91,9 +89,15 @@ public DocIdSet getDocIdSet(IndexReader reader) throw new IOException(key+" is not defined as a facet handler"); } } - if (docSets.size()==0) return null; - else if (docSets.size()==1) return docSets.get(0); - return new AndDocIdSet(docSets); + if(filters.isEmpty()) { + return null; + } else { + List senseiFilters = new ArrayList(filters.size()); + for(RandomAccessFilter raf : filters) { + senseiFilters.add(SenseiFilter.build(raf)); + } + return new SenseiAndFilter(senseiFilters).getSenseiDocIdSet(reader); + } } else{ throw new IllegalStateException("reader not instance of "+BoboIndexReader.class); diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/FilterConstructor.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/FilterConstructor.java index ae469e804..24b6a13aa 100644 --- a/sensei-core/src/main/java/com/senseidb/search/query/filters/FilterConstructor.java +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/FilterConstructor.java @@ -100,7 +100,7 @@ public static Map convertParams(JSONObject obj){ return paramMap; } - public static Filter constructFilter(JSONObject json, QueryParser qparser) throws Exception + public static SenseiFilter constructFilter(JSONObject json, QueryParser qparser) throws Exception { if (json == null) return null; @@ -118,6 +118,6 @@ public static Filter constructFilter(JSONObject json, QueryParser qparser) throw return filterConstructor.doConstructFilter(json.get(type)); } - abstract protected Filter doConstructFilter(Object json/* JSONObject or JSONArray */) throws Exception; + abstract protected SenseiFilter doConstructFilter(Object json/* JSONObject or JSONArray */) throws Exception; } diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/NullFilterConstructor.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/NullFilterConstructor.java index 3ed04e955..7d34fa60d 100644 --- a/sensei-core/src/main/java/com/senseidb/search/query/filters/NullFilterConstructor.java +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/NullFilterConstructor.java @@ -20,6 +20,8 @@ import java.io.IOException; +import com.browseengine.bobo.facets.FacetHandler; +import com.browseengine.bobo.facets.impl.MultiValueFacetHandler; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; @@ -35,28 +37,44 @@ public class NullFilterConstructor extends FilterConstructor { public static final String FILTER_TYPE = "isNull"; + @Override - protected Filter doConstructFilter(Object json) throws Exception { + protected SenseiFilter doConstructFilter(Object json) throws Exception { final String fieldName = json instanceof String ? (String) json : ((JSONObject) json).getString("field"); - return new Filter() { - + return new SenseiFilter() { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final Object data = ((BoboIndexReader)reader).getFacetData(fieldName); - if (data instanceof MultiValueFacetDataCache) { - return new DocIdSet() { + public SenseiDocIdSet getSenseiDocIdSet(IndexReader reader) throws IOException { + BoboIndexReader boboReader = (BoboIndexReader) reader; + FacetHandler facetHandler = boboReader.getFacetHandler(fieldName); + Object facetData = facetHandler.getFacetData(boboReader); + + if(facetData instanceof MultiValueFacetDataCache) + { + final MultiValueFacetDataCache facetDataCache = (MultiValueFacetDataCache) facetData; + int cardinality = facetDataCache.freqs[0]; + + DocIdSet docIdSet = new DocIdSet() { @Override - public DocIdSetIterator iterator() throws IOException { - return new MultiValueFacetDocIdSetIterator((MultiValueFacetDataCache)data, 0); + public DocIdSetIterator iterator() throws IOException { + return new MultiValueFacetDocIdSetIterator(facetDataCache, 0); } }; - } else if (data instanceof FacetDataCache) { - return new DocIdSet() { + + return new SenseiDocIdSet(docIdSet, cardinality); + } + else if (facetData instanceof FacetDataCache) + { + final FacetDataCache facetDataCache = (FacetDataCache) facetData; + int cardinality = facetDataCache.freqs[0]; + + DocIdSet docIdSet = new DocIdSet() { @Override - public DocIdSetIterator iterator() throws IOException { - return new FacetFilter.FacetDocIdSetIterator((FacetDataCache) data, 0); + public DocIdSetIterator iterator() throws IOException { + return new FacetFilter.FacetDocIdSetIterator(facetDataCache, 0); } }; + + return new SenseiDocIdSet(docIdSet, cardinality); } throw new UnsupportedOperationException("The null filter is supported only for the bobo facetHandlers that use FacetDataCache"); } diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/OrFilterConstructor.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/OrFilterConstructor.java index b511685cb..eee720dda 100644 --- a/sensei-core/src/main/java/com/senseidb/search/query/filters/OrFilterConstructor.java +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/OrFilterConstructor.java @@ -49,15 +49,15 @@ public OrFilterConstructor(QueryParser qparser) } @Override - protected Filter doConstructFilter(Object obj) throws Exception { + protected SenseiFilter doConstructFilter(Object obj) throws Exception { JSONArray filterArray = (JSONArray)obj; - List filters = new ArrayList(filterArray.length()); + List filters = new ArrayList(filterArray.length()); for (int i=0; i> 1; + return new SenseiDocIdSet(queryWrapperFilter.getDocIdSet(reader), cardinalityEstimate); + } + }; } } diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/RangeFilterConstructor.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/RangeFilterConstructor.java index 7814b6bb2..bef6c3d33 100644 --- a/sensei-core/src/main/java/com/senseidb/search/query/filters/RangeFilterConstructor.java +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/RangeFilterConstructor.java @@ -47,7 +47,7 @@ public class RangeFilterConstructor extends FilterConstructor public static final String FILTER_TYPE = "range"; @Override - protected Filter doConstructFilter(Object obj) throws Exception + protected SenseiFilter doConstructFilter(Object obj) throws Exception { final JSONObject json = (JSONObject)obj; @@ -105,17 +105,18 @@ else if (lte != null && lte.length() != 0) include_upper = jsonObj.optBoolean(INCLUDE_UPPER_PARAM, true); } - return new Filter() + return new SenseiFilter() { @Override - public DocIdSet getDocIdSet(final IndexReader reader) throws IOException - { + public SenseiDocIdSet getSenseiDocIdSet(final IndexReader reader) throws IOException { + int defaultCardinalityEstimate = reader.maxDoc() >> 1; + String fromPadded = from, toPadded = to; if (!noOptimize) { if (reader instanceof BoboIndexReader) { - BoboIndexReader boboReader = (BoboIndexReader)reader; + final BoboIndexReader boboReader = (BoboIndexReader)reader; FacetHandler facetHandler = boboReader.getFacetHandler(field); if (facetHandler != null) { @@ -145,7 +146,9 @@ public DocIdSet getDocIdSet(final IndexReader reader) throws IOException } else { filter = new FacetRangeFilter(facetHandler, sb.toString()); } - return filter.getDocIdSet(reader); + int cardinality = (int)(reader.maxDoc() * filter.getFacetSelectivity(boboReader)); + DocIdSet docIdSet = filter.getDocIdSet(reader); + return new SenseiDocIdSet(docIdSet, cardinality); } } } @@ -200,7 +203,7 @@ else if ("double".equals(type)) { if (fromPadded == null || fromPadded.length() == 0) if (toPadded == null || toPadded.length() == 0) - return new DocIdSet() + return new SenseiDocIdSet(new DocIdSet() { @Override public boolean isCacheable() @@ -213,13 +216,16 @@ public DocIdSetIterator iterator() throws IOException { return new MatchAllDocIdSetIterator(reader); } - }; + }, reader.maxDoc()); else - return new TermRangeFilter(field, fromPadded, toPadded, false, include_upper).getDocIdSet(reader); + return new SenseiDocIdSet(new TermRangeFilter(field, fromPadded, toPadded, false, + include_upper).getDocIdSet(reader), defaultCardinalityEstimate); else if (toPadded == null|| toPadded.length() == 0) - return new TermRangeFilter(field, fromPadded, toPadded, include_lower, false).getDocIdSet(reader); + return new SenseiDocIdSet(new TermRangeFilter(field, fromPadded, toPadded, include_lower, + false).getDocIdSet(reader), defaultCardinalityEstimate); - return new TermRangeFilter(field, fromPadded, toPadded, include_lower, include_upper).getDocIdSet(reader); + return new SenseiDocIdSet(new TermRangeFilter(field, fromPadded, toPadded, include_lower, + include_upper).getDocIdSet(reader), defaultCardinalityEstimate); } }; } diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiAndFilter.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiAndFilter.java new file mode 100644 index 000000000..4e5ce90dc --- /dev/null +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiAndFilter.java @@ -0,0 +1,79 @@ +package com.senseidb.search.query.filters; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.DocIdSet; + +import com.kamikaze.docidset.impl.AndDocIdSet; + +/** + * An AND filter. + * Currently uses an upper bound of the cardinality estimate by taking the maximum possible number of + * documents returned by a sub-filter + */ +public class SenseiAndFilter extends SenseiFilter +{ + + private static final long serialVersionUID = 1L; + + private final List _filters; + + public SenseiAndFilter(List filters) + { + _filters = filters; + } + + /** + * Returns lowest cardinality sets first. + */ + private static class SenseiDocIdSetComparator implements Comparator { + @Override + public int compare(SenseiDocIdSet a, SenseiDocIdSet b) { + if(a.getCardinalityEstimate() < b.getCardinalityEstimate()) { + return -1; + } else if(a.getCardinalityEstimate() == b.getCardinalityEstimate()) { + return 0; + } else { + return 1; + } + } + } + + public static final Comparator SENSEI_DOC_ID_SET_COMPARATOR = new SenseiDocIdSetComparator(); + + @Override + public SenseiDocIdSet getSenseiDocIdSet(IndexReader reader) throws IOException { + if (_filters.size() == 1) + { + return _filters.get(0).getSenseiDocIdSet(reader); + } + else + { + List senseiDocIdSets = new ArrayList(_filters.size()); + int cardinalityEstimate = reader.maxDoc(); + + for (SenseiFilter f : _filters) + { + SenseiDocIdSet senseiDocIdSet = f.getSenseiDocIdSet(reader); + senseiDocIdSets.add(senseiDocIdSet); + cardinalityEstimate = Math.min(cardinalityEstimate, senseiDocIdSet.getCardinalityEstimate()); + } + + // Lowest cardinality filters should come first in the AND + Collections.sort(senseiDocIdSets, SENSEI_DOC_ID_SET_COMPARATOR); + + List docIdSets = new ArrayList(senseiDocIdSets.size()); + for(SenseiDocIdSet senseiDocIdSet : senseiDocIdSets) + { + docIdSets.add(senseiDocIdSet.getDocIdSet()); + } + + return new SenseiDocIdSet(new AndDocIdSet(docIdSets), cardinalityEstimate); + } + } +} diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiFilter.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiFilter.java new file mode 100644 index 000000000..7636f2b8e --- /dev/null +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiFilter.java @@ -0,0 +1,55 @@ +package com.senseidb.search.query.filters; + +import com.browseengine.bobo.api.BoboIndexReader; +import com.browseengine.bobo.facets.filter.RandomAccessFilter; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.Filter; + +import java.io.IOException; + +/** + * A filter implementation that provides an expected cardinality of the associated filter. + * The cardinality is intended to be used to optimize filter execution order at runtime. + * For instance, an AND filter should always begin the AND using a filter of the LOWEST cardinality to + * reduce the number of documents considered in the result set + */ +public abstract class SenseiFilter extends Filter { + @Override + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + SenseiDocIdSet docIdSet = getSenseiDocIdSet(reader); + return docIdSet.getDocIdSet(); + } + + public abstract SenseiDocIdSet getSenseiDocIdSet(IndexReader reader) throws IOException; + + public static SenseiFilter buildDefault(final Filter filter) { + return buildDefault(filter, -1); + } + + public static SenseiFilter build(final RandomAccessFilter randomAccessFilter) throws IOException { + return new SenseiFilter() { + @Override + public SenseiDocIdSet getSenseiDocIdSet(IndexReader reader) throws IOException { + double facetSelectivity = randomAccessFilter.getFacetSelectivity((BoboIndexReader) reader); + int maxDoc = reader.maxDoc(); + int cardinality = (int) (facetSelectivity * maxDoc); + return new SenseiDocIdSet(randomAccessFilter.getDocIdSet(reader), cardinality); + } + }; + } + + public static SenseiFilter buildDefault(final Filter filter, final int suppliedCardinality) { + return new SenseiFilter() { + @Override + public SenseiDocIdSet getSenseiDocIdSet(IndexReader reader) throws IOException { + // TODO: There needs to be a way to estimate cardinality of a column in general. + // Either we can maintain a running estimate of the hit rate of a column + // or allow a client to preload an expected estimate + int cardinality = suppliedCardinality < 0 ? reader.maxDoc() >> 1 : suppliedCardinality; + return new SenseiDocIdSet(filter.getDocIdSet(reader), cardinality); + } + }; + } + +} diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiNotFilter.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiNotFilter.java new file mode 100644 index 000000000..bfdeb4cfc --- /dev/null +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiNotFilter.java @@ -0,0 +1,34 @@ +package com.senseidb.search.query.filters; + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.Filter; + +import com.kamikaze.docidset.impl.NotDocIdSet; + +/** + * A NOT filter implementation. + * + * Since the sensei filters return upper bounds on cardinality, there is no way to estimate the cardinality of + * a NOT in general. We would need a lower bound on cardinality to do that. Hence we go with maxDoc + */ +public class SenseiNotFilter extends SenseiFilter { + private static final long serialVersionUID = 1L; + + private final SenseiFilter _innerFilter; + + public SenseiNotFilter(SenseiFilter innerFilter) + { + _innerFilter = innerFilter; + } + + @Override + public SenseiDocIdSet getSenseiDocIdSet(IndexReader reader) throws IOException { + SenseiDocIdSet senseiDocIdSet = _innerFilter.getSenseiDocIdSet(reader); + int maxDoc = reader.maxDoc(); +// int cardinality = maxDoc - senseiDocIdSet.getCardinalityEstimate(); + return new SenseiDocIdSet(new NotDocIdSet(senseiDocIdSet.getDocIdSet(), maxDoc), maxDoc); + } +} diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiOrFilter.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiOrFilter.java new file mode 100644 index 000000000..283225bb9 --- /dev/null +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiOrFilter.java @@ -0,0 +1,47 @@ +package com.senseidb.search.query.filters; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.Filter; + +import com.kamikaze.docidset.impl.OrDocIdSet; + +public class SenseiOrFilter extends SenseiFilter { + /** + * + */ + private static final long serialVersionUID = 1L; + + private final List _filters; + + public SenseiOrFilter(List filters) + { + _filters = filters; + } + + @Override + public SenseiDocIdSet getSenseiDocIdSet(IndexReader reader) throws IOException { + if(_filters.size() == 1) + { + return _filters.get(0).getSenseiDocIdSet(reader); + } + else + { + List list = new ArrayList(_filters.size()); + int cardinalityEstimate = 0; + for (SenseiFilter f : _filters) + { + SenseiDocIdSet senseiDocIdSet = f.getSenseiDocIdSet(reader); + list.add(senseiDocIdSet.getDocIdSet()); + cardinalityEstimate += senseiDocIdSet.getCardinalityEstimate(); + } + cardinalityEstimate = Math.min(cardinalityEstimate, reader.maxDoc()); + return new SenseiDocIdSet(new OrDocIdSet(list), cardinalityEstimate); + } + } +} + diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiTermFilter.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiTermFilter.java index 231ac66a0..7cda8ecba 100644 --- a/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiTermFilter.java +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/SenseiTermFilter.java @@ -18,9 +18,15 @@ */ package com.senseidb.search.query.filters; +import com.browseengine.bobo.facets.filter.RandomAccessFilter; +import com.browseengine.bobo.util.BigSegmentedArray; +import com.senseidb.util.Pair; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; import java.util.List; import org.apache.log4j.Logger; @@ -28,7 +34,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.Filter; import com.browseengine.bobo.api.BoboIndexReader; import com.browseengine.bobo.api.BrowseSelection; @@ -42,7 +47,7 @@ import com.kamikaze.docidset.impl.NotDocIdSet; import com.kamikaze.docidset.impl.OrDocIdSet; -public class SenseiTermFilter extends Filter { +public class SenseiTermFilter extends SenseiFilter { /** * @@ -67,9 +72,13 @@ public SenseiTermFilter(String name,String vals[],String[] not,boolean isAnd,boo _noAutoOptimize = noAutoOptimize; } - private static DocIdSet buildDefaultDocIdSets(final BoboIndexReader reader,final String name,final String[] vals,boolean isAnd){ + static DocIdSet buildDefaultDocIdSets(final BoboIndexReader reader, + final String name, + final String[] vals, + boolean isAnd){ if (vals==null) return null; ArrayList docSetList = new ArrayList(vals.length); + for (final String val : vals){ docSetList.add(new DocIdSet() { @@ -85,7 +94,7 @@ public DocIdSetIterator iterator() throws IOException { } else if (docSetList.size()==0) return null; else{ - if (isAnd){ + if (isAnd) { return new AndDocIdSet(docSetList); } else{ @@ -93,8 +102,43 @@ public DocIdSetIterator iterator() throws IOException { } } } - - + + private static int estimateCardinality(int positiveEstimate, int negativeEstimate) { + if(positiveEstimate > 0) { + if(negativeEstimate == 0) { + return positiveEstimate; + } else { + // Both positive and negative. We don't know what the cardinality will be before executing the not + return positiveEstimate; + } + } else { + // Negative only - return the negative estimate + return negativeEstimate; + } + } + + private static class CardinalityComparator implements Comparator> { + @Override + public int compare(Pair termA, Pair termB) + { + int comparison = termA.getSecond() - termB.getSecond(); + if(comparison == 0) + { + if(termA.getFirst() == null) { + return termB == null ? 0 : -1; + } + return termA.getFirst().compareTo(termB.getFirst()); + } + else + { + return comparison; + } + } + } + + private static CardinalityComparator cardinalityComparator = new CardinalityComparator(); + + private static DocIdSet buildLuceneDefaultDocIdSet(final BoboIndexReader reader, final String name, final String[] vals, @@ -121,8 +165,8 @@ public DocIdSetIterator iterator() throws IOException DocIdSet positiveSet = null; DocIdSet negativeSet = null; - if (vals!=null && vals.length>0) - positiveSet = buildDefaultDocIdSets(reader,name,vals,isAnd); + if (vals!=null && vals.length > 0) + positiveSet = buildDefaultDocIdSets(reader, name, vals, isAnd); if (nots!=null && nots.length>0) negativeSet = buildDefaultDocIdSets(reader, name, nots, false); @@ -131,7 +175,7 @@ public DocIdSetIterator iterator() throws IOException if (negativeSet==null){ return positiveSet; } - else{ + else { DocIdSet[] sets = new DocIdSet[]{positiveSet,new NotDocIdSet(negativeSet, reader.maxDoc())}; return new AndDocIdSet(Arrays.asList(sets)); } @@ -147,56 +191,105 @@ public DocIdSetIterator iterator() throws IOException } } + static int estimateCardinality(List> valsAndFreqs, int maxDoc, boolean isAnd) { + if(valsAndFreqs == null || valsAndFreqs.isEmpty()) + return 0; + + int cardinality = isAnd ? maxDoc : 0; + for(Pair valAndFreq : valsAndFreqs) { + if(isAnd) { + cardinality = Math.min(cardinality, valAndFreq.getSecond()); + } else { + cardinality += valAndFreq.getSecond(); + } + } + + cardinality = Math.min(cardinality, maxDoc); + return cardinality; + } + @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public SenseiDocIdSet getSenseiDocIdSet(IndexReader reader) throws IOException { if (reader instanceof BoboIndexReader){ BoboIndexReader boboReader = (BoboIndexReader)reader; FacetHandler facetHandler = (FacetHandler)boboReader.getFacetHandler(_name); Object obj = null; + + List> valsAndFreqs = null; + List> notsAndFreqs = null; + String[] vals = _vals; + String[] nots = _not; + int maxDoc = reader.maxDoc(); + int cardinality = maxDoc; + int notCardinality = 0; + if (facetHandler != null) { obj = facetHandler.getFacetData(boboReader); - if (_noAutoOptimize && obj!=null && obj instanceof FacetDataCache){ + if (obj != null && obj instanceof FacetDataCache) { FacetDataCache facetData = (FacetDataCache)obj; TermValueList valArray = facetData.valArray; - // copy vals - ArrayList validVals = new ArrayList(_vals.length); - - int offset = 0; - for (String val : _vals){ - int idx = valArray.indexOf(val); - - if (idx >=0) { - validVals.add(valArray.get(idx)); // get and format the value - offset = idx; - } else { - offset = -idx - 1; - } + BigSegmentedArray orderArray = facetData.orderArray; + int[] freqs = facetData.freqs; + + valsAndFreqs = getValsAndFreqs(vals, valArray, freqs); + notsAndFreqs = getValsAndFreqs(nots, valArray, freqs); + + vals = getValuesToSearch(valsAndFreqs); + nots = getValuesToSearch(notsAndFreqs); + + int positiveCardinality = estimateCardinality(valsAndFreqs, maxDoc, _isAnd); + int negativeCardinality = maxDoc - estimateCardinality(notsAndFreqs, + maxDoc, + false); + + cardinality = estimateCardinality(positiveCardinality, negativeCardinality); + + if(_noAutoOptimize) { + DocIdSet docIdSet = buildLuceneDefaultDocIdSet(boboReader, + _name, + vals, + nots, + _isAnd); + + return new SenseiDocIdSet(docIdSet, cardinality); } - return buildLuceneDefaultDocIdSet(boboReader, _name, validVals.toArray(new String[0]),_not,_isAnd); } // we get to optimize using facets BrowseSelection sel = new BrowseSelection(_name); - sel.setValues(_vals); - if (_not != null) - sel.setNotValues(_not); - if (_isAnd) + + + sel.setValues(vals); + if (nots != null) + sel.setNotValues(nots); + + if (_isAnd) { sel.setSelectionOperation(ValueOperation.ValueOperationAnd); - else + } else { sel.setSelectionOperation(ValueOperation.ValueOperationOr); - - Filter filter = facetHandler.buildFilter(sel); + } + RandomAccessFilter filter = facetHandler.buildFilter(sel); if (filter == null) filter = EmptyFilter.getInstance(); - return filter.getDocIdSet(boboReader); + double facetSelectivity = filter.getFacetSelectivity(boboReader); + cardinality = (int)(maxDoc * facetSelectivity); + + return new SenseiDocIdSet(filter.getDocIdSet(boboReader), cardinality); } - else{ + else { if (logger.isDebugEnabled()) { logger.debug("not facet support, default to term filter: "+_name); } - return buildLuceneDefaultDocIdSet(boboReader,_name,_vals,_not,_isAnd); + + DocIdSet docIdSet = buildLuceneDefaultDocIdSet(boboReader, _name, vals, nots, _isAnd); + + // Guess cardinality is 50% of documents since we don't have the facet data and because Lucene's TermDocs is + // too expensive to justify calling + int cardinalityEstimate = maxDoc >> 1; + + return new SenseiDocIdSet(docIdSet, cardinalityEstimate); } } else{ @@ -204,4 +297,42 @@ public DocIdSet getDocIdSet(IndexReader reader) throws IOException { } } + static List> getValsAndFreqs(String[] vals, TermValueList valArray, int[] freqs) { + if(vals == null) { + return null; + } + + List> valsAndFreqs = new ArrayList>(vals.length); + + + int offset = 0; + for (String val : vals) { + int idx = valArray.indexOf(val); + + if (idx >=0) { + valsAndFreqs.add(new Pair(valArray.get(idx), freqs[idx])); + offset = idx; + } else { + offset = -idx - 1; + } + } + + // Lowest cardinality docs go first to optimize the AND case + Collections.sort(valsAndFreqs, cardinalityComparator); + return valsAndFreqs; + } + + String[] getValuesToSearch(List> valsAndFreqs) + { + if(valsAndFreqs == null) { + return null; + } + + String[] valuesToSearch = new String[valsAndFreqs.size()]; + int i = 0; + for(Pair valAndFreq : valsAndFreqs) { + valuesToSearch[i++] = valAndFreq.getFirst(); + } + return valuesToSearch; + } } diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/TermFilterConstructor.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/TermFilterConstructor.java index 9baa6533d..2662b0e95 100644 --- a/sensei-core/src/main/java/com/senseidb/search/query/filters/TermFilterConstructor.java +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/TermFilterConstructor.java @@ -28,7 +28,7 @@ public class TermFilterConstructor extends FilterConstructor{ public static final String FILTER_TYPE = "term"; @Override - protected Filter doConstructFilter(Object param) throws Exception { + protected SenseiFilter doConstructFilter(Object param) throws Exception { JSONObject json = (JSONObject)param; Iterator iter = json.keys(); diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/TermsFilterConstructor.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/TermsFilterConstructor.java index 738376af3..4ddbabcf1 100644 --- a/sensei-core/src/main/java/com/senseidb/search/query/filters/TermsFilterConstructor.java +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/TermsFilterConstructor.java @@ -30,7 +30,7 @@ public class TermsFilterConstructor extends FilterConstructor{ public static final String FILTER_TYPE = "terms"; @Override - protected Filter doConstructFilter(Object obj) throws Exception { + protected SenseiFilter doConstructFilter(Object obj) throws Exception { JSONObject json = (JSONObject)obj; Iterator iter = json.keys(); diff --git a/sensei-core/src/main/java/com/senseidb/search/query/filters/UIDFilterConstructor.java b/sensei-core/src/main/java/com/senseidb/search/query/filters/UIDFilterConstructor.java index 40f9dd51c..aaa6f5673 100644 --- a/sensei-core/src/main/java/com/senseidb/search/query/filters/UIDFilterConstructor.java +++ b/sensei-core/src/main/java/com/senseidb/search/query/filters/UIDFilterConstructor.java @@ -20,6 +20,7 @@ import java.io.IOException; +import com.browseengine.bobo.facets.filter.RandomAccessFilter; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; @@ -36,14 +37,13 @@ public class UIDFilterConstructor extends FilterConstructor{ public static final String FILTER_TYPE = "ids"; @Override - protected Filter doConstructFilter(Object obj) throws Exception { + protected SenseiFilter doConstructFilter(Object obj) throws Exception { final JSONObject json = (JSONObject)obj; - return new Filter(){ + return new SenseiFilter(){ @Override - public DocIdSet getDocIdSet(IndexReader reader) - throws IOException { - if (reader instanceof BoboIndexReader){ + public SenseiDocIdSet getSenseiDocIdSet(IndexReader reader) throws IOException { + if (reader instanceof BoboIndexReader) { BoboIndexReader boboReader = (BoboIndexReader)reader; FacetHandler uidHandler = boboReader.getFacetHandler(SenseiFacetHandlerBuilder.UID_FACET_NAME); if (uidHandler!=null && uidHandler instanceof UIDFacetHandler){ @@ -56,7 +56,9 @@ public DocIdSet getDocIdSet(IndexReader reader) uidSel.setValues(vals); if (nots != null) uidSel.setNotValues(nots); - return uidFacet.buildFilter(uidSel).getDocIdSet(boboReader); + + RandomAccessFilter raf = uidFacet.buildFilter(uidSel); + return SenseiDocIdSet.build(raf, boboReader); } catch(Exception e){ throw new IOException(e); diff --git a/sensei-core/src/main/java/com/senseidb/servlet/DefaultSenseiJSONServlet.java b/sensei-core/src/main/java/com/senseidb/servlet/DefaultSenseiJSONServlet.java index 28fc89fac..f8fec3044 100644 --- a/sensei-core/src/main/java/com/senseidb/servlet/DefaultSenseiJSONServlet.java +++ b/sensei-core/src/main/java/com/senseidb/servlet/DefaultSenseiJSONServlet.java @@ -171,14 +171,14 @@ public static JSONObject convertExpl(Explanation expl) JSONObject jsonObject = null; if (expl != null) { - jsonObject = new FastJSONObject(); + jsonObject = new FastJSONObject(5); jsonObject.put(PARAM_RESULT_HITS_EXPL_VALUE, expl.getValue()); String descr = expl.getDescription(); jsonObject.put(PARAM_RESULT_HITS_EXPL_DESC, descr == null ? "" : descr); Explanation[] details = expl.getDetails(); if (details != null) { - JSONArray detailArray = new FastJSONArray(); + JSONArray detailArray = new FastJSONArray(details.length); for (Explanation detail : details) { JSONObject subObj = convertExpl(detail); @@ -197,7 +197,7 @@ public static JSONObject convertExpl(Explanation expl) public static JSONObject convert(Map facetValueMap, SenseiRequest req) throws JSONException { - JSONObject resMap = new FastJSONObject(); + JSONObject resMap = new FastJSONObject(25); if (facetValueMap != null) { Set> entrySet = facetValueMap.entrySet(); @@ -243,7 +243,7 @@ public static JSONObject convert(Map facetValueMap, Sen if (selectedVal != null && selectedVal.length() > 0) { BrowseFacet selectedFacetVal = facetAccessible.getFacet(selectedVal); - JSONObject fv = new FastJSONObject(); + JSONObject fv = new FastJSONObject(5); fv.put(PARAM_RESULT_FACET_INFO_COUNT, selectedFacetVal == null ? 0 : selectedFacetVal.getFacetValueHitCount()); String fval = selectedFacetVal == null ? selectedVal : selectedFacetVal.getValue(); fv.put(PARAM_RESULT_FACET_INFO_VALUE, fval); @@ -349,12 +349,14 @@ public static JSONArray buildJSONHits(SenseiRequest req, SenseiHit[] hits) { Set selectSet = req.getSelectSet(); - JSONArray hitArray = new FastJSONArray(); + JSONArray hitArray = new FastJSONArray(hits.length); for (SenseiHit hit : hits) { Map fieldMap = hit.getFieldValues(); + int fieldMapSize = fieldMap == null ? 0 : fieldMap.size(); + + JSONObject hitObj = new FastJSONObject(20 + fieldMapSize); - JSONObject hitObj = new FastJSONObject(); if (selectSet == null || selectSet.contains(PARAM_RESULT_HIT_UID)) { hitObj.put(PARAM_RESULT_HIT_UID, hit.getUID()); @@ -400,18 +402,19 @@ public static JSONArray buildJSONHits(SenseiRequest req, SenseiHit[] hits) // UID is already set. continue; } - String[] vals = entry.getValue(); - JSONArray valArray = new FastJSONArray(); - if (vals != null) + if (selectSet == null || selectSet.contains(key)) { - for (String val : vals) + String[] vals = entry.getValue(); + + JSONArray valArray = new FastJSONArray(vals.length); + if (vals != null) { - valArray.put(val); + for (String val : vals) + { + valArray.put(val); + } } - } - if (selectSet == null || selectSet.contains(key)) - { hitObj.put(key, valArray); } } @@ -420,41 +423,46 @@ public static JSONArray buildJSONHits(SenseiRequest req, SenseiHit[] hits) Document doc = hit.getStoredFields(); if (doc != null) { - List storedData = new ArrayList(); - List fields = doc.getFields(); - for (Fieldable field : fields) - { - JSONObject data = new FastJSONObject(); - data.put(PARAM_RESULT_HIT_STORED_FIELDS_NAME, field.name()); - data.put(PARAM_RESULT_HIT_STORED_FIELDS_VALUE, field.stringValue()); - storedData.add(data); - } if (selectSet == null || selectSet.contains(PARAM_RESULT_HIT_STORED_FIELDS)) { + List fields = doc.getFields(); + List storedData = new ArrayList(fields.size()); + for (Fieldable field : fields) + { + JSONObject data = new FastJSONObject(4); + data.put(PARAM_RESULT_HIT_STORED_FIELDS_NAME, field.name()); + data.put(PARAM_RESULT_HIT_STORED_FIELDS_VALUE, field.stringValue()); + storedData.add(data); + } + hitObj.put(PARAM_RESULT_HIT_STORED_FIELDS, new FastJSONArray(storedData)); } } Map tvMap = hit.getTermFreqMap(); - if (tvMap!=null && tvMap.size()>0){ - JSONObject tvObj = new FastJSONObject(); + if (tvMap != null && tvMap.size() > 0){ + JSONObject tvObj = new FastJSONObject(2 * tvMap.entrySet().size()); if (selectSet == null || selectSet.contains(PARAM_RESULT_HIT_TERMVECTORS)) { hitObj.put(PARAM_RESULT_HIT_TERMVECTORS, tvObj); } + Set> entries = tvMap.entrySet(); for (Entry entry : entries){ String field = entry.getKey(); - JSONArray tvArray = new FastJSONArray(); - tvObj.put(field, tvArray); + String[] terms = entry.getValue().terms; int[] freqs = entry.getValue().freqs; + + JSONArray tvArray = new FastJSONArray(terms.length); for (int i=0;i selectList = req.getSelectList(); if (selectList != null) { - JSONArray jsonSelectList = new FastJSONArray(); + JSONArray jsonSelectList = new FastJSONArray(selectList.size()); for (String col: selectList) { jsonSelectList.put(col); @@ -507,9 +515,9 @@ public static JSONObject buildJSONResult(SenseiRequest req, SenseiResult res) } private static void addErrors(JSONObject jsonResult, SenseiResult res) throws JSONException { - JSONArray errorsJson = new FastJSONArray(); + JSONArray errorsJson = new FastJSONArray(res.getErrors().size()); for (SenseiError error: res.getErrors()) { - errorsJson.put(new FastJSONObject().put(PARAM_RESULT_ERROR_MESSAGE, error.getMessage()) + errorsJson.put(new FastJSONObject(5).put(PARAM_RESULT_ERROR_MESSAGE, error.getMessage()) .put(PARAM_RESULT_ERROR_TYPE, error.getErrorType().name()) .put(PARAM_RESULT_ERROR_CODE, error.getErrorCode())); } @@ -526,7 +534,7 @@ private static SenseiQuery buildSenseiQuery(DataConfiguration params) SenseiQuery sq; String query = params.getString(PARAM_QUERY, null); - JSONObject qjson = new FastJSONObject(); + JSONObject qjson = new FastJSONObject(30); if (query != null && query.length() > 0) { try @@ -935,7 +943,7 @@ else if (PARAM_SELECT_OP_AND.equals(op)) @Override protected String buildResultString(HttpServletRequest httpReq, SenseiSystemInfo info) throws Exception { - JSONObject jsonObj = new FastJSONObject(); + JSONObject jsonObj = new FastJSONObject(8); jsonObj.put(PARAM_SYSINFO_NUMDOCS, info.getNumDocs()); jsonObj.put(PARAM_SYSINFO_LASTMODIFIED, info.getLastModified()); jsonObj.put(PARAM_SYSINFO_VERSION, info.getVersion()); @@ -945,27 +953,26 @@ protected String buildResultString(HttpServletRequest httpReq, SenseiSystemInfo jsonObj.put(PARAM_SYSINFO_SCHEMA, new FastJSONObject(info.getSchema())); } - JSONArray jsonArray = new FastJSONArray(); - jsonObj.put(PARAM_SYSINFO_FACETS, jsonArray); Set facets = info.getFacetInfos(); + JSONArray jsonArray = new FastJSONArray(facets == null ? 0 : facets.size()); if (facets != null) { for (SenseiSystemInfo.SenseiFacetInfo facet : facets) { - JSONObject facetObj = new FastJSONObject(); + JSONObject facetObj = new FastJSONObject(6); facetObj.put(PARAM_SYSINFO_FACETS_NAME, facet.getName()); facetObj.put(PARAM_SYSINFO_FACETS_RUNTIME, facet.isRunTime()); facetObj.put(PARAM_SYSINFO_FACETS_PROPS, facet.getProps()); jsonArray.put(facetObj); } } + jsonObj.put(PARAM_SYSINFO_FACETS, jsonArray); - jsonArray = new FastJSONArray(); - jsonObj.put(PARAM_SYSINFO_CLUSTERINFO, jsonArray); List clusterInfo = info.getClusterInfo(); + jsonArray = new FastJSONArray(clusterInfo == null ? 0 : clusterInfo.size()); if (clusterInfo != null) { for (SenseiSystemInfo.SenseiNodeInfo nodeInfo : clusterInfo) { - JSONObject nodeObj = new FastJSONObject(); + JSONObject nodeObj = new FastJSONObject(7); nodeObj.put(PARAM_SYSINFO_CLUSTERINFO_ID, nodeInfo.getId()); nodeObj.put(PARAM_SYSINFO_CLUSTERINFO_PARTITIONS, new FastJSONArray(Arrays.asList(nodeInfo.getPartitions()))); nodeObj.put(PARAM_SYSINFO_CLUSTERINFO_NODELINK, nodeInfo.getNodeLink()); @@ -973,6 +980,7 @@ protected String buildResultString(HttpServletRequest httpReq, SenseiSystemInfo jsonArray.put(nodeObj); } } + jsonObj.put(PARAM_SYSINFO_CLUSTERINFO, jsonArray); return supportJsonp(httpReq, jsonObj.toString()); } diff --git a/sensei-core/src/main/java/com/senseidb/util/JSONUtil.java b/sensei-core/src/main/java/com/senseidb/util/JSONUtil.java index 7e60e8fbc..8bd45ea32 100644 --- a/sensei-core/src/main/java/com/senseidb/util/JSONUtil.java +++ b/sensei-core/src/main/java/com/senseidb/util/JSONUtil.java @@ -48,6 +48,11 @@ public FastJSONObject() _inner = new com.alibaba.fastjson.JSONObject(); } + public FastJSONObject(int capacity) + { + _inner = new com.alibaba.fastjson.JSONObject(capacity); + } + public FastJSONObject(String str) throws JSONException { try @@ -502,6 +507,11 @@ public FastJSONArray() _inner = new com.alibaba.fastjson.JSONArray(); } + public FastJSONArray(int capacity) + { + _inner = new com.alibaba.fastjson.JSONArray(capacity); + } + public FastJSONArray(String str) throws JSONException { try diff --git a/sensei-core/src/test/java/com/senseidb/search/query/filters/TestSenseiBooleanFilters.java b/sensei-core/src/test/java/com/senseidb/search/query/filters/TestSenseiBooleanFilters.java new file mode 100644 index 000000000..d8038e1b4 --- /dev/null +++ b/sensei-core/src/test/java/com/senseidb/search/query/filters/TestSenseiBooleanFilters.java @@ -0,0 +1,96 @@ +package com.senseidb.search.query.filters; + +import com.kamikaze.docidset.impl.IntArrayDocIdSet; +import junit.framework.Assert; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.DocIdSetIterator; + +import static org.easymock.classextension.EasyMock.*; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class TestSenseiBooleanFilters { + + public static SenseiFilter buildFilter(final int... elems) { + return new SenseiFilter() { + @Override + public SenseiDocIdSet getSenseiDocIdSet(IndexReader reader) throws IOException { + IntArrayDocIdSet docIdSet = new IntArrayDocIdSet(elems.length); + for(int elem : elems) { + docIdSet.addDoc(elem); + } + + return new SenseiDocIdSet(docIdSet, elems.length); + } + }; + } + + public static int getCount(DocIdSetIterator iterator) throws IOException { + int count = 0; + while(iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + count++; + } + return count; + } + + @Test + public void testAndFilter() throws IOException { + List filterList = getSenseiFilters(); + SenseiAndFilter andFilter = new SenseiAndFilter(filterList); + + IndexReader indexReader = createMock(IndexReader.class); + expect(indexReader.maxDoc()).andReturn(1000); + + replay(indexReader); + SenseiDocIdSet senseiDocIdSet = andFilter.getSenseiDocIdSet(indexReader); + Assert.assertEquals(8, senseiDocIdSet.getCardinalityEstimate()); + Assert.assertEquals(7, getCount(senseiDocIdSet.getDocIdSet().iterator())); + } + + @Test + public void testOrFilter() throws IOException { + List filterList = getSenseiFilters(); + SenseiOrFilter filter = new SenseiOrFilter(filterList); + + IndexReader indexReader = createMock(IndexReader.class); + expect(indexReader.maxDoc()).andReturn(1000); + replay(indexReader); + + SenseiDocIdSet senseiDocIdSet = filter.getSenseiDocIdSet(indexReader); + Assert.assertEquals(18, senseiDocIdSet.getCardinalityEstimate()); + Assert.assertEquals(11, getCount(senseiDocIdSet.getDocIdSet().iterator())); + + reset(indexReader); + expect(indexReader.maxDoc()).andReturn(15); + replay(indexReader); + + senseiDocIdSet = filter.getSenseiDocIdSet(indexReader); + Assert.assertEquals(15, senseiDocIdSet.getCardinalityEstimate()); + Assert.assertEquals(11, getCount(senseiDocIdSet.getDocIdSet().iterator())); + } + + @Test + public void testNotFilter() throws IOException { + List filterList = getSenseiFilters(); + SenseiNotFilter filter = new SenseiNotFilter(new SenseiAndFilter(filterList)); + + IndexReader indexReader = createMock(IndexReader.class); + expect(indexReader.maxDoc()).andReturn(20).times(2); + replay(indexReader); + + SenseiDocIdSet senseiDocIdSet = filter.getSenseiDocIdSet(indexReader); + Assert.assertEquals(20, senseiDocIdSet.getCardinalityEstimate()); + Assert.assertEquals(13, getCount(senseiDocIdSet.getDocIdSet().iterator())); + } + + + private List getSenseiFilters() { + List filterList = new ArrayList(); + filterList.add(buildFilter(1, 3, 5, 7, 9, 11, 13, 15, 17, 19)); + filterList.add(buildFilter(2, 3, 5, 7, 11, 13, 17, 19)); + return filterList; + } +} diff --git a/sensei-core/src/test/java/com/senseidb/search/query/filters/TestSenseiTermFilter.java b/sensei-core/src/test/java/com/senseidb/search/query/filters/TestSenseiTermFilter.java new file mode 100644 index 000000000..026d6c24c --- /dev/null +++ b/sensei-core/src/test/java/com/senseidb/search/query/filters/TestSenseiTermFilter.java @@ -0,0 +1,89 @@ +package com.senseidb.search.query.filters; + +import static org.easymock.classextension.EasyMock.*; + +import com.browseengine.bobo.api.BoboIndexReader; +import com.browseengine.bobo.facets.FacetHandler; +import com.browseengine.bobo.facets.data.*; +import com.browseengine.bobo.facets.impl.MultiValueFacetHandler; +import com.browseengine.bobo.facets.impl.SimpleFacetHandler; +import com.senseidb.util.Pair; +import junit.framework.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +public class TestSenseiTermFilter { + + private String[] vals = new String[]{"a", "c", "e"}; + private int[] freqs; + TermValueList dictionary = new TermStringList(); + + @Before + public void setup() { + dictionary = new TermStringList(); + + freqs = new int[27]; + dictionary.add(null); + for(char ch = 'a'; ch <= 'z'; ch++) { + dictionary.add("" + ch); + freqs[1 + ch - 'a'] = 'z' - ch + 1; + } + } + + @Test + public void testGetValsAndFreqsAndCardinality() { + + List> valsAndFreqs = SenseiTermFilter.getValsAndFreqs(vals, dictionary, freqs); + Assert.assertEquals("c", valsAndFreqs.get(1).getFirst()); + Assert.assertEquals(24, valsAndFreqs.get(1).getSecond().intValue()); + + int andCardinality = SenseiTermFilter.estimateCardinality(valsAndFreqs, 26, true); + int orCardinality = SenseiTermFilter.estimateCardinality(valsAndFreqs, 26, false); + + Assert.assertEquals(22, andCardinality); + Assert.assertEquals(26, orCardinality); + } + + @Test + public void testSenseiTermFilter() throws IOException { + String[] vals = new String[]{"a", "c", "e"}; + + SenseiTermFilter orTermFilter = + new SenseiTermFilter("column", vals, null, false, false); + + BoboIndexReader indexReader = createMock(BoboIndexReader.class); + + MultiValueFacetDataCache facetDataCache = + new MultiValueFacetDataCache(); + facetDataCache.valArray = dictionary; + facetDataCache.freqs = freqs; + + FacetHandler facetHandler = + new MultiValueFacetHandler("column", 32); + + expect(indexReader.maxDoc()).andReturn(1000).anyTimes(); + expect(indexReader.getFacetHandler("column")).andReturn(facetHandler); + expect(indexReader.getFacetData("column")).andReturn(facetDataCache).anyTimes(); + replay(indexReader); + + SenseiDocIdSet orDocIdSet = orTermFilter.getSenseiDocIdSet(indexReader); + Assert.assertEquals(26 + 24 + 22, orDocIdSet.getCardinalityEstimate()); + + SenseiTermFilter andTermFilter = + new SenseiTermFilter("column", vals, null, true, false); + + reset(indexReader); + expect(indexReader.maxDoc()).andReturn(1000).anyTimes(); + expect(indexReader.getFacetHandler("column")).andReturn(facetHandler); + expect(indexReader.getFacetData("column")).andReturn(facetDataCache).anyTimes(); + replay(indexReader); + + SenseiDocIdSet andDocIdSet = andTermFilter.getSenseiDocIdSet(indexReader); + Assert.assertEquals(22, andDocIdSet.getCardinalityEstimate()); + } +}