diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..47aa7730 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "cSpell.words": [ + "subslice" + ] +} diff --git a/README.md b/README.md index 232d15fb..ae888a82 100644 --- a/README.md +++ b/README.md @@ -12,22 +12,22 @@ yarn global add teraslice-cli # Step 2: teraslice-cli assets deploy --build - ``` -## APIS - * [Elasticsearch Reader API](./docs/apis/elasticsearch_reader_api.md) - * [Elasticsearch Sender API](./docs/apis/elasticsearch_sender_api.md) - * [Spaces Reader API](./docs/apis/spaces_reader_api.md) - * [Elasticsearch State Storage](./docs/apis/elasticsearch_state_storage.md) +## APIS +* [Elasticsearch Reader API](./docs/apis/elasticsearch_reader_api.md) +* [Elasticsearch Sender API](./docs/apis/elasticsearch_sender_api.md) +* [Spaces Reader API](./docs/apis/spaces_reader_api.md) +* [Elasticsearch State Storage](./docs/apis/elasticsearch_state_storage.md) ## Operations - * [elasticsearch_reader](./docs/operations/elasticsearch_reader.md) - * [elasticsearch_bulk](./docs/operations/elasticsearch_bulk.md) - * [elasticsearch_data_generator](./docs/operations/elasticsearch_data_generator.md) - * [id_reader](./docs/operations/id_reader.md) - * [spaces_reader](./docs/operations/spaces_reader.md) + +* [elasticsearch_reader](./docs/operations/elasticsearch_reader.md) +* [elasticsearch_bulk](./docs/operations/elasticsearch_bulk.md) +* [elasticsearch_data_generator](./docs/operations/elasticsearch_data_generator.md) +* [id_reader](./docs/operations/id_reader.md) +* [spaces_reader](./docs/operations/spaces_reader.md) ## Contributing diff --git a/asset/asset.json b/asset/asset.json index 93017b3a..1ce47eb3 100644 --- a/asset/asset.json +++ b/asset/asset.json @@ -1,4 +1,4 @@ { "name": "elasticsearch", - "version": "3.2.0" + "version": "3.3.0" } diff --git a/asset/package.json b/asset/package.json index 1890fe70..a2f37c7c 100644 --- a/asset/package.json +++ b/asset/package.json @@ -1,6 +1,6 @@ { "name": "asset", - "version": "3.2.0", + "version": "3.3.0", "private": true, "workspaces": { "nohoist": [ diff --git a/asset/src/elasticsearch_reader_api/schema.ts b/asset/src/elasticsearch_reader_api/schema.ts index 3fe72f8c..33ac7cb8 100644 --- a/asset/src/elasticsearch_reader_api/schema.ts +++ b/asset/src/elasticsearch_reader_api/schema.ts @@ -236,6 +236,11 @@ export const schema = { } } }, + useSimpleFetch: { + doc: 'Use the original fetch algorithm, that sets query size to windowSize without extra retry logic', + default: false, + format: Boolean + } }; export default class Schema extends ConvictSchema { diff --git a/package.json b/package.json index 8241dec6..5665921f 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "elasticsearch-assets", "description": "bundle of processors for teraslice", - "version": "3.2.0", + "version": "3.3.0", "private": true, "workspaces": [ "packages/*", diff --git a/packages/elasticsearch-asset-apis/package.json b/packages/elasticsearch-asset-apis/package.json index acc9e972..a49e4b6a 100644 --- a/packages/elasticsearch-asset-apis/package.json +++ b/packages/elasticsearch-asset-apis/package.json @@ -1,6 +1,6 @@ { "name": "@terascope/elasticsearch-asset-apis", - "version": "0.11.0", + "version": "0.11.1", "description": "Elasticsearch reader and sender apis", "publishConfig": { "access": "public" diff --git a/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/ElasticsearchReaderAPI.ts b/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/ElasticsearchReaderAPI.ts index 40ae6215..de54a21b 100644 --- a/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/ElasticsearchReaderAPI.ts +++ b/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/ElasticsearchReaderAPI.ts @@ -95,6 +95,10 @@ export class ElasticsearchReaderAPI { * https://github.com/terascope/elasticsearch-assets/issues/948 */ async fetch(queryParams: ReaderSlice = {}): Promise { + if (this.config.useSimpleFetch) { + return this.simpleFetch(queryParams); + } + const countExpansionFactor = 1.5; let querySize = 10000; const retryLimit = 5; @@ -163,6 +167,22 @@ export class ElasticsearchReaderAPI { return result; } + async simpleFetch(queryParams: ReaderSlice = {}): Promise { + // attempt to get window if not set + if (!this.windowSize) await this.setWindowSize(); + + // if we did go ahead and complete query + const query = buildQuery(this.config, { + ...queryParams, count: this.windowSize + }); + + return this.client.search( + query, + this.config.response_type ?? FetchResponseType.data_entities, + this.config.type_config + ); + } + /** * Handles multiple result types and returns the number of returned records * @param result the object returned that contains the search results diff --git a/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/index.ts b/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/index.ts index 4f9601cb..bf5884c1 100644 --- a/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/index.ts +++ b/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/index.ts @@ -52,6 +52,10 @@ export async function createSpacesReaderAPI({ if (config.response_type === FetchResponseType.data_frame && !config.type_config) { config.type_config = await client.getDataType(); } + // simpleFetch should be used by the spaces reader to avoid the search size + // expansion algorithm, this is because most endpoints are configured with + // a much lower search limit than the backend clusters + config.useSimpleFetch = true; return new ElasticsearchReaderAPI(config, client, emitter, logger); } diff --git a/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/interfaces.ts b/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/interfaces.ts index 4e5fd8a5..7641b742 100644 --- a/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/interfaces.ts +++ b/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/interfaces.ts @@ -357,6 +357,7 @@ export interface ESReaderOptions { starting_key_depth: number; response_type?: FetchResponseType; type_config?: DataTypeConfig + useSimpleFetch?: boolean; } export interface SpacesAPIConfig extends ESReaderOptions { diff --git a/test/spaces_reader/fetcher-spec.ts b/test/spaces_reader/fetcher-spec.ts index 09783dab..2328a6d6 100644 --- a/test/spaces_reader/fetcher-spec.ts +++ b/test/spaces_reader/fetcher-spec.ts @@ -42,7 +42,7 @@ describe('spaces_reader fetcher', () => { ['range query', { query: { q: 'date:[2017-09-23T18:07:14.332Z TO 2017-09-25T18:07:14.332Z}', - size: 150 + size: 100 }, opConfig: { token, @@ -60,7 +60,7 @@ describe('spaces_reader fetcher', () => { ['lucene query', { query: { q: '(foo:bar)', - size: 7500, + size: 5000, }, opConfig: { query: 'foo:bar', @@ -75,7 +75,7 @@ describe('spaces_reader fetcher', () => { ['lucene query with url characters', { query: { q: '(foo:"bar+baz")', - size: 7500, + size: 5000, }, opConfig: { query: 'foo:"bar+baz"', @@ -90,7 +90,7 @@ describe('spaces_reader fetcher', () => { ['lucene query with fields', { query: { q: '(test:query OR other:thing AND bytes:>=2000)', - size: 150, + size: 100, fields: 'foo,bar,date' }, opConfig: { @@ -107,7 +107,7 @@ describe('spaces_reader fetcher', () => { ['lucene query with date range', { query: { q: 'example_date:[2017-09-23T18:07:14.332Z TO 2017-09-25T18:07:14.332Z} AND (foo:bar)', - size: 300, + size: 200, }, opConfig: { query: 'foo:bar', @@ -124,7 +124,7 @@ describe('spaces_reader fetcher', () => { ['lucene query with geo point query', { query: { q: '(foo:bar)', - size: 150, + size: 100, geo_point: '52.3456,79.6784', geo_distance: '200km' }, @@ -144,7 +144,7 @@ describe('spaces_reader fetcher', () => { ['lucene query with geo bounding box query', { query: { q: '(foo:bar)', - size: 1500, + size: 100000, geo_box_top_left: '34.5234,79.42345', geo_box_bottom_right: '54.5234,80.3456', geo_sort_point: '52.3456,79.6784' @@ -159,9 +159,7 @@ describe('spaces_reader fetcher', () => { geo_box_bottom_right: '54.5234,80.3456', geo_sort_point: '52.3456,79.6784', }, - msg: { - count: 1000 - } + msg: {} }], ])('when performing a %s', (m, { query, opConfig: _opConfig, msg }) => { @@ -185,6 +183,9 @@ describe('spaces_reader fetcher', () => { ] }), { clients }); + // query size are overridden for unbounded fetches + query.size = maxSize; + let results: DataEntity[]; beforeEach(async () => { @@ -204,7 +205,6 @@ describe('spaces_reader fetcher', () => { }); await harness.initialize(); - // FIXME: is msg ... a slice? results = await harness.runSlice(msg); }); @@ -257,7 +257,7 @@ describe('spaces_reader fetcher', () => { scope.post(`/${testIndex}?token=${token}`, { q: '(test:query)', - size: 7500, + size: 100000, }) .delay(500) .reply(200, {