diff --git a/asset/package.json b/asset/package.json index e33487ce..03744a83 100644 --- a/asset/package.json +++ b/asset/package.json @@ -1,6 +1,6 @@ { "name": "asset", - "version": "2.7.11", + "version": "2.7.12", "private": true, "workspaces": { "nohoist": [ @@ -18,7 +18,7 @@ "dependencies": { "@terascope/data-mate": "^0.37.4", "@terascope/elasticsearch-api": "^3.0.2", - "@terascope/elasticsearch-asset-apis": "^0.8.5", + "@terascope/elasticsearch-asset-apis": "^0.8.6", "@terascope/job-components": "^0.56.1", "@terascope/teraslice-state-storage": "^0.33.2", "@terascope/utils": "^0.44.1", diff --git a/package.json b/package.json index ed9c6e1e..f4219447 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "elasticsearch-assets", "description": "bundle of processors for teraslice", - "version": "2.7.11", + "version": "2.7.12", "private": true, "workspaces": [ "packages/*", @@ -49,7 +49,7 @@ "@types/elasticsearch": "^5.0.40", "@types/got": "^9.6.12", "@types/jest": "^27.4.1", - "@types/node": "^16.11.26", + "@types/node": "^17.0.21", "elasticsearch": "^15.4.1", "elasticsearch-store": "^0.60.4", "eslint": "^8.10.0", diff --git a/packages/elasticsearch-asset-apis/package.json b/packages/elasticsearch-asset-apis/package.json index 7be8b2a4..14024aa8 100644 --- a/packages/elasticsearch-asset-apis/package.json +++ b/packages/elasticsearch-asset-apis/package.json @@ -1,6 +1,6 @@ { "name": "@terascope/elasticsearch-asset-apis", - "version": "0.8.5", + "version": "0.8.6", "description": "Elasticsearch reader and sender apis", "publishConfig": { "access": "public" diff --git a/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts b/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts index 7d102637..a5311aa0 100644 --- a/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts +++ b/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts @@ -1,7 +1,8 @@ /* eslint-disable no-useless-escape */ +import pMap from 'p-map'; import { - CountFn, IDSlicerRange, IDSlicerRanges, IDType, ReaderSlice + CountFn, IDSlicerRanges, IDType, ReaderSlice } from '../interfaces'; export const base64url = Object.freeze(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', @@ -46,26 +47,32 @@ export async function determineIDSlicerRanges( num: number, getCount: CountFn ): Promise { - const results: IDSlicerRange[] = []; - const len = num; + const arrayLength = keysArray.length; + const list: string[][] = []; - let lastDivideNum = 0; - for (let i = 0; i < len; i += 1) { - let divideNum = Math.ceil(keysArray.length / len); + for (let i = 0; i < num; i += 1) { + list.push([]); + } - if (i === num - 1) { - divideNum = keysArray.length; - } + let counter = 0; - const keys = keysArray.slice(lastDivideNum, divideNum); - results.push({ - keys, - count: await getCount( - generateCountQueryForKeys(keys) - ) - }); - lastDivideNum = divideNum; + for (let i = 0; i < arrayLength; i += 1) { + list[counter].push(keysArray[i]); + counter += 1; + + if (counter >= num) { + counter = 0; + } } - return results; + return pMap(list, async (keys) => { + const count = await getCount( + generateCountQueryForKeys(keys) + ); + + return { + keys, + count + }; + }); } diff --git a/packages/elasticsearch-asset-apis/test/reader-api-spec.ts b/packages/elasticsearch-asset-apis/test/reader-api-spec.ts index 3282e240..d80b574c 100644 --- a/packages/elasticsearch-asset-apis/test/reader-api-spec.ts +++ b/packages/elasticsearch-asset-apis/test/reader-api-spec.ts @@ -15,6 +15,7 @@ import { import evenSpread from './fixtures/data/even-spread'; import { createElasticsearchReaderAPI, + DateSlicerRange, ElasticsearchReaderClient, ESReaderOptions, FetchResponseType, @@ -646,5 +647,218 @@ describe('Reader API', () => { ).toEqual(errMsg); } }); + + it('can properly make id slicer ranges', async () => { + const config: ESReaderOptions = { + ...defaultConfig, + id_field_name: idFieldName, + starting_key_depth: 0, + key_type: IDType.base64url, + }; + + const api = createElasticsearchReaderAPI({ + config, client: readerClient, logger, emitter + }); + + const results = await api.makeIDSlicerRanges({ + numOfSlicers: 7, + }); + + expect(results).toEqual([ + { + keys: [ + 'a', 'h', 'o', 'v', + 'C', 'J', 'Q', 'X', + '4', '_' + ], + count: 126 + }, + { + keys: [ + 'b', 'i', 'p', + 'w', 'D', 'K', + 'R', 'Y', '5' + ], + count: 146 + }, + { + keys: [ + 'c', 'j', 'q', + 'x', 'E', 'L', + 'S', 'Z', '6' + ], + count: 116 + }, + { + keys: [ + 'd', 'k', 'r', + 'y', 'F', 'M', + 'T', '0', '7' + ], + count: 199 + }, + { + keys: [ + 'e', 'l', 's', + 'z', 'G', 'N', + 'U', '1', '8' + ], + count: 189 + }, + { + keys: [ + 'f', 'm', 't', + 'A', 'H', 'O', + 'V', '2', '9' + ], + count: 170 + }, + { + keys: [ + 'g', 'n', 'u', + 'B', 'I', 'P', + 'W', '3', '-' + ], + count: 54 + } + ]); + }); + + it('can make date ranges for slicers', async () => { + const api = createElasticsearchReaderAPI({ + config: defaultConfig, client: readerClient, logger, emitter + }); + + const ranges = await api.makeDateSlicerRanges({ + lifecycle: 'once', + numOfSlicers: 7, + recoveryData: [], + }); + + if (!ranges || ranges.length === 0) { + throw new Error('Faulty test'); + } + + const data = ranges + .map((node) => { + if (node == null) return node; + const { + count, + dates: mDates, + interval, + range: mRange + } = node as DateSlicerRange; + return { + count, + interval, + dates: { + start: mDates.start.toISOString(), + end: mDates.end.toISOString(), + limit: mDates.limit.toISOString(), + ...(mDates.holes && { holes: mDates.holes }) + }, + range: { + start: mRange.start.toISOString(), + limit: mRange.limit.toISOString() + } + }; + }); + + expect(data.length).toBeGreaterThan(0); + + expect(data).toMatchObject([ + { + count: 71, + interval: [27, 'ms'], + dates: { + start: '2019-04-26T15:00:23.201Z', + end: '2019-04-26T15:00:23.228Z', + limit: '2019-04-26T15:00:23.228Z' + }, + range: { + start: '2019-04-26T15:00:23.201Z', + limit: '2019-04-26T15:00:23.394Z' + } + }, + { + count: 89, + interval: [27, 'ms'], + dates: { + start: '2019-04-26T15:00:23.228Z', + end: '2019-04-26T15:00:23.255Z', + limit: '2019-04-26T15:00:23.255Z' + }, + range: { + start: '2019-04-26T15:00:23.201Z', + limit: '2019-04-26T15:00:23.394Z' + } + }, + { + count: 124, + interval: [27, 'ms'], + dates: { + start: '2019-04-26T15:00:23.255Z', + end: '2019-04-26T15:00:23.282Z', + limit: '2019-04-26T15:00:23.282Z' + }, + range: { + start: '2019-04-26T15:00:23.201Z', + limit: '2019-04-26T15:00:23.394Z' + } + }, + { + count: 109, + interval: [27, 'ms'], + dates: { + start: '2019-04-26T15:00:23.282Z', + end: '2019-04-26T15:00:23.309Z', + limit: '2019-04-26T15:00:23.309Z' + }, + range: { + start: '2019-04-26T15:00:23.201Z', + limit: '2019-04-26T15:00:23.394Z' + } + }, + { + count: 204, + interval: [27, 'ms'], + dates: { + start: '2019-04-26T15:00:23.309Z', + end: '2019-04-26T15:00:23.336Z', + limit: '2019-04-26T15:00:23.336Z' + }, + range: { + start: '2019-04-26T15:00:23.201Z', + limit: '2019-04-26T15:00:23.394Z' + } + }, + { + count: 118, + interval: [27, 'ms'], + dates: { + start: '2019-04-26T15:00:23.336Z', + end: '2019-04-26T15:00:23.363Z', + limit: '2019-04-26T15:00:23.363Z' + }, + range: { + start: '2019-04-26T15:00:23.201Z', + limit: '2019-04-26T15:00:23.394Z' + } + }, + { + count: 285, + interval: [31, 'ms'], + dates: { + start: '2019-04-26T15:00:23.363Z', + end: '2019-04-26T15:00:23.394Z', + limit: '2019-04-26T15:00:23.394Z' + }, + range: { + start: '2019-04-26T15:00:23.201Z', + limit: '2019-04-26T15:00:23.394Z' + } + } + ]); + }); }); }); diff --git a/yarn.lock b/yarn.lock index 527a4c60..fbf9e867 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1212,11 +1212,16 @@ resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.14.178.tgz#341f6d2247db528d4a13ddbb374bcdc80406f4f8" integrity sha512-0d5Wd09ItQWH1qFbEyQ7oTQ3GZrMfth5JkbN3EvTKLXcHLRDSXeLnlvlOn0wvxVIwK5o2M8JzP/OWz7T3NRsbw== -"@types/node@*", "@types/node@^16.11.26": +"@types/node@*": version "16.11.26" resolved "https://registry.yarnpkg.com/@types/node/-/node-16.11.26.tgz#63d204d136c9916fb4dcd1b50f9740fe86884e47" integrity sha512-GZ7bu5A6+4DtG7q9GsoHXy3ALcgeIHP4NnL0Vv2wu0uUB/yQex26v0tf6/na1mm0+bS9Uw+0DFex7aaKr2qawQ== +"@types/node@^17.0.21": + version "17.0.21" + resolved "https://registry.yarnpkg.com/@types/node/-/node-17.0.21.tgz#864b987c0c68d07b4345845c3e63b75edd143644" + integrity sha512-DBZCJbhII3r90XbQxI8Y9IjjiiOGlZ0Hr32omXIZvwwZ7p4DMMXGrKXVyPfuoBOri9XNtL0UK69jYIBIsRX3QQ== + "@types/prettier@^2.1.5": version "2.4.2" resolved "https://registry.yarnpkg.com/@types/prettier/-/prettier-2.4.2.tgz#4c62fae93eb479660c3bd93f9d24d561597a8281"