From 804afd5a5ead1fd5267b9f66e2918855a7edc82c Mon Sep 17 00:00:00 2001 From: Jared Noble Date: Thu, 3 Mar 2022 16:24:17 -0700 Subject: [PATCH 1/5] fix issue where id reader keys was not split correctly with multiple slicers --- .../algorithms/id-helpers.ts | 42 +++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts b/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts index 7d102637..0d5fa44c 100644 --- a/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts +++ b/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts @@ -1,5 +1,6 @@ /* eslint-disable no-useless-escape */ +import pMap from 'p-map'; import { CountFn, IDSlicerRange, IDSlicerRanges, IDType, ReaderSlice } from '../interfaces'; @@ -46,26 +47,33 @@ export async function determineIDSlicerRanges( num: number, getCount: CountFn ): Promise { - const results: IDSlicerRange[] = []; - const len = num; + const arrayLength = keysArray.length; + const list: string[][] = []; - let lastDivideNum = 0; - for (let i = 0; i < len; i += 1) { - let divideNum = Math.ceil(keysArray.length / len); + for (let i = 0; i < num; i += 1) { + list.push([]); + } - if (i === num - 1) { - divideNum = keysArray.length; - } + let counter = 0; - const keys = keysArray.slice(lastDivideNum, divideNum); - results.push({ - keys, - count: await getCount( - generateCountQueryForKeys(keys) - ) - }); - lastDivideNum = divideNum; + for (let i = 0; i < arrayLength; i += 1) { + // console.log('what counter', counter, keysArray[i]) + list[counter].push(keysArray[i]); + counter += 1; + + if (counter >= num) { + counter = 0; + } } - return results; + return pMap(list, async (keys) => { + const count = await getCount( + generateCountQueryForKeys(keys) + ); + + return { + keys, + count + }; + }); } From eddd828556a7679e7d844bedb0fef88905a22f44 Mon Sep 17 00:00:00 2001 From: Jared Noble Date: Fri, 4 Mar 2022 12:35:38 -0700 Subject: [PATCH 2/5] added more tests --- .../algorithms/id-helpers.ts | 2 +- .../test/reader-api-spec.ts | 214 ++++++++++++++++++ 2 files changed, 215 insertions(+), 1 deletion(-) diff --git a/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts b/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts index 0d5fa44c..4bcdf57d 100644 --- a/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts +++ b/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts @@ -2,7 +2,7 @@ import pMap from 'p-map'; import { - CountFn, IDSlicerRange, IDSlicerRanges, IDType, ReaderSlice + CountFn, IDSlicerRanges, IDType, ReaderSlice } from '../interfaces'; export const base64url = Object.freeze(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', diff --git a/packages/elasticsearch-asset-apis/test/reader-api-spec.ts b/packages/elasticsearch-asset-apis/test/reader-api-spec.ts index 3282e240..d80b574c 100644 --- a/packages/elasticsearch-asset-apis/test/reader-api-spec.ts +++ b/packages/elasticsearch-asset-apis/test/reader-api-spec.ts @@ -15,6 +15,7 @@ import { import evenSpread from './fixtures/data/even-spread'; import { createElasticsearchReaderAPI, + DateSlicerRange, ElasticsearchReaderClient, ESReaderOptions, FetchResponseType, @@ -646,5 +647,218 @@ describe('Reader API', () => { ).toEqual(errMsg); } }); + + it('can properly make id slicer ranges', async () => { + const config: ESReaderOptions = { + ...defaultConfig, + id_field_name: idFieldName, + starting_key_depth: 0, + key_type: IDType.base64url, + }; + + const api = createElasticsearchReaderAPI({ + config, client: readerClient, logger, emitter + }); + + const results = await api.makeIDSlicerRanges({ + numOfSlicers: 7, + }); + + expect(results).toEqual([ + { + keys: [ + 'a', 'h', 'o', 'v', + 'C', 'J', 'Q', 'X', + '4', '_' + ], + count: 126 + }, + { + keys: [ + 'b', 'i', 'p', + 'w', 'D', 'K', + 'R', 'Y', '5' + ], + count: 146 + }, + { + keys: [ + 'c', 'j', 'q', + 'x', 'E', 'L', + 'S', 'Z', '6' + ], + count: 116 + }, + { + keys: [ + 'd', 'k', 'r', + 'y', 'F', 'M', + 'T', '0', '7' + ], + count: 199 + }, + { + keys: [ + 'e', 'l', 's', + 'z', 'G', 'N', + 'U', '1', '8' + ], + count: 189 + }, + { + keys: [ + 'f', 'm', 't', + 'A', 'H', 'O', + 'V', '2', '9' + ], + count: 170 + }, + { + keys: [ + 'g', 'n', 'u', + 'B', 'I', 'P', + 'W', '3', '-' + ], + count: 54 + } + ]); + }); + + it('can make date ranges for slicers', async () => { + const api = createElasticsearchReaderAPI({ + config: defaultConfig, client: readerClient, logger, emitter + }); + + const ranges = await api.makeDateSlicerRanges({ + lifecycle: 'once', + numOfSlicers: 7, + recoveryData: [], + }); + + if (!ranges || ranges.length === 0) { + throw new Error('Faulty test'); + } + + const data = ranges + .map((node) => { + if (node == null) return node; + const { + count, + dates: mDates, + interval, + range: mRange + } = node as DateSlicerRange; + return { + count, + interval, + dates: { + start: mDates.start.toISOString(), + end: mDates.end.toISOString(), + limit: mDates.limit.toISOString(), + ...(mDates.holes && { holes: mDates.holes }) + }, + range: { + start: mRange.start.toISOString(), + limit: mRange.limit.toISOString() + } + }; + }); + + expect(data.length).toBeGreaterThan(0); + + expect(data).toMatchObject([ + { + count: 71, + interval: [27, 'ms'], + dates: { + start: '2019-04-26T15:00:23.201Z', + end: '2019-04-26T15:00:23.228Z', + limit: '2019-04-26T15:00:23.228Z' + }, + range: { + start: '2019-04-26T15:00:23.201Z', + limit: '2019-04-26T15:00:23.394Z' + } + }, + { + count: 89, + interval: [27, 'ms'], + dates: { + start: '2019-04-26T15:00:23.228Z', + end: '2019-04-26T15:00:23.255Z', + limit: '2019-04-26T15:00:23.255Z' + }, + range: { + start: '2019-04-26T15:00:23.201Z', + limit: '2019-04-26T15:00:23.394Z' + } + }, + { + count: 124, + interval: [27, 'ms'], + dates: { + start: '2019-04-26T15:00:23.255Z', + end: '2019-04-26T15:00:23.282Z', + limit: '2019-04-26T15:00:23.282Z' + }, + range: { + start: '2019-04-26T15:00:23.201Z', + limit: '2019-04-26T15:00:23.394Z' + } + }, + { + count: 109, + interval: [27, 'ms'], + dates: { + start: '2019-04-26T15:00:23.282Z', + end: '2019-04-26T15:00:23.309Z', + limit: '2019-04-26T15:00:23.309Z' + }, + range: { + start: '2019-04-26T15:00:23.201Z', + limit: '2019-04-26T15:00:23.394Z' + } + }, + { + count: 204, + interval: [27, 'ms'], + dates: { + start: '2019-04-26T15:00:23.309Z', + end: '2019-04-26T15:00:23.336Z', + limit: '2019-04-26T15:00:23.336Z' + }, + range: { + start: '2019-04-26T15:00:23.201Z', + limit: '2019-04-26T15:00:23.394Z' + } + }, + { + count: 118, + interval: [27, 'ms'], + dates: { + start: '2019-04-26T15:00:23.336Z', + end: '2019-04-26T15:00:23.363Z', + limit: '2019-04-26T15:00:23.363Z' + }, + range: { + start: '2019-04-26T15:00:23.201Z', + limit: '2019-04-26T15:00:23.394Z' + } + }, + { + count: 285, + interval: [31, 'ms'], + dates: { + start: '2019-04-26T15:00:23.363Z', + end: '2019-04-26T15:00:23.394Z', + limit: '2019-04-26T15:00:23.394Z' + }, + range: { + start: '2019-04-26T15:00:23.201Z', + limit: '2019-04-26T15:00:23.394Z' + } + } + ]); + }); }); }); From 4220bb39b3e17b1bce9ae124830fa0a378c693c4 Mon Sep 17 00:00:00 2001 From: Jared Noble Date: Fri, 4 Mar 2022 12:37:07 -0700 Subject: [PATCH 3/5] bump packages --- asset/package.json | 4 ++-- package.json | 2 +- packages/elasticsearch-asset-apis/package.json | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/asset/package.json b/asset/package.json index e33487ce..03744a83 100644 --- a/asset/package.json +++ b/asset/package.json @@ -1,6 +1,6 @@ { "name": "asset", - "version": "2.7.11", + "version": "2.7.12", "private": true, "workspaces": { "nohoist": [ @@ -18,7 +18,7 @@ "dependencies": { "@terascope/data-mate": "^0.37.4", "@terascope/elasticsearch-api": "^3.0.2", - "@terascope/elasticsearch-asset-apis": "^0.8.5", + "@terascope/elasticsearch-asset-apis": "^0.8.6", "@terascope/job-components": "^0.56.1", "@terascope/teraslice-state-storage": "^0.33.2", "@terascope/utils": "^0.44.1", diff --git a/package.json b/package.json index 125d2b6d..becdfbcd 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "elasticsearch-assets", "description": "bundle of processors for teraslice", - "version": "2.7.11", + "version": "2.7.12", "private": true, "workspaces": [ "packages/*", diff --git a/packages/elasticsearch-asset-apis/package.json b/packages/elasticsearch-asset-apis/package.json index 7be8b2a4..14024aa8 100644 --- a/packages/elasticsearch-asset-apis/package.json +++ b/packages/elasticsearch-asset-apis/package.json @@ -1,6 +1,6 @@ { "name": "@terascope/elasticsearch-asset-apis", - "version": "0.8.5", + "version": "0.8.6", "description": "Elasticsearch reader and sender apis", "publishConfig": { "access": "public" From 182df36d6cce595f5ded284571cd21afe9378a33 Mon Sep 17 00:00:00 2001 From: Jared Noble Date: Fri, 4 Mar 2022 14:32:04 -0700 Subject: [PATCH 4/5] update packages --- package.json | 4 ++-- yarn.lock | 21 +++++++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/package.json b/package.json index becdfbcd..f4219447 100644 --- a/package.json +++ b/package.json @@ -48,8 +48,8 @@ "@types/bluebird": "^3.5.36", "@types/elasticsearch": "^5.0.40", "@types/got": "^9.6.12", - "@types/jest": "^27.4.0", - "@types/node": "^16.11.26", + "@types/jest": "^27.4.1", + "@types/node": "^17.0.21", "elasticsearch": "^15.4.1", "elasticsearch-store": "^0.60.4", "eslint": "^8.10.0", diff --git a/yarn.lock b/yarn.lock index cdf5040b..a0b326bc 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1182,12 +1182,12 @@ dependencies: "@types/istanbul-lib-report" "*" -"@types/jest@^27.4.0": - version "27.4.0" - resolved "https://registry.yarnpkg.com/@types/jest/-/jest-27.4.0.tgz#037ab8b872067cae842a320841693080f9cb84ed" - integrity sha512-gHl8XuC1RZ8H2j5sHv/JqsaxXkDDM9iDOgu0Wp8sjs4u/snb2PVehyWXJPr+ORA0RPpgw231mnutWI1+0hgjIQ== +"@types/jest@^27.4.1": + version "27.4.1" + resolved "https://registry.yarnpkg.com/@types/jest/-/jest-27.4.1.tgz#185cbe2926eaaf9662d340cc02e548ce9e11ab6d" + integrity sha512-23iPJADSmicDVrWk+HT58LMJtzLAnB2AgIzplQuq/bSrGaxCrlvRFjGbXmamnnk/mAmCdLStiGqggu28ocUyiw== dependencies: - jest-diff "^27.0.0" + jest-matcher-utils "^27.0.0" pretty-format "^27.0.0" "@types/json-schema@^7.0.9": @@ -1212,11 +1212,16 @@ resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.14.178.tgz#341f6d2247db528d4a13ddbb374bcdc80406f4f8" integrity sha512-0d5Wd09ItQWH1qFbEyQ7oTQ3GZrMfth5JkbN3EvTKLXcHLRDSXeLnlvlOn0wvxVIwK5o2M8JzP/OWz7T3NRsbw== -"@types/node@*", "@types/node@^16.11.26": +"@types/node@*": version "16.11.26" resolved "https://registry.yarnpkg.com/@types/node/-/node-16.11.26.tgz#63d204d136c9916fb4dcd1b50f9740fe86884e47" integrity sha512-GZ7bu5A6+4DtG7q9GsoHXy3ALcgeIHP4NnL0Vv2wu0uUB/yQex26v0tf6/na1mm0+bS9Uw+0DFex7aaKr2qawQ== +"@types/node@^17.0.21": + version "17.0.21" + resolved "https://registry.yarnpkg.com/@types/node/-/node-17.0.21.tgz#864b987c0c68d07b4345845c3e63b75edd143644" + integrity sha512-DBZCJbhII3r90XbQxI8Y9IjjiiOGlZ0Hr32omXIZvwwZ7p4DMMXGrKXVyPfuoBOri9XNtL0UK69jYIBIsRX3QQ== + "@types/prettier@^2.1.5": version "2.4.2" resolved "https://registry.yarnpkg.com/@types/prettier/-/prettier-2.4.2.tgz#4c62fae93eb479660c3bd93f9d24d561597a8281" @@ -3474,7 +3479,7 @@ jest-config@^27.5.1: slash "^3.0.0" strip-json-comments "^3.1.1" -jest-diff@^27.0.0, jest-diff@^27.2.5: +jest-diff@^27.2.5: version "27.4.6" resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-27.4.6.tgz#93815774d2012a2cbb6cf23f84d48c7a2618f98d" integrity sha512-zjaB0sh0Lb13VyPsd92V7HkqF6yKRH9vm33rwBt7rPYrpQvS1nCvlIy2pICbKta+ZjWngYLNn4cCK4nyZkjS/w== @@ -3606,7 +3611,7 @@ jest-leak-detector@^27.5.1: jest-get-type "^27.5.1" pretty-format "^27.5.1" -jest-matcher-utils@^27.5.1: +jest-matcher-utils@^27.0.0, jest-matcher-utils@^27.5.1: version "27.5.1" resolved "https://registry.yarnpkg.com/jest-matcher-utils/-/jest-matcher-utils-27.5.1.tgz#9c0cdbda8245bc22d2331729d1091308b40cf8ab" integrity sha512-z2uTx/T6LBaCoNWNFWwChLBKYxTMcGBRjAt+2SbP929/Fflb9aa5LGma654Rz8z9HLxsrUaYzxE9T/EFIL/PAw== From 091ce7fe8d7fdc8818b3dbfcbb77c6402b6062b5 Mon Sep 17 00:00:00 2001 From: Jared Noble Date: Fri, 4 Mar 2022 14:58:00 -0700 Subject: [PATCH 5/5] clean files --- .../src/elasticsearch-reader-api/algorithms/id-helpers.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts b/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts index 4bcdf57d..a5311aa0 100644 --- a/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts +++ b/packages/elasticsearch-asset-apis/src/elasticsearch-reader-api/algorithms/id-helpers.ts @@ -57,7 +57,6 @@ export async function determineIDSlicerRanges( let counter = 0; for (let i = 0; i < arrayLength; i += 1) { - // console.log('what counter', counter, keysArray[i]) list[counter].push(keysArray[i]); counter += 1;