From 57c21d74e0736267c94182e996952eca7b0d1009 Mon Sep 17 00:00:00 2001 From: Levko Kravets Date: Wed, 15 Nov 2023 00:08:01 +0200 Subject: [PATCH] DBSQLOperation Refactoring (3 of 3) (#198) * Refactoring: Introduce concept of results provider; convert FetchResultsHelper into provider of TRowSet Signed-off-by: Levko Kravets * Convert Json/Arrow/CloudFetch result handlers to implement result provider interface Signed-off-by: Levko Kravets * Refine the code and update tests Signed-off-by: Levko Kravets --------- Signed-off-by: Levko Kravets --- lib/DBSQLOperation/index.ts | 39 ++++----- .../{ArrowResult.ts => ArrowResultHandler.ts} | 36 +++++--- ...chResult.ts => CloudFetchResultHandler.ts} | 24 +++--- lib/result/IOperationResult.ts | 7 -- lib/result/IResultsProvider.ts | 9 ++ .../{JsonResult.ts => JsonResultHandler.ts} | 28 +++--- .../RowSetProvider.ts} | 11 ++- tests/e2e/arrow.test.js | 19 +++-- tests/e2e/cloudfetch.test.js | 12 +-- tests/unit/DBSQLOperation.test.js | 12 +-- ...ult.test.js => ArrowResultHandler.test.js} | 58 +++++++++---- ...est.js => CloudFetchResultHandler.test.js} | 75 +++++++++------- ...sult.test.js => JsonResultHandler.test.js} | 85 +++++++------------ tests/unit/result/compatibility.test.js | 21 +++-- .../result/fixtures/RowSetProviderMock.js | 15 ++++ 15 files changed, 254 insertions(+), 197 deletions(-) rename lib/result/{ArrowResult.ts => ArrowResultHandler.ts} (82%) rename lib/result/{CloudFetchResult.ts => CloudFetchResultHandler.ts} (71%) delete mode 100644 lib/result/IOperationResult.ts create mode 100644 lib/result/IResultsProvider.ts rename lib/result/{JsonResult.ts => JsonResultHandler.ts} (73%) rename lib/{DBSQLOperation/FetchResultsHelper.ts => result/RowSetProvider.ts} (88%) rename tests/unit/result/{ArrowResult.test.js => ArrowResultHandler.test.js} (59%) rename tests/unit/result/{CloudFetchResult.test.js => CloudFetchResultHandler.test.js} (74%) rename tests/unit/result/{JsonResult.test.js => JsonResultHandler.test.js} (86%) create mode 100644 tests/unit/result/fixtures/RowSetProviderMock.js diff --git a/lib/DBSQLOperation/index.ts b/lib/DBSQLOperation/index.ts index 5198d726..08e32864 100644 --- a/lib/DBSQLOperation/index.ts +++ b/lib/DBSQLOperation/index.ts @@ -16,13 +16,13 @@ import { TOperationState, } from '../../thrift/TCLIService_types'; import Status from '../dto/Status'; -import FetchResultsHelper from './FetchResultsHelper'; import { LogLevel } from '../contracts/IDBSQLLogger'; import OperationStateError, { OperationStateErrorCode } from '../errors/OperationStateError'; -import IOperationResult from '../result/IOperationResult'; -import JsonResult from '../result/JsonResult'; -import ArrowResult from '../result/ArrowResult'; -import CloudFetchResult from '../result/CloudFetchResult'; +import IResultsProvider from '../result/IResultsProvider'; +import RowSetProvider from '../result/RowSetProvider'; +import JsonResultHandler from '../result/JsonResultHandler'; +import ArrowResultHandler from '../result/ArrowResultHandler'; +import CloudFetchResultHandler from '../result/CloudFetchResultHandler'; import { definedOrError } from '../utils'; import HiveDriverError from '../errors/HiveDriverError'; import IClientContext from '../contracts/IClientContext'; @@ -50,7 +50,7 @@ export default class DBSQLOperation implements IOperation { public onClose?: () => void; - private readonly _data: FetchResultsHelper; + private readonly _data: RowSetProvider; private readonly closeOperation?: TCloseOperationResp; @@ -68,7 +68,7 @@ export default class DBSQLOperation implements IOperation { private hasResultSet: boolean = false; - private resultHandler?: IOperationResult; + private resultHandler?: IResultsProvider>; constructor({ handle, directResults, context }: DBSQLOperationConstructorOptions) { this.operationHandle = handle; @@ -82,7 +82,7 @@ export default class DBSQLOperation implements IOperation { } this.metadata = directResults?.resultSetMetadata; - this._data = new FetchResultsHelper( + this._data = new RowSetProvider( this.context, this.operationHandle, [directResults?.resultSet], @@ -135,14 +135,12 @@ export default class DBSQLOperation implements IOperation { await this.waitUntilReady(options); - const [resultHandler, data] = await Promise.all([ - this.getResultHandler(), - this._data.fetch(options?.maxRows || defaultMaxRows), - ]); + const resultHandler = await this.getResultHandler(); + await this.failIfClosed(); + const result = resultHandler.fetchNext({ limit: options?.maxRows || defaultMaxRows }); await this.failIfClosed(); - const result = await resultHandler.getValue(data ? [data] : []); this.context .getLogger() .log( @@ -234,14 +232,9 @@ export default class DBSQLOperation implements IOperation { return false; } - // Return early if there are still data available for fetching - if (this._data.hasMoreRows) { - return true; - } - // If we fetched all the data from server - check if there's anything buffered in result handler const resultHandler = await this.getResultHandler(); - return resultHandler.hasPendingData(); + return resultHandler.hasMore(); } public async getSchema(options?: GetSchemaOptions): Promise { @@ -342,20 +335,20 @@ export default class DBSQLOperation implements IOperation { return this.metadata; } - private async getResultHandler(): Promise { + private async getResultHandler(): Promise>> { const metadata = await this.fetchMetadata(); const resultFormat = definedOrError(metadata.resultFormat); if (!this.resultHandler) { switch (resultFormat) { case TSparkRowSetType.COLUMN_BASED_SET: - this.resultHandler = new JsonResult(this.context, metadata.schema); + this.resultHandler = new JsonResultHandler(this.context, this._data, metadata.schema); break; case TSparkRowSetType.ARROW_BASED_SET: - this.resultHandler = new ArrowResult(this.context, metadata.schema, metadata.arrowSchema); + this.resultHandler = new ArrowResultHandler(this.context, this._data, metadata.schema, metadata.arrowSchema); break; case TSparkRowSetType.URL_BASED_SET: - this.resultHandler = new CloudFetchResult(this.context, metadata.schema); + this.resultHandler = new CloudFetchResultHandler(this.context, this._data, metadata.schema); break; default: this.resultHandler = undefined; diff --git a/lib/result/ArrowResult.ts b/lib/result/ArrowResultHandler.ts similarity index 82% rename from lib/result/ArrowResult.ts rename to lib/result/ArrowResultHandler.ts index b44ae305..a1076293 100644 --- a/lib/result/ArrowResult.ts +++ b/lib/result/ArrowResultHandler.ts @@ -13,7 +13,7 @@ import { } from 'apache-arrow'; import { TRowSet, TTableSchema, TColumnDesc } from '../../thrift/TCLIService_types'; import IClientContext from '../contracts/IClientContext'; -import IOperationResult from './IOperationResult'; +import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider'; import { getSchemaColumns, convertThriftValue } from './utils'; const { isArrowBigNumSymbol, bigNumToBigInt } = arrowUtils; @@ -21,28 +21,38 @@ const { isArrowBigNumSymbol, bigNumToBigInt } = arrowUtils; type ArrowSchema = Schema; type ArrowSchemaField = Field>; -export default class ArrowResult implements IOperationResult { +export default class ArrowResultHandler implements IResultsProvider> { protected readonly context: IClientContext; + private readonly source: IResultsProvider; + private readonly schema: Array; private readonly arrowSchema?: Buffer; - constructor(context: IClientContext, schema?: TTableSchema, arrowSchema?: Buffer) { + constructor( + context: IClientContext, + source: IResultsProvider, + schema?: TTableSchema, + arrowSchema?: Buffer, + ) { this.context = context; + this.source = source; this.schema = getSchemaColumns(schema); this.arrowSchema = arrowSchema; } - async hasPendingData() { - return false; + public async hasMore() { + return this.source.hasMore(); } - async getValue(data?: Array) { - if (this.schema.length === 0 || !this.arrowSchema || !data) { + public async fetchNext(options: ResultsProviderFetchNextOptions) { + if (this.schema.length === 0 || !this.arrowSchema) { return []; } + const data = await this.source.fetchNext(options); + const batches = await this.getBatches(data); if (batches.length === 0) { return []; @@ -52,15 +62,13 @@ export default class ArrowResult implements IOperationResult { return this.getRows(table.schema, table.toArray()); } - protected async getBatches(data: Array): Promise> { + protected async getBatches(rowSet?: TRowSet): Promise> { const result: Array = []; - data.forEach((rowSet) => { - rowSet.arrowBatches?.forEach((arrowBatch) => { - if (arrowBatch.batch) { - result.push(arrowBatch.batch); - } - }); + rowSet?.arrowBatches?.forEach((arrowBatch) => { + if (arrowBatch.batch) { + result.push(arrowBatch.batch); + } }); return result; diff --git a/lib/result/CloudFetchResult.ts b/lib/result/CloudFetchResultHandler.ts similarity index 71% rename from lib/result/CloudFetchResult.ts rename to lib/result/CloudFetchResultHandler.ts index 31fbd633..a49e8714 100644 --- a/lib/result/CloudFetchResult.ts +++ b/lib/result/CloudFetchResultHandler.ts @@ -2,29 +2,31 @@ import { Buffer } from 'buffer'; import fetch, { RequestInfo, RequestInit } from 'node-fetch'; import { TRowSet, TSparkArrowResultLink, TTableSchema } from '../../thrift/TCLIService_types'; import IClientContext from '../contracts/IClientContext'; -import ArrowResult from './ArrowResult'; +import IResultsProvider from './IResultsProvider'; +import ArrowResultHandler from './ArrowResultHandler'; import globalConfig from '../globalConfig'; -export default class CloudFetchResult extends ArrowResult { +export default class CloudFetchResultHandler extends ArrowResultHandler { private pendingLinks: Array = []; private downloadedBatches: Array = []; - constructor(context: IClientContext, schema?: TTableSchema) { + constructor(context: IClientContext, source: IResultsProvider, schema?: TTableSchema) { // Arrow schema returned in metadata is not needed for CloudFetch results: // each batch already contains schema and could be decoded as is - super(context, schema, Buffer.alloc(0)); + super(context, source, schema, Buffer.alloc(0)); } - async hasPendingData() { - return this.pendingLinks.length > 0 || this.downloadedBatches.length > 0; + public async hasMore() { + if (this.pendingLinks.length > 0 || this.downloadedBatches.length > 0) { + return true; + } + return super.hasMore(); } - protected async getBatches(data: Array): Promise> { - data.forEach((item) => { - item.resultLinks?.forEach((link) => { - this.pendingLinks.push(link); - }); + protected async getBatches(data?: TRowSet): Promise> { + data?.resultLinks?.forEach((link) => { + this.pendingLinks.push(link); }); if (this.downloadedBatches.length === 0) { diff --git a/lib/result/IOperationResult.ts b/lib/result/IOperationResult.ts deleted file mode 100644 index 7b42a196..00000000 --- a/lib/result/IOperationResult.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { TRowSet } from '../../thrift/TCLIService_types'; - -export default interface IOperationResult { - getValue(data?: Array): Promise; - - hasPendingData(): Promise; -} diff --git a/lib/result/IResultsProvider.ts b/lib/result/IResultsProvider.ts new file mode 100644 index 00000000..0e521f71 --- /dev/null +++ b/lib/result/IResultsProvider.ts @@ -0,0 +1,9 @@ +export interface ResultsProviderFetchNextOptions { + limit: number; +} + +export default interface IResultsProvider { + fetchNext(options: ResultsProviderFetchNextOptions): Promise; + + hasMore(): Promise; +} diff --git a/lib/result/JsonResult.ts b/lib/result/JsonResultHandler.ts similarity index 73% rename from lib/result/JsonResult.ts rename to lib/result/JsonResultHandler.ts index 0c7daefa..bcc07e77 100644 --- a/lib/result/JsonResult.ts +++ b/lib/result/JsonResultHandler.ts @@ -1,34 +1,38 @@ import { ColumnCode } from '../hive/Types'; import { TRowSet, TTableSchema, TColumn, TColumnDesc } from '../../thrift/TCLIService_types'; import IClientContext from '../contracts/IClientContext'; -import IOperationResult from './IOperationResult'; +import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider'; import { getSchemaColumns, convertThriftValue } from './utils'; -export default class JsonResult implements IOperationResult { +export default class JsonResultHandler implements IResultsProvider> { private readonly context: IClientContext; + private readonly source: IResultsProvider; + private readonly schema: Array; - constructor(context: IClientContext, schema?: TTableSchema) { + constructor(context: IClientContext, source: IResultsProvider, schema?: TTableSchema) { this.context = context; + this.source = source; this.schema = getSchemaColumns(schema); } - async hasPendingData() { - return false; + public async hasMore() { + return this.source.hasMore(); } - async getValue(data?: Array): Promise> { - if (this.schema.length === 0 || !data) { + public async fetchNext(options: ResultsProviderFetchNextOptions) { + if (this.schema.length === 0) { return []; } - return data.reduce((result: Array, rowSet: TRowSet) => { - const columns = rowSet.columns || []; - const rows = this.getRows(columns, this.schema); + const data = await this.source.fetchNext(options); + if (!data) { + return []; + } - return result.concat(rows); - }, []); + const columns = data.columns || []; + return this.getRows(columns, this.schema); } private getRows(columns: Array, descriptors: Array): Array { diff --git a/lib/DBSQLOperation/FetchResultsHelper.ts b/lib/result/RowSetProvider.ts similarity index 88% rename from lib/DBSQLOperation/FetchResultsHelper.ts rename to lib/result/RowSetProvider.ts index 79f82603..b131e905 100644 --- a/lib/DBSQLOperation/FetchResultsHelper.ts +++ b/lib/result/RowSetProvider.ts @@ -8,6 +8,7 @@ import { import { ColumnCode, FetchType, Int64 } from '../hive/Types'; import Status from '../dto/Status'; import IClientContext from '../contracts/IClientContext'; +import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider'; function checkIfOperationHasMoreRows(response: TFetchResultsResp): boolean { if (response.hasMoreRows) { @@ -35,7 +36,7 @@ function checkIfOperationHasMoreRows(response: TFetchResultsResp): boolean { return (columnValue?.values?.length || 0) > 0; } -export default class FetchResultsHelper { +export default class RowSetProvider implements IResultsProvider { private readonly context: IClientContext; private readonly operationHandle: TOperationHandle; @@ -79,7 +80,7 @@ export default class FetchResultsHelper { return response.results; } - public async fetch(maxRows: number) { + public async fetchNext({ limit }: ResultsProviderFetchNextOptions) { const prefetchedResponse = this.prefetchedResults.shift(); if (prefetchedResponse) { return this.processFetchResponse(prefetchedResponse); @@ -89,10 +90,14 @@ export default class FetchResultsHelper { const response = await driver.fetchResults({ operationHandle: this.operationHandle, orientation: this.fetchOrientation, - maxRows: new Int64(maxRows), + maxRows: new Int64(limit), fetchType: FetchType.Data, }); return this.processFetchResponse(response); } + + public async hasMore() { + return this.hasMoreRows; + } } diff --git a/tests/e2e/arrow.test.js b/tests/e2e/arrow.test.js index a75c3059..4118a116 100644 --- a/tests/e2e/arrow.test.js +++ b/tests/e2e/arrow.test.js @@ -1,8 +1,9 @@ const { expect } = require('chai'); +const sinon = require('sinon'); const config = require('./utils/config'); const logger = require('./utils/logger')(config.logger); const { DBSQLClient } = require('../..'); -const ArrowResult = require('../../dist/result/ArrowResult').default; +const ArrowResultHandler = require('../../dist/result/ArrowResultHandler').default; const globalConfig = require('../../dist/globalConfig').default; const fixtures = require('../fixtures/compatibility'); @@ -76,7 +77,7 @@ describe('Arrow support', () => { expect(result).to.deep.equal(expectedColumn); const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.not.instanceof(ArrowResult); + expect(resultHandler).to.be.not.instanceof(ArrowResultHandler); await operation.close(); }), @@ -93,7 +94,7 @@ describe('Arrow support', () => { expect(fixArrowResult(result)).to.deep.equal(expectedArrow); const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.instanceof(ArrowResult); + expect(resultHandler).to.be.instanceof(ArrowResultHandler); await operation.close(); }), @@ -110,7 +111,7 @@ describe('Arrow support', () => { expect(fixArrowResult(result)).to.deep.equal(expectedArrowNativeTypes); const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.instanceof(ArrowResult); + expect(resultHandler).to.be.instanceof(ArrowResultHandler); await operation.close(); }), @@ -130,14 +131,18 @@ describe('Arrow support', () => { // We use some internals here to check that server returned response with multiple batches const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.instanceof(ArrowResult); + expect(resultHandler).to.be.instanceof(ArrowResultHandler); - const rawData = await operation._data.fetch(rowsCount); + sinon.spy(operation._data, 'fetchNext'); + + const result = await resultHandler.fetchNext({ limit: rowsCount }); + + expect(operation._data.fetchNext.callCount).to.be.eq(1); + const rawData = await operation._data.fetchNext.firstCall.returnValue; // We don't know exact count of batches returned, it depends on server's configuration, // but with much enough rows there should be more than one result batch expect(rawData.arrowBatches?.length).to.be.gt(1); - const result = await resultHandler.getValue([rawData]); expect(result.length).to.be.eq(rowsCount); }); }); diff --git a/tests/e2e/cloudfetch.test.js b/tests/e2e/cloudfetch.test.js index 3997f6af..03b2cb60 100644 --- a/tests/e2e/cloudfetch.test.js +++ b/tests/e2e/cloudfetch.test.js @@ -3,7 +3,7 @@ const sinon = require('sinon'); const config = require('./utils/config'); const logger = require('./utils/logger')(config.logger); const { DBSQLClient } = require('../..'); -const CloudFetchResult = require('../../dist/result/CloudFetchResult').default; +const CloudFetchResultHandler = require('../../dist/result/CloudFetchResultHandler').default; const globalConfig = require('../../dist/globalConfig').default; const openSession = async () => { @@ -57,24 +57,24 @@ describe('CloudFetch', () => { // Check if we're actually getting data via CloudFetch const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.instanceOf(CloudFetchResult); + expect(resultHandler).to.be.instanceOf(CloudFetchResultHandler); // Fetch first chunk and check if result handler behaves properly. // With the count of rows we queried, there should be at least one row set, // containing 8 result links. After fetching the first chunk, // result handler should download 5 of them and schedule the rest - expect(await resultHandler.hasPendingData()).to.be.false; + expect(await resultHandler.hasMore()).to.be.false; expect(resultHandler.pendingLinks.length).to.be.equal(0); expect(resultHandler.downloadedBatches.length).to.be.equal(0); - sinon.spy(operation._data, 'fetch'); + sinon.spy(operation._data, 'fetchNext'); const chunk = await operation.fetchChunk({ maxRows: 100000 }); // Count links returned from server - const resultSet = await operation._data.fetch.firstCall.returnValue; + const resultSet = await operation._data.fetchNext.firstCall.returnValue; const resultLinksCount = resultSet?.resultLinks?.length ?? 0; - expect(await resultHandler.hasPendingData()).to.be.true; + expect(await resultHandler.hasMore()).to.be.true; // expected batches minus first 5 already fetched expect(resultHandler.pendingLinks.length).to.be.equal( resultLinksCount - globalConfig.cloudFetchConcurrentDownloads, diff --git a/tests/unit/DBSQLOperation.test.js b/tests/unit/DBSQLOperation.test.js index 94834baf..ef9c89c9 100644 --- a/tests/unit/DBSQLOperation.test.js +++ b/tests/unit/DBSQLOperation.test.js @@ -6,9 +6,9 @@ const DBSQLOperation = require('../../dist/DBSQLOperation').default; const StatusError = require('../../dist/errors/StatusError').default; const OperationStateError = require('../../dist/errors/OperationStateError').default; const HiveDriverError = require('../../dist/errors/HiveDriverError').default; -const JsonResult = require('../../dist/result/JsonResult').default; -const ArrowResult = require('../../dist/result/ArrowResult').default; -const CloudFetchResult = require('../../dist/result/CloudFetchResult').default; +const JsonResultHandler = require('../../dist/result/JsonResultHandler').default; +const ArrowResultHandler = require('../../dist/result/ArrowResultHandler').default; +const CloudFetchResultHandler = require('../../dist/result/CloudFetchResultHandler').default; class OperationHandleMock { constructor(hasResultSet = true) { @@ -885,7 +885,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); const resultHandler = await operation.getResultHandler(); expect(context.driver.getResultSetMetadata.called).to.be.true; - expect(resultHandler).to.be.instanceOf(JsonResult); + expect(resultHandler).to.be.instanceOf(JsonResultHandler); } arrowHandler: { @@ -895,7 +895,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); const resultHandler = await operation.getResultHandler(); expect(context.driver.getResultSetMetadata.called).to.be.true; - expect(resultHandler).to.be.instanceOf(ArrowResult); + expect(resultHandler).to.be.instanceOf(ArrowResultHandler); } cloudFetchHandler: { @@ -905,7 +905,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); const resultHandler = await operation.getResultHandler(); expect(context.driver.getResultSetMetadata.called).to.be.true; - expect(resultHandler).to.be.instanceOf(CloudFetchResult); + expect(resultHandler).to.be.instanceOf(CloudFetchResultHandler); } }); }); diff --git a/tests/unit/result/ArrowResult.test.js b/tests/unit/result/ArrowResultHandler.test.js similarity index 59% rename from tests/unit/result/ArrowResult.test.js rename to tests/unit/result/ArrowResultHandler.test.js index 27244190..03cdb5e1 100644 --- a/tests/unit/result/ArrowResult.test.js +++ b/tests/unit/result/ArrowResultHandler.test.js @@ -1,7 +1,8 @@ const { expect } = require('chai'); const fs = require('fs'); const path = require('path'); -const ArrowResult = require('../../../dist/result/ArrowResult').default; +const ArrowResultHandler = require('../../../dist/result/ArrowResultHandler').default; +const RowSetProviderMock = require('./fixtures/RowSetProviderMock'); const sampleThriftSchema = { columns: [ @@ -84,40 +85,63 @@ const rowSetAllNulls = { ], }; -describe('ArrowResult', () => { +describe('ArrowResultHandler', () => { it('should not buffer any data', async () => { const context = {}; - const result = new ArrowResult(context, sampleThriftSchema, sampleArrowSchema); - await result.getValue([sampleRowSet1]); - expect(await result.hasPendingData()).to.be.false; + const rowSetProvider = new RowSetProviderMock([sampleRowSet1]); + const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema); + expect(await rowSetProvider.hasMore()).to.be.true; + expect(await result.hasMore()).to.be.true; + + await result.fetchNext({ limit: 10000 }); + expect(await rowSetProvider.hasMore()).to.be.false; + expect(await result.hasMore()).to.be.false; }); it('should convert data', async () => { const context = {}; - const result = new ArrowResult(context, sampleThriftSchema, sampleArrowSchema); - expect(await result.getValue([sampleRowSet1])).to.be.deep.eq([]); - expect(await result.getValue([sampleRowSet2])).to.be.deep.eq([]); - expect(await result.getValue([sampleRowSet3])).to.be.deep.eq([]); - expect(await result.getValue([sampleRowSet4])).to.be.deep.eq([{ 1: 1 }]); + + case1: { + const rowSetProvider = new RowSetProviderMock([sampleRowSet1]); + const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + } + case2: { + const rowSetProvider = new RowSetProviderMock([sampleRowSet2]); + const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + } + case3: { + const rowSetProvider = new RowSetProviderMock([sampleRowSet3]); + const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + } + case4: { + const rowSetProvider = new RowSetProviderMock([sampleRowSet4]); + const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([{ 1: 1 }]); + } }); it('should return empty array if no data to process', async () => { const context = {}; - const result = new ArrowResult(context, sampleThriftSchema, sampleArrowSchema); - expect(await result.getValue()).to.be.deep.eq([]); - expect(await result.getValue([])).to.be.deep.eq([]); + const rowSetProvider = new RowSetProviderMock(); + const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); }); it('should return empty array if no schema available', async () => { const context = {}; - const result = new ArrowResult(context); - expect(await result.getValue([sampleRowSet4])).to.be.deep.eq([]); + const rowSetProvider = new RowSetProviderMock([sampleRowSet4]); + const result = new ArrowResultHandler(context, rowSetProvider); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); }); it('should detect nulls', async () => { const context = {}; - const result = new ArrowResult(context, thriftSchemaAllNulls, arrowSchemaAllNulls); - expect(await result.getValue([rowSetAllNulls])).to.be.deep.eq([ + const rowSetProvider = new RowSetProviderMock([rowSetAllNulls]); + const result = new ArrowResultHandler(context, rowSetProvider, thriftSchemaAllNulls, arrowSchemaAllNulls); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([ { boolean_field: null, diff --git a/tests/unit/result/CloudFetchResult.test.js b/tests/unit/result/CloudFetchResultHandler.test.js similarity index 74% rename from tests/unit/result/CloudFetchResult.test.js rename to tests/unit/result/CloudFetchResultHandler.test.js index 20451093..e0a48151 100644 --- a/tests/unit/result/CloudFetchResult.test.js +++ b/tests/unit/result/CloudFetchResultHandler.test.js @@ -1,8 +1,9 @@ const { expect, AssertionError } = require('chai'); const sinon = require('sinon'); const Int64 = require('node-int64'); -const CloudFetchResult = require('../../../dist/result/CloudFetchResult').default; +const CloudFetchResultHandler = require('../../../dist/result/CloudFetchResultHandler').default; const globalConfig = require('../../../dist/globalConfig').default; +const RowSetProviderMock = require('./fixtures/RowSetProviderMock'); const sampleThriftSchema = { columns: [ @@ -94,7 +95,7 @@ const sampleExpiredRowSet = { ], }; -describe('CloudFetchResult', () => { +describe('CloudFetchResultHandler', () => { let savedConcurrentDownloads; beforeEach(() => { @@ -107,24 +108,25 @@ describe('CloudFetchResult', () => { it('should report pending data if there are any', async () => { const context = {}; - const result = new CloudFetchResult({}, sampleThriftSchema, sampleArrowSchema); + const rowSetProvider = new RowSetProviderMock(); + const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); case1: { result.pendingLinks = []; result.downloadedBatches = []; - expect(await result.hasPendingData()).to.be.false; + expect(await result.hasMore()).to.be.false; } case2: { result.pendingLinks = [{}]; // just anything here result.downloadedBatches = []; - expect(await result.hasPendingData()).to.be.true; + expect(await result.hasMore()).to.be.true; } case3: { result.pendingLinks = []; result.downloadedBatches = [{}]; // just anything here - expect(await result.hasPendingData()).to.be.true; + expect(await result.hasMore()).to.be.true; } }); @@ -132,22 +134,28 @@ describe('CloudFetchResult', () => { globalConfig.cloudFetchConcurrentDownloads = 0; // this will prevent it from downloading batches const context = {}; + const rowSetProvider = new RowSetProviderMock(); - const result = new CloudFetchResult({}, sampleThriftSchema, sampleArrowSchema); + const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); sinon.stub(result, 'fetch').returns( Promise.resolve({ ok: true, status: 200, statusText: 'OK', - arrayBuffer: async () => sampleArrowBatch, + arrayBuffer: async () => Buffer.concat([sampleArrowSchema, sampleArrowBatch]), }), ); const rowSets = [sampleRowSet1, sampleEmptyRowSet, sampleRowSet2]; const expectedLinksCount = rowSets.reduce((prev, item) => prev + (item.resultLinks?.length ?? 0), 0); - const batches = await result.getBatches(rowSets); + const batches = []; + for (const rowSet of rowSets) { + const items = await result.getBatches(rowSet); + batches.push(...items); + } + expect(batches.length).to.be.equal(0); expect(result.fetch.called).to.be.false; expect(result.pendingLinks.length).to.be.equal(expectedLinksCount); @@ -157,24 +165,31 @@ describe('CloudFetchResult', () => { globalConfig.cloudFetchConcurrentDownloads = 2; const context = {}; + const rowSet = { + startRowOffset: 0, + resultLinks: [...sampleRowSet1.resultLinks, ...sampleRowSet2.resultLinks], + }; + const expectedLinksCount = rowSet.resultLinks.length; + const rowSetProvider = new RowSetProviderMock([rowSet]); - const result = new CloudFetchResult({}, sampleThriftSchema, sampleArrowSchema); + const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); sinon.stub(result, 'fetch').returns( Promise.resolve({ ok: true, status: 200, statusText: 'OK', - arrayBuffer: async () => sampleArrowBatch, + arrayBuffer: async () => Buffer.concat([sampleArrowSchema, sampleArrowBatch]), }), ); - const rowSets = [sampleRowSet1, sampleRowSet2]; - const expectedLinksCount = rowSets.reduce((prev, item) => prev + (item.resultLinks?.length ?? 0), 0); + expect(await rowSetProvider.hasMore()).to.be.true; initialFetch: { - const batches = await result.getBatches(rowSets); - expect(batches.length).to.be.equal(1); + const items = await result.fetchNext({ limit: 10000 }); + expect(items.length).to.be.gt(0); + expect(await rowSetProvider.hasMore()).to.be.false; + expect(result.fetch.callCount).to.be.equal(globalConfig.cloudFetchConcurrentDownloads); expect(result.pendingLinks.length).to.be.equal(expectedLinksCount - globalConfig.cloudFetchConcurrentDownloads); expect(result.downloadedBatches.length).to.be.equal(globalConfig.cloudFetchConcurrentDownloads - 1); @@ -182,8 +197,10 @@ describe('CloudFetchResult', () => { secondFetch: { // It should return previously fetched batch, not performing additional network requests - const batches = await result.getBatches([]); - expect(batches.length).to.be.equal(1); + const items = await result.fetchNext({ limit: 10000 }); + expect(items.length).to.be.gt(0); + expect(await rowSetProvider.hasMore()).to.be.false; + expect(result.fetch.callCount).to.be.equal(globalConfig.cloudFetchConcurrentDownloads); // no new fetches expect(result.pendingLinks.length).to.be.equal(expectedLinksCount - globalConfig.cloudFetchConcurrentDownloads); expect(result.downloadedBatches.length).to.be.equal(globalConfig.cloudFetchConcurrentDownloads - 2); @@ -191,8 +208,10 @@ describe('CloudFetchResult', () => { thirdFetch: { // Now buffer should be empty, and it should fetch next batches - const batches = await result.getBatches([]); - expect(batches.length).to.be.equal(1); + const items = await result.fetchNext({ limit: 10000 }); + expect(items.length).to.be.gt(0); + expect(await rowSetProvider.hasMore()).to.be.false; + expect(result.fetch.callCount).to.be.equal(globalConfig.cloudFetchConcurrentDownloads * 2); expect(result.pendingLinks.length).to.be.equal( expectedLinksCount - globalConfig.cloudFetchConcurrentDownloads * 2, @@ -205,22 +224,21 @@ describe('CloudFetchResult', () => { globalConfig.cloudFetchConcurrentDownloads = 1; const context = {}; + const rowSetProvider = new RowSetProviderMock([sampleRowSet1]); - const result = new CloudFetchResult({}, sampleThriftSchema, sampleArrowSchema); + const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); sinon.stub(result, 'fetch').returns( Promise.resolve({ ok: false, status: 500, statusText: 'Internal Server Error', - arrayBuffer: async () => sampleArrowBatch, + arrayBuffer: async () => Buffer.concat([sampleArrowSchema, sampleArrowBatch]), }), ); - const rowSets = [sampleRowSet1]; - try { - await result.getBatches(rowSets); + await result.fetchNext({ limit: 10000 }); expect.fail('It should throw an error'); } catch (error) { if (error instanceof AssertionError) { @@ -233,22 +251,21 @@ describe('CloudFetchResult', () => { it('should handle expired links', async () => { const context = {}; + const rowSetProvider = new RowSetProviderMock([sampleExpiredRowSet]); - const result = new CloudFetchResult(context, sampleThriftSchema, sampleArrowSchema); + const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); sinon.stub(result, 'fetch').returns( Promise.resolve({ ok: true, status: 200, statusText: 'OK', - arrayBuffer: async () => sampleArrowBatch, + arrayBuffer: async () => Buffer.concat([sampleArrowSchema, sampleArrowBatch]), }), ); - const rowSets = [sampleExpiredRowSet]; - try { - await result.getBatches(rowSets); + await result.fetchNext({ limit: 10000 }); expect.fail('It should throw an error'); } catch (error) { if (error instanceof AssertionError) { diff --git a/tests/unit/result/JsonResult.test.js b/tests/unit/result/JsonResultHandler.test.js similarity index 86% rename from tests/unit/result/JsonResult.test.js rename to tests/unit/result/JsonResultHandler.test.js index f7e90259..db6ff01d 100644 --- a/tests/unit/result/JsonResult.test.js +++ b/tests/unit/result/JsonResultHandler.test.js @@ -1,7 +1,8 @@ const { expect } = require('chai'); -const JsonResult = require('../../../dist/result/JsonResult').default; +const JsonResultHandler = require('../../../dist/result/JsonResultHandler').default; const { TCLIService_types } = require('../../../').thrift; const Int64 = require('node-int64'); +const RowSetProviderMock = require('./fixtures/RowSetProviderMock'); const getColumnSchema = (columnName, type, position) => { if (type === undefined) { @@ -27,7 +28,7 @@ const getColumnSchema = (columnName, type, position) => { }; }; -describe('JsonResult', () => { +describe('JsonResultHandler', () => { it('should not buffer any data', async () => { const schema = { columns: [getColumnSchema('table.id', TCLIService_types.TTypeId.STRING_TYPE, 1)], @@ -39,10 +40,15 @@ describe('JsonResult', () => { ]; const context = {}; + const rowSetProvider = new RowSetProviderMock(data); - const result = new JsonResult(context, schema); - await result.getValue(data); - expect(await result.hasPendingData()).to.be.false; + const result = new JsonResultHandler(context, rowSetProvider, schema); + expect(await rowSetProvider.hasMore()).to.be.true; + expect(await result.hasMore()).to.be.true; + + await result.fetchNext({ limit: 10000 }); + expect(await rowSetProvider.hasMore()).to.be.false; + expect(await result.hasMore()).to.be.false; }); it('should convert schema with primitive types to json', async () => { @@ -127,10 +133,11 @@ describe('JsonResult', () => { ]; const context = {}; + const rowSetProvider = new RowSetProviderMock(data); - const result = new JsonResult(context, schema); + const result = new JsonResultHandler(context, rowSetProvider, schema); - expect(await result.getValue(data)).to.be.deep.eq([ + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([ { 'table.str': 'a', 'table.int64': 282578800148737, @@ -199,10 +206,11 @@ describe('JsonResult', () => { ]; const context = {}; + const rowSetProvider = new RowSetProviderMock(data); - const result = new JsonResult(context, schema); + const result = new JsonResultHandler(context, rowSetProvider, schema); - expect(await result.getValue(data)).to.be.deep.eq([ + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([ { 'table.array': ['a', 'b'], 'table.map': { key: 12 }, @@ -218,44 +226,11 @@ describe('JsonResult', () => { ]); }); - it('should merge data items', async () => { - const schema = { - columns: [getColumnSchema('table.id', TCLIService_types.TTypeId.STRING_TYPE, 1)], - }; - const data = [ - { - columns: [ - { - stringVal: { values: ['0', '1'] }, - }, - ], - }, - {}, // it should also handle empty sets - { - columns: [ - { - stringVal: { values: ['2', '3'] }, - }, - ], - }, - ]; - - const context = {}; - - const result = new JsonResult(context, schema); - - expect(await result.getValue(data)).to.be.deep.eq([ - { 'table.id': '0' }, - { 'table.id': '1' }, - { 'table.id': '2' }, - { 'table.id': '3' }, - ]); - }); - it('should detect nulls', () => { const context = {}; + const rowSetProvider = new RowSetProviderMock(); - const result = new JsonResult(context, null); + const result = new JsonResultHandler(context, rowSetProvider, null); const buf = Buffer.from([0x55, 0xaa, 0xc3]); [ @@ -368,10 +343,11 @@ describe('JsonResult', () => { ]; const context = {}; + const rowSetProvider = new RowSetProviderMock(data); - const result = new JsonResult(context, schema); + const result = new JsonResultHandler(context, rowSetProvider, schema); - expect(await result.getValue(data)).to.be.deep.eq([ + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([ { 'table.str': null, 'table.int64': null, @@ -399,11 +375,10 @@ describe('JsonResult', () => { }; const context = {}; + const rowSetProvider = new RowSetProviderMock(); - const result = new JsonResult(context, schema); - - expect(await result.getValue()).to.be.deep.eq([]); - expect(await result.getValue([])).to.be.deep.eq([]); + const result = new JsonResultHandler(context, rowSetProvider, schema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); }); it('should return empty array if no schema available', async () => { @@ -418,10 +393,11 @@ describe('JsonResult', () => { ]; const context = {}; + const rowSetProvider = new RowSetProviderMock(data); - const result = new JsonResult(context); + const result = new JsonResultHandler(context, rowSetProvider); - expect(await result.getValue(data)).to.be.deep.eq([]); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); }); it('should return raw data if types are not specified', async () => { @@ -453,10 +429,11 @@ describe('JsonResult', () => { ]; const context = {}; + const rowSetProvider = new RowSetProviderMock(data); - const result = new JsonResult(context, schema); + const result = new JsonResultHandler(context, rowSetProvider, schema); - expect(await result.getValue(data)).to.be.deep.eq([ + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([ { 'table.array': '["a", "b"]', 'table.map': '{ "key": 12 }', diff --git a/tests/unit/result/compatibility.test.js b/tests/unit/result/compatibility.test.js index 5b27d39e..6d047d57 100644 --- a/tests/unit/result/compatibility.test.js +++ b/tests/unit/result/compatibility.test.js @@ -1,31 +1,36 @@ const { expect } = require('chai'); -const ArrowResult = require('../../../dist/result/ArrowResult').default; -const JsonResult = require('../../../dist/result/JsonResult').default; +const ArrowResultHandler = require('../../../dist/result/ArrowResultHandler').default; +const JsonResultHandler = require('../../../dist/result/JsonResultHandler').default; const { fixArrowResult } = require('../../fixtures/compatibility'); const fixtureColumn = require('../../fixtures/compatibility/column'); const fixtureArrow = require('../../fixtures/compatibility/arrow'); const fixtureArrowNT = require('../../fixtures/compatibility/arrow_native_types'); +const RowSetProviderMock = require('./fixtures/RowSetProviderMock'); + describe('Result handlers compatibility tests', () => { it('colum-based data', async () => { const context = {}; - const result = new JsonResult(context, fixtureColumn.schema); - const rows = await result.getValue(fixtureColumn.rowSets); + const rowSetProvider = new RowSetProviderMock(fixtureColumn.rowSets); + const result = new JsonResultHandler(context, rowSetProvider, fixtureColumn.schema); + const rows = await result.fetchNext({ limit: 10000 }); expect(rows).to.deep.equal(fixtureColumn.expected); }); it('arrow-based data without native types', async () => { const context = {}; - const result = new ArrowResult(context, fixtureArrow.schema, fixtureArrow.arrowSchema); - const rows = await result.getValue(fixtureArrow.rowSets); + const rowSetProvider = new RowSetProviderMock(fixtureArrow.rowSets); + const result = new ArrowResultHandler(context, rowSetProvider, fixtureArrow.schema, fixtureArrow.arrowSchema); + const rows = await result.fetchNext({ limit: 10000 }); expect(fixArrowResult(rows)).to.deep.equal(fixtureArrow.expected); }); it('arrow-based data with native types', async () => { const context = {}; - const result = new ArrowResult(context, fixtureArrowNT.schema, fixtureArrowNT.arrowSchema); - const rows = await result.getValue(fixtureArrowNT.rowSets); + const rowSetProvider = new RowSetProviderMock(fixtureArrowNT.rowSets); + const result = new ArrowResultHandler(context, rowSetProvider, fixtureArrowNT.schema, fixtureArrowNT.arrowSchema); + const rows = await result.fetchNext({ limit: 10000 }); expect(fixArrowResult(rows)).to.deep.equal(fixtureArrowNT.expected); }); }); diff --git a/tests/unit/result/fixtures/RowSetProviderMock.js b/tests/unit/result/fixtures/RowSetProviderMock.js new file mode 100644 index 00000000..1e878a01 --- /dev/null +++ b/tests/unit/result/fixtures/RowSetProviderMock.js @@ -0,0 +1,15 @@ +class RowSetProviderMock { + constructor(rowSets) { + this.rowSets = Array.isArray(rowSets) ? [...rowSets] : []; + } + + async hasMore() { + return this.rowSets.length > 0; + } + + async fetchNext() { + return this.rowSets.shift(); + } +} + +module.exports = RowSetProviderMock;