From 3b575a98ab2b114969736ee72ae5f35af9e6401f Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 5 Aug 2024 14:27:40 +0200 Subject: [PATCH 1/3] refactor existing code --- lib/commands/unipept/unipept_subcommand.ts | 39 ++++++++++++------- .../unipept/unipept_subcommand.test.ts | 17 ++++---- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts index 09a62bde..e7758f5b 100644 --- a/lib/commands/unipept/unipept_subcommand.ts +++ b/lib/commands/unipept/unipept_subcommand.ts @@ -20,6 +20,9 @@ export abstract class UnipeptSubcommand { selectedFields?: RegExp[]; fasta: boolean; + // we must save this to be able to close it properly in tests + private streamInterface?: Interface; + constructor(name: string) { this.name = name; const version = JSON.parse(readFileSync(new URL("../../../package.json", import.meta.url), "utf8")).version; @@ -27,6 +30,7 @@ export abstract class UnipeptSubcommand { this.command = this.create(name); this.fasta = false; } + abstract defaultBatchSize(): number; requiredFields(): string[] { @@ -58,16 +62,10 @@ export abstract class UnipeptSubcommand { this.outputStream = createWriteStream(this.options.output); } - let slice = []; + const iterator = this.getInputIterator(args, options.input); + const firstLine = (await iterator.next()).value; - for await (const input of this.getInputIterator(args, options.input)) { - slice.push(input); - if (slice.length >= this.batchSize) { - await this.processBatch(slice); - slice = []; - } - } - await this.processBatch(slice); + await this.normalInputProcessor(firstLine, iterator); } async processBatch(slice: string[]): Promise { @@ -92,6 +90,19 @@ export abstract class UnipeptSubcommand { if (this.firstBatch) this.firstBatch = false; } + async normalInputProcessor(firstLine: string, iterator: IterableIterator | AsyncIterableIterator) { + let slice = [firstLine]; + + for await (const line of iterator) { + slice.push(line); + if (slice.length >= this.batchSize) { + await this.processBatch(slice); + slice = []; + } + } + await this.processBatch(slice); + } + private constructRequestBody(slice: string[]): URLSearchParams { const names = this.getSelectedFields().length === 0 || this.getSelectedFields().some(regex => regex.toString().includes("name") || regex.toString().includes(".*$")); return new URLSearchParams({ @@ -128,13 +139,15 @@ export abstract class UnipeptSubcommand { * - if an input file is given, use the file * - otherwise, use standard input */ - private getInputIterator(args: string[], input?: string): string[] | Interface { + private getInputIterator(args: string[], input?: string): IterableIterator | AsyncIterableIterator { if (args.length > 0) { - return args; + return args.values(); } else if (input) { - return createInterface({ input: createReadStream(input) }); + this.streamInterface = createInterface({ input: createReadStream(input) }); + return this.streamInterface[Symbol.asyncIterator](); } else { - return createInterface({ input: process.stdin }) + this.streamInterface = createInterface({ input: process.stdin }); + return this.streamInterface[Symbol.asyncIterator](); } } diff --git a/tests/commands/unipept/unipept_subcommand.test.ts b/tests/commands/unipept/unipept_subcommand.test.ts index de816a36..4dab997d 100644 --- a/tests/commands/unipept/unipept_subcommand.test.ts +++ b/tests/commands/unipept/unipept_subcommand.test.ts @@ -28,19 +28,18 @@ test('test correct inputIterator', async () => { const command = new Pept2lca(); // should be stdin - let input = command["getInputIterator"]([]) as Interface; - expect(input).toBeInstanceOf(Interface); - input.close(); + let input = command["getInputIterator"]([]) as AsyncIterableIterator; + expect(typeof input[Symbol.asyncIterator]).toBe("function"); + command['streamInterface']?.close(); // should be a (non-existant) file and error - input = command["getInputIterator"]([], "filename") as Interface; - input.on("error", (e) => { - expect(e.toString()).toMatch(/no such file/); - }); + input = command["getInputIterator"]([], "filename") as AsyncIterableIterator; + expect(typeof input[Symbol.asyncIterator]).toBe("function"); + await expect(async () => { await input.next() }).rejects.toThrow(/no such file/); // should be array - const inputArray = command["getInputIterator"](["A", "B"]); - expect(inputArray).toBeInstanceOf(Array); + const inputArray = command["getInputIterator"](["A", "B"]) as IterableIterator; + expect(typeof inputArray[Symbol.iterator]).toBe("function"); }); test('test selected fields parsing', () => { From 2435f1fc2df0a2245952753790fd9f8558805da8 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 5 Aug 2024 15:22:57 +0200 Subject: [PATCH 2/3] add fasta support --- lib/commands/unipept/unipept_subcommand.ts | 32 ++++++++++++++++--- lib/formatters/formatter.ts | 11 ++++--- lib/formatters/to_xml.ts | 4 +-- .../unipept/unipept_subcommand.test.ts | 1 - 4 files changed, 37 insertions(+), 11 deletions(-) diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts index e7758f5b..56f9e1fc 100644 --- a/lib/commands/unipept/unipept_subcommand.ts +++ b/lib/commands/unipept/unipept_subcommand.ts @@ -64,11 +64,15 @@ export abstract class UnipeptSubcommand { const iterator = this.getInputIterator(args, options.input); const firstLine = (await iterator.next()).value; - - await this.normalInputProcessor(firstLine, iterator); + if (firstLine.startsWith(">")) { + this.fasta = true; + await this.fastaInputProcessor(firstLine, iterator); + } else { + await this.normalInputProcessor(firstLine, iterator); + } } - async processBatch(slice: string[]): Promise { + async processBatch(slice: string[], fastaMapper?: { [key: string]: string }): Promise { if (!this.formatter) throw new Error("Formatter not set"); const r = await fetch(this.url as string, { @@ -85,7 +89,7 @@ export abstract class UnipeptSubcommand { this.outputStream.write(this.formatter.header(result, this.fasta)); } - this.outputStream.write(this.formatter.format(result, this.fasta, this.firstBatch)); + this.outputStream.write(this.formatter.format(result, fastaMapper, this.firstBatch)); if (this.firstBatch) this.firstBatch = false; } @@ -103,6 +107,26 @@ export abstract class UnipeptSubcommand { await this.processBatch(slice); } + async fastaInputProcessor(firstLine: string, iterator: IterableIterator | AsyncIterableIterator) { + let currentFastaHeader = firstLine; + let slice = []; + let fastaMapper: { [key: string]: string } = {}; + for await (const line of iterator) { + if (line.startsWith(">")) { + currentFastaHeader = line; + } else { + fastaMapper[line] = currentFastaHeader; + slice.push(line); + if (slice.length >= this.batchSize) { + await this.processBatch(slice, fastaMapper); + slice = []; + fastaMapper = {}; + } + } + } + await this.processBatch(slice, fastaMapper); + } + private constructRequestBody(slice: string[]): URLSearchParams { const names = this.getSelectedFields().length === 0 || this.getSelectedFields().some(regex => regex.toString().includes("name") || regex.toString().includes(".*$")); return new URLSearchParams({ diff --git a/lib/formatters/formatter.ts b/lib/formatters/formatter.ts index 1c8ecf02..5310ec19 100644 --- a/lib/formatters/formatter.ts +++ b/lib/formatters/formatter.ts @@ -4,15 +4,18 @@ export abstract class Formatter { abstract footer(): string; abstract convert(data: object[], first?: boolean): string; - format(data: object[], fastaMapper?: boolean, first?: boolean): string { + format(data: object[], fastaMapper?: { [key: string]: string }, first?: boolean): string { if (fastaMapper) { - data = this.integrateFastaHeaders(data, fastaMapper); + data = this.integrateFastaHeaders(data as { [key: string]: string }[], fastaMapper); } return this.convert(data, first); } - // eslint-disable-next-line @typescript-eslint/no-unused-vars - integrateFastaHeaders(data: object[], fastaMapper: boolean): object[] { + integrateFastaHeaders(data: { [key: string]: string }[], fastaMapper: { [key: string]: string }): object[] { + const key = Object.keys(data[0])[0]; + data.forEach((entry, i) => { + data[i] = Object.assign({ fastaHeader: fastaMapper[entry[key]] }, entry); + }); return data; } } diff --git a/lib/formatters/to_xml.ts b/lib/formatters/to_xml.ts index c40593b6..fb10ead4 100644 --- a/lib/formatters/to_xml.ts +++ b/lib/formatters/to_xml.ts @@ -250,6 +250,6 @@ function _isArray(array) { return array instanceof Array; } -export function toXML(value: object, replacer?: function, space?: number | string): string { - return _toXML(value, replacer, space); +export function toXML(value: object): string { + return _toXML(value); } diff --git a/tests/commands/unipept/unipept_subcommand.test.ts b/tests/commands/unipept/unipept_subcommand.test.ts index 4dab997d..1fde3605 100644 --- a/tests/commands/unipept/unipept_subcommand.test.ts +++ b/tests/commands/unipept/unipept_subcommand.test.ts @@ -1,4 +1,3 @@ -import { Interface } from 'readline'; import { Pept2lca } from '../../../lib/commands/unipept/pept2lca'; test('test command setup', () => { From b21872dbfe2062749c104f100647244501a42af8 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 5 Aug 2024 16:02:57 +0200 Subject: [PATCH 3/3] add tests --- lib/formatters/formatter.ts | 2 +- tests/formatters/csv_formatter.test.ts | 8 ++++---- tests/formatters/formatter.test.ts | 12 ++++++++++++ tests/formatters/json_formatter.test.ts | 8 ++++---- tests/formatters/xml_formatter.test.ts | 8 ++++---- 5 files changed, 25 insertions(+), 13 deletions(-) create mode 100644 tests/formatters/formatter.test.ts diff --git a/lib/formatters/formatter.ts b/lib/formatters/formatter.ts index 5310ec19..d0ca6dca 100644 --- a/lib/formatters/formatter.ts +++ b/lib/formatters/formatter.ts @@ -14,7 +14,7 @@ export abstract class Formatter { integrateFastaHeaders(data: { [key: string]: string }[], fastaMapper: { [key: string]: string }): object[] { const key = Object.keys(data[0])[0]; data.forEach((entry, i) => { - data[i] = Object.assign({ fastaHeader: fastaMapper[entry[key]] }, entry); + data[i] = Object.assign({ fasta_header: fastaMapper[entry[key]] }, entry); }); return data; } diff --git a/tests/formatters/csv_formatter.test.ts b/tests/formatters/csv_formatter.test.ts index 2b9a3b85..2ea80c37 100644 --- a/tests/formatters/csv_formatter.test.ts +++ b/tests/formatters/csv_formatter.test.ts @@ -23,8 +23,8 @@ test('test convert', () => { }); test('test format with fasta', () => { - //const fasta = [['>test', '5']]; - //const object = [TestObject.testObject(), TestObject.testObject()]; - //const csv = [`>test,${TestObject.asCsv()}`, TestObject.asCsv(), ""].join("\n"); - //expect(formatter.format(object, fasta, false)).toBe(csv); + const fasta = { 5: ">test" }; + const object = [TestObject.testObject(), TestObject.testObject()]; + const csv = [`>test,${TestObject.asCsv()}`, `>test,${TestObject.asCsv()}`, ""].join("\n"); + expect(formatter.format(object, fasta, false)).toBe(csv); }); diff --git a/tests/formatters/formatter.test.ts b/tests/formatters/formatter.test.ts new file mode 100644 index 00000000..c041f31b --- /dev/null +++ b/tests/formatters/formatter.test.ts @@ -0,0 +1,12 @@ +import { FormatterFactory } from "../../lib/formatters/formatter_factory"; +import { TestObject } from "./test_object"; + +test('test integrate fasta headers', async () => { + const formatter = FormatterFactory.getFormatter("csv"); + const fasta = { 5: ">test" }; + const object = [TestObject.testObject(), TestObject.testObject()]; + const integrated = [Object.assign({ fasta_header: ">test" }, TestObject.testObject()), Object.assign({ fasta_header: ">test" }, TestObject.testObject())]; + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore + expect(formatter.integrateFastaHeaders(object, fasta)).toEqual(integrated); +}); diff --git a/tests/formatters/json_formatter.test.ts b/tests/formatters/json_formatter.test.ts index c4e80648..42e14293 100644 --- a/tests/formatters/json_formatter.test.ts +++ b/tests/formatters/json_formatter.test.ts @@ -21,8 +21,8 @@ test('test convert', () => { }); test('test format with fasta', () => { - //const fasta = [['>test', '5']]; - //const object = [TestObject.testObject()]; - //const json = '{"fasta_header":">test","integer":5,"string":"string","list":["a",2,false]}'; - //expect(formatter.format(object, fasta, true)).toBe(json); + const fasta = { 5: ">test" }; + const object = [TestObject.testObject()]; + const json = '{"fasta_header":">test","integer":5,"string":"string","list":["a",2,false]}'; + expect(formatter.format(object, fasta, true)).toBe(json); }); diff --git a/tests/formatters/xml_formatter.test.ts b/tests/formatters/xml_formatter.test.ts index c327d034..6994861b 100644 --- a/tests/formatters/xml_formatter.test.ts +++ b/tests/formatters/xml_formatter.test.ts @@ -21,8 +21,8 @@ test('test convert', () => { }); test('test format with fasta', () => { - //const fasta = [['>test', '5']]; - //const object = [TestObject.testObject()]; - //const json = '{"fasta_header":">test","integer":5,"string":"string","list":["a",2,false]}'; - //expect(formatter.format(object, fasta, true)).toBe(json); + const fasta = { 5: ">test" }; + const object = [TestObject.testObject()]; + const xml = `>test${TestObject.asXml()}`; + expect(formatter.format(object, fasta, true)).toBe(xml); });