diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts index 09a62bde..56f9e1fc 100644 --- a/lib/commands/unipept/unipept_subcommand.ts +++ b/lib/commands/unipept/unipept_subcommand.ts @@ -20,6 +20,9 @@ export abstract class UnipeptSubcommand { selectedFields?: RegExp[]; fasta: boolean; + // we must save this to be able to close it properly in tests + private streamInterface?: Interface; + constructor(name: string) { this.name = name; const version = JSON.parse(readFileSync(new URL("../../../package.json", import.meta.url), "utf8")).version; @@ -27,6 +30,7 @@ export abstract class UnipeptSubcommand { this.command = this.create(name); this.fasta = false; } + abstract defaultBatchSize(): number; requiredFields(): string[] { @@ -58,19 +62,17 @@ export abstract class UnipeptSubcommand { this.outputStream = createWriteStream(this.options.output); } - let slice = []; - - for await (const input of this.getInputIterator(args, options.input)) { - slice.push(input); - if (slice.length >= this.batchSize) { - await this.processBatch(slice); - slice = []; - } + const iterator = this.getInputIterator(args, options.input); + const firstLine = (await iterator.next()).value; + if (firstLine.startsWith(">")) { + this.fasta = true; + await this.fastaInputProcessor(firstLine, iterator); + } else { + await this.normalInputProcessor(firstLine, iterator); } - await this.processBatch(slice); } - async processBatch(slice: string[]): Promise { + async processBatch(slice: string[], fastaMapper?: { [key: string]: string }): Promise { if (!this.formatter) throw new Error("Formatter not set"); const r = await fetch(this.url as string, { @@ -87,11 +89,44 @@ export abstract class UnipeptSubcommand { this.outputStream.write(this.formatter.header(result, this.fasta)); } - this.outputStream.write(this.formatter.format(result, this.fasta, this.firstBatch)); + this.outputStream.write(this.formatter.format(result, fastaMapper, this.firstBatch)); if (this.firstBatch) this.firstBatch = false; } + async normalInputProcessor(firstLine: string, iterator: IterableIterator | AsyncIterableIterator) { + let slice = [firstLine]; + + for await (const line of iterator) { + slice.push(line); + if (slice.length >= this.batchSize) { + await this.processBatch(slice); + slice = []; + } + } + await this.processBatch(slice); + } + + async fastaInputProcessor(firstLine: string, iterator: IterableIterator | AsyncIterableIterator) { + let currentFastaHeader = firstLine; + let slice = []; + let fastaMapper: { [key: string]: string } = {}; + for await (const line of iterator) { + if (line.startsWith(">")) { + currentFastaHeader = line; + } else { + fastaMapper[line] = currentFastaHeader; + slice.push(line); + if (slice.length >= this.batchSize) { + await this.processBatch(slice, fastaMapper); + slice = []; + fastaMapper = {}; + } + } + } + await this.processBatch(slice, fastaMapper); + } + private constructRequestBody(slice: string[]): URLSearchParams { const names = this.getSelectedFields().length === 0 || this.getSelectedFields().some(regex => regex.toString().includes("name") || regex.toString().includes(".*$")); return new URLSearchParams({ @@ -128,13 +163,15 @@ export abstract class UnipeptSubcommand { * - if an input file is given, use the file * - otherwise, use standard input */ - private getInputIterator(args: string[], input?: string): string[] | Interface { + private getInputIterator(args: string[], input?: string): IterableIterator | AsyncIterableIterator { if (args.length > 0) { - return args; + return args.values(); } else if (input) { - return createInterface({ input: createReadStream(input) }); + this.streamInterface = createInterface({ input: createReadStream(input) }); + return this.streamInterface[Symbol.asyncIterator](); } else { - return createInterface({ input: process.stdin }) + this.streamInterface = createInterface({ input: process.stdin }); + return this.streamInterface[Symbol.asyncIterator](); } } diff --git a/lib/formatters/formatter.ts b/lib/formatters/formatter.ts index 1c8ecf02..d0ca6dca 100644 --- a/lib/formatters/formatter.ts +++ b/lib/formatters/formatter.ts @@ -4,15 +4,18 @@ export abstract class Formatter { abstract footer(): string; abstract convert(data: object[], first?: boolean): string; - format(data: object[], fastaMapper?: boolean, first?: boolean): string { + format(data: object[], fastaMapper?: { [key: string]: string }, first?: boolean): string { if (fastaMapper) { - data = this.integrateFastaHeaders(data, fastaMapper); + data = this.integrateFastaHeaders(data as { [key: string]: string }[], fastaMapper); } return this.convert(data, first); } - // eslint-disable-next-line @typescript-eslint/no-unused-vars - integrateFastaHeaders(data: object[], fastaMapper: boolean): object[] { + integrateFastaHeaders(data: { [key: string]: string }[], fastaMapper: { [key: string]: string }): object[] { + const key = Object.keys(data[0])[0]; + data.forEach((entry, i) => { + data[i] = Object.assign({ fasta_header: fastaMapper[entry[key]] }, entry); + }); return data; } } diff --git a/lib/formatters/to_xml.ts b/lib/formatters/to_xml.ts index c40593b6..fb10ead4 100644 --- a/lib/formatters/to_xml.ts +++ b/lib/formatters/to_xml.ts @@ -250,6 +250,6 @@ function _isArray(array) { return array instanceof Array; } -export function toXML(value: object, replacer?: function, space?: number | string): string { - return _toXML(value, replacer, space); +export function toXML(value: object): string { + return _toXML(value); } diff --git a/tests/commands/unipept/unipept_subcommand.test.ts b/tests/commands/unipept/unipept_subcommand.test.ts index de816a36..1fde3605 100644 --- a/tests/commands/unipept/unipept_subcommand.test.ts +++ b/tests/commands/unipept/unipept_subcommand.test.ts @@ -1,4 +1,3 @@ -import { Interface } from 'readline'; import { Pept2lca } from '../../../lib/commands/unipept/pept2lca'; test('test command setup', () => { @@ -28,19 +27,18 @@ test('test correct inputIterator', async () => { const command = new Pept2lca(); // should be stdin - let input = command["getInputIterator"]([]) as Interface; - expect(input).toBeInstanceOf(Interface); - input.close(); + let input = command["getInputIterator"]([]) as AsyncIterableIterator; + expect(typeof input[Symbol.asyncIterator]).toBe("function"); + command['streamInterface']?.close(); // should be a (non-existant) file and error - input = command["getInputIterator"]([], "filename") as Interface; - input.on("error", (e) => { - expect(e.toString()).toMatch(/no such file/); - }); + input = command["getInputIterator"]([], "filename") as AsyncIterableIterator; + expect(typeof input[Symbol.asyncIterator]).toBe("function"); + await expect(async () => { await input.next() }).rejects.toThrow(/no such file/); // should be array - const inputArray = command["getInputIterator"](["A", "B"]); - expect(inputArray).toBeInstanceOf(Array); + const inputArray = command["getInputIterator"](["A", "B"]) as IterableIterator; + expect(typeof inputArray[Symbol.iterator]).toBe("function"); }); test('test selected fields parsing', () => { diff --git a/tests/formatters/csv_formatter.test.ts b/tests/formatters/csv_formatter.test.ts index 2b9a3b85..2ea80c37 100644 --- a/tests/formatters/csv_formatter.test.ts +++ b/tests/formatters/csv_formatter.test.ts @@ -23,8 +23,8 @@ test('test convert', () => { }); test('test format with fasta', () => { - //const fasta = [['>test', '5']]; - //const object = [TestObject.testObject(), TestObject.testObject()]; - //const csv = [`>test,${TestObject.asCsv()}`, TestObject.asCsv(), ""].join("\n"); - //expect(formatter.format(object, fasta, false)).toBe(csv); + const fasta = { 5: ">test" }; + const object = [TestObject.testObject(), TestObject.testObject()]; + const csv = [`>test,${TestObject.asCsv()}`, `>test,${TestObject.asCsv()}`, ""].join("\n"); + expect(formatter.format(object, fasta, false)).toBe(csv); }); diff --git a/tests/formatters/formatter.test.ts b/tests/formatters/formatter.test.ts new file mode 100644 index 00000000..c041f31b --- /dev/null +++ b/tests/formatters/formatter.test.ts @@ -0,0 +1,12 @@ +import { FormatterFactory } from "../../lib/formatters/formatter_factory"; +import { TestObject } from "./test_object"; + +test('test integrate fasta headers', async () => { + const formatter = FormatterFactory.getFormatter("csv"); + const fasta = { 5: ">test" }; + const object = [TestObject.testObject(), TestObject.testObject()]; + const integrated = [Object.assign({ fasta_header: ">test" }, TestObject.testObject()), Object.assign({ fasta_header: ">test" }, TestObject.testObject())]; + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore + expect(formatter.integrateFastaHeaders(object, fasta)).toEqual(integrated); +}); diff --git a/tests/formatters/json_formatter.test.ts b/tests/formatters/json_formatter.test.ts index c4e80648..42e14293 100644 --- a/tests/formatters/json_formatter.test.ts +++ b/tests/formatters/json_formatter.test.ts @@ -21,8 +21,8 @@ test('test convert', () => { }); test('test format with fasta', () => { - //const fasta = [['>test', '5']]; - //const object = [TestObject.testObject()]; - //const json = '{"fasta_header":">test","integer":5,"string":"string","list":["a",2,false]}'; - //expect(formatter.format(object, fasta, true)).toBe(json); + const fasta = { 5: ">test" }; + const object = [TestObject.testObject()]; + const json = '{"fasta_header":">test","integer":5,"string":"string","list":["a",2,false]}'; + expect(formatter.format(object, fasta, true)).toBe(json); }); diff --git a/tests/formatters/xml_formatter.test.ts b/tests/formatters/xml_formatter.test.ts index c327d034..6994861b 100644 --- a/tests/formatters/xml_formatter.test.ts +++ b/tests/formatters/xml_formatter.test.ts @@ -21,8 +21,8 @@ test('test convert', () => { }); test('test format with fasta', () => { - //const fasta = [['>test', '5']]; - //const object = [TestObject.testObject()]; - //const json = '{"fasta_header":">test","integer":5,"string":"string","list":["a",2,false]}'; - //expect(formatter.format(object, fasta, true)).toBe(json); + const fasta = { 5: ">test" }; + const object = [TestObject.testObject()]; + const xml = `>test${TestObject.asXml()}`; + expect(formatter.format(object, fasta, true)).toBe(xml); });