From 83a281bc6281a4c748b8185e478b0aeac074e989 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Wed, 7 Aug 2024 13:28:30 +0200 Subject: [PATCH 1/7] filter results --- lib/commands/unipept/unipept_subcommand.ts | 18 +++++++++++++++++- lib/formatters/csv_formatter.ts | 3 --- lib/formatters/formatter.ts | 3 --- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts index dcd18fbe..dbb40b5e 100644 --- a/lib/commands/unipept/unipept_subcommand.ts +++ b/lib/commands/unipept/unipept_subcommand.ts @@ -91,7 +91,7 @@ export abstract class UnipeptSubcommand { "User-Agent": this.user_agent, } }); - const result = await r.json(); + const result = this.filterResult(await r.json()); if (this.firstBatch && this.options.header) { this.outputStream.write(this.formatter.header(result, this.fasta)); @@ -102,6 +102,22 @@ export abstract class UnipeptSubcommand { if (this.firstBatch) this.firstBatch = false; } + filterResult(result: unknown): object[] { + if (!Array.isArray(result)) { + result = [result]; + } + if (this.getSelectedFields().length > 0) { + (result as { [key: string]: string }[]).forEach(entry => { + for (const key of Object.keys(entry)) { + if (!this.getSelectedFields().some(regex => regex.test(key))) { + delete entry[key]; + } + } + }); + } + return result as object[]; + } + async normalInputProcessor(firstLine: string, iterator: IterableIterator | AsyncIterableIterator) { let slice = [firstLine]; diff --git a/lib/formatters/csv_formatter.ts b/lib/formatters/csv_formatter.ts index e674d2ca..822c593e 100644 --- a/lib/formatters/csv_formatter.ts +++ b/lib/formatters/csv_formatter.ts @@ -16,9 +16,6 @@ export class CSVFormatter extends Formatter { } getKeys(data: { [key: string]: unknown }[], fastaMapper?: boolean | undefined): string[] { - if (!Array.isArray(data)) { - data = [data]; - } return fastaMapper ? 
["fasta_header", ...Object.keys(data[0])] : Object.keys(data[0]); } diff --git a/lib/formatters/formatter.ts b/lib/formatters/formatter.ts index 10d3cefc..d0ca6dca 100644 --- a/lib/formatters/formatter.ts +++ b/lib/formatters/formatter.ts @@ -8,9 +8,6 @@ export abstract class Formatter { if (fastaMapper) { data = this.integrateFastaHeaders(data as { [key: string]: string }[], fastaMapper); } - if (!Array.isArray(data)) { - data = [data]; - } return this.convert(data, first); } From 5906ef096027a60edb81058e8ae49c279bd1c200 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Wed, 7 Aug 2024 13:34:35 +0200 Subject: [PATCH 2/7] always flatten the results for csv so select fields works --- lib/commands/unipept/unipept_subcommand.ts | 4 ++++ lib/formatters/csv_formatter.ts | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts index dbb40b5e..40565283 100644 --- a/lib/commands/unipept/unipept_subcommand.ts +++ b/lib/commands/unipept/unipept_subcommand.ts @@ -4,6 +4,7 @@ import { createInterface } from "node:readline"; import { Interface } from "readline"; import { Formatter } from "../../formatters/formatter.js"; import { FormatterFactory } from "../../formatters/formatter_factory.js"; +import { CSVFormatter } from "../../formatters/csv_formatter.js"; export abstract class UnipeptSubcommand { public command: Command; @@ -106,6 +107,9 @@ export abstract class UnipeptSubcommand { if (!Array.isArray(result)) { result = [result]; } + if (this.formatter && this.formatter instanceof CSVFormatter) { + result = this.formatter.flatten(result as { [key: string]: unknown }[]); + } if (this.getSelectedFields().length > 0) { (result as { [key: string]: string }[]).forEach(entry => { for (const key of Object.keys(entry)) { diff --git a/lib/formatters/csv_formatter.ts b/lib/formatters/csv_formatter.ts index 822c593e..c2713353 100644 --- a/lib/formatters/csv_formatter.ts +++ 
b/lib/formatters/csv_formatter.ts @@ -4,7 +4,7 @@ import { stringify } from "csv-stringify/sync"; export class CSVFormatter extends Formatter { header(sampleData: { [key: string]: string }[], fastaMapper?: boolean | undefined): string { - return stringify([this.getKeys(this.flatten(sampleData), fastaMapper)]); + return stringify([this.getKeys(sampleData, fastaMapper)]); } footer(): string { @@ -12,7 +12,7 @@ export class CSVFormatter extends Formatter { } convert(data: object[]): string { - return stringify(this.flatten(data as { [key: string]: unknown }[])); + return stringify(data); } getKeys(data: { [key: string]: unknown }[], fastaMapper?: boolean | undefined): string[] { From 3c54ee19c81973b0d8bb848e26aa82e2a8084937 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Wed, 7 Aug 2024 13:42:13 +0200 Subject: [PATCH 3/7] don't crash on empty response --- lib/commands/unipept/unipept_subcommand.ts | 5 ++++- tests/formatters/csv_formatter.test.ts | 2 -- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts index 40565283..bd62346d 100644 --- a/lib/commands/unipept/unipept_subcommand.ts +++ b/lib/commands/unipept/unipept_subcommand.ts @@ -92,7 +92,10 @@ export abstract class UnipeptSubcommand { "User-Agent": this.user_agent, } }); - const result = this.filterResult(await r.json()); + + let result = await r.json(); + if (Array.isArray(result) && result.length === 0) return; + result = this.filterResult(result); if (this.firstBatch && this.options.header) { this.outputStream.write(this.formatter.header(result, this.fasta)); diff --git a/tests/formatters/csv_formatter.test.ts b/tests/formatters/csv_formatter.test.ts index 2ea80c37..cc8d010d 100644 --- a/tests/formatters/csv_formatter.test.ts +++ b/tests/formatters/csv_formatter.test.ts @@ -4,10 +4,8 @@ import { TestObject } from "./test_object"; const formatter = FormatterFactory.getFormatter("csv"); test('test header', () => 
{ - //const fasta = [["peptide", ">test"]]; const object = [TestObject.testObject(), TestObject.testObject()]; expect(formatter.header(object)).toBe(TestObject.asCsvHeader()); - //expect(formatter.header(object, fasta)).toBe(`fasta_header,${TestObject.asCsvHeader()}`); }); test('test footer', () => { From 193284cc5aab4d7e2160919d5486b3df67b26060 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Wed, 7 Aug 2024 13:43:14 +0200 Subject: [PATCH 4/7] don't crash if the server doesn't return valid json --- lib/commands/unipept/unipept_subcommand.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts index bd62346d..46be7ffc 100644 --- a/lib/commands/unipept/unipept_subcommand.ts +++ b/lib/commands/unipept/unipept_subcommand.ts @@ -93,7 +93,12 @@ export abstract class UnipeptSubcommand { } }); - let result = await r.json(); + let result; + try { + result = await r.json(); + } catch (e) { + result = []; + } if (Array.isArray(result) && result.length === 0) return; result = this.filterResult(result); From 495d461f8a7c238ab2afe5dffd44af915701cf29 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Wed, 7 Aug 2024 14:24:22 +0200 Subject: [PATCH 5/7] retry requests and log errors --- lib/commands/unipept/unipept_subcommand.ts | 59 +++++++++++++++++++--- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts index 46be7ffc..9ef198a5 100644 --- a/lib/commands/unipept/unipept_subcommand.ts +++ b/lib/commands/unipept/unipept_subcommand.ts @@ -5,6 +5,9 @@ import { Interface } from "readline"; import { Formatter } from "../../formatters/formatter.js"; import { FormatterFactory } from "../../formatters/formatter_factory.js"; import { CSVFormatter } from "../../formatters/csv_formatter.js"; +import path from "path"; +import os from "os"; +import { appendFile, mkdir, writeFile } from 
"fs/promises"; export abstract class UnipeptSubcommand { public command: Command; @@ -84,14 +87,20 @@ export abstract class UnipeptSubcommand { async processBatch(slice: string[], fastaMapper?: { [key: string]: string }): Promise { if (!this.formatter) throw new Error("Formatter not set"); - const r = await fetch(this.url as string, { - method: "POST", - body: this.constructRequestBody(slice), - headers: { - "Accept-Encoding": "gzip", - "User-Agent": this.user_agent, - } - }); + let r; + try { + r = await this.fetchWithRetry(this.url as string, { + method: "POST", + body: this.constructRequestBody(slice), + headers: { + "Accept-Encoding": "gzip", + "User-Agent": this.user_agent, + } + }); + } catch (e) { + await this.saveError(e as string); + return; + } let result; try { @@ -171,6 +180,35 @@ export abstract class UnipeptSubcommand { await this.processBatch(slice); } + async saveError(message: string) { + const errorPath = this.errorFilePath(); + mkdir(path.dirname(errorPath), { recursive: true }); + await appendFile(errorPath, `${message}\n`); + console.error(`API request failed! 
log can be found in ${errorPath}`); + } + + fetchWithRetry(url: string, options: RequestInit, retries = 5): Promise { + return fetch(url, options) + .then(response => { + if (response.ok) { + return response; + } else { + return Promise.reject(`${response.status} ${response.statusText}`); + } + }) + .catch(async error => { + if (retries > 0) { + // retry with delay + // console.error("retrying"); + const delay = 5000 * Math.random(); + await new Promise(resolve => setTimeout(resolve, delay)); + return this.fetchWithRetry(url, options, retries - 1); + } else { + return Promise.reject(`Failed to fetch data from the Unipept API: ${error}`); + } + }); + } + private constructRequestBody(slice: string[]): URLSearchParams { const names = this.getSelectedFields().length === 0 || this.getSelectedFields().some(regex => regex.toString().includes("name") || regex.toString().includes(".*$")); return new URLSearchParams({ @@ -201,6 +239,11 @@ export abstract class UnipeptSubcommand { } } + private errorFilePath(): string { + const timestamp = new Date().toISOString().split('T')[0]; + return path.join(os.homedir(), '.unipept', `unipept-${timestamp}.log`); + } + /** * Returns an input iterator to use for the request. 
* - if arguments are given, use arguments From 37a23a2735a81bc02747c5f9cc7d12191e461334 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Wed, 7 Aug 2024 14:26:05 +0200 Subject: [PATCH 6/7] linter --- lib/commands/unipept/unipept_subcommand.ts | 2 +- tests/formatters/formatter.test.ts | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts index 9ef198a5..517ef65a 100644 --- a/lib/commands/unipept/unipept_subcommand.ts +++ b/lib/commands/unipept/unipept_subcommand.ts @@ -7,7 +7,7 @@ import { FormatterFactory } from "../../formatters/formatter_factory.js"; import { CSVFormatter } from "../../formatters/csv_formatter.js"; import path from "path"; import os from "os"; -import { appendFile, mkdir, writeFile } from "fs/promises"; +import { appendFile, mkdir } from "fs/promises"; export abstract class UnipeptSubcommand { public command: Command; diff --git a/tests/formatters/formatter.test.ts b/tests/formatters/formatter.test.ts index c041f31b..9db35348 100644 --- a/tests/formatters/formatter.test.ts +++ b/tests/formatters/formatter.test.ts @@ -6,7 +6,6 @@ test('test integrate fasta headers', async () => { const fasta = { 5: ">test" }; const object = [TestObject.testObject(), TestObject.testObject()]; const integrated = [Object.assign({ fasta_header: ">test" }, TestObject.testObject()), Object.assign({ fasta_header: ">test" }, TestObject.testObject())]; - // eslint-disable-next-line @typescript-eslint/ban-ts-comment // @ts-ignore expect(formatter.integrateFastaHeaders(object, fasta)).toEqual(integrated); }); From 0579bec14dd7cc9b8a34a3059ff68e485bf5d305 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Wed, 7 Aug 2024 14:41:46 +0200 Subject: [PATCH 7/7] add comments --- lib/commands/unipept/unipept_subcommand.ts | 25 ++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts 
index 517ef65a..47f6d210 100644 --- a/lib/commands/unipept/unipept_subcommand.ts +++ b/lib/commands/unipept/unipept_subcommand.ts @@ -62,9 +62,12 @@ export abstract class UnipeptSubcommand { this.host = this.getHost(); this.url = `${this.host}/api/v2/${this.name}.json`; this.formatter = FormatterFactory.getFormatter(this.options.format); + if (this.options.output) { this.outputStream = createWriteStream(this.options.output); } else { + // if we write to stdout, we need to handle the EPIPE error + // this happens when the output is piped to another command that stops reading process.stdout.on("error", (err) => { if (err.code === "EPIPE") { process.exit(0); @@ -75,6 +78,7 @@ export abstract class UnipeptSubcommand { const iterator = this.getInputIterator(args, options.input as string); const firstLine = (await iterator.next()).value; if (this.command.name() === "taxa2lca") { + // this subcommand is an exception where the entire input is read before processing await this.simpleInputProcessor(firstLine, iterator); } else if (firstLine.startsWith(">")) { this.fasta = true; @@ -120,6 +124,9 @@ export abstract class UnipeptSubcommand { if (this.firstBatch) this.firstBatch = false; } + /** + * Filter the result based on the selected fields + */ filterResult(result: unknown): object[] { if (!Array.isArray(result)) { result = [result]; @@ -139,6 +146,9 @@ export abstract class UnipeptSubcommand { return result as object[]; } + /** + * Reads batchSize lines from the input and processes them + */ async normalInputProcessor(firstLine: string, iterator: IterableIterator | AsyncIterableIterator) { let slice = [firstLine]; @@ -152,6 +162,10 @@ export abstract class UnipeptSubcommand { await this.processBatch(slice); } + /** + * Reads batchSize lines from the input and processes them, + * but takes into account the fasta headers. 
+ */ async fastaInputProcessor(firstLine: string, iterator: IterableIterator | AsyncIterableIterator) { let currentFastaHeader = firstLine; let slice = []; @@ -172,6 +186,9 @@ export abstract class UnipeptSubcommand { await this.processBatch(slice, fastaMapper); } + /** + * Reads the entire input and processes it in one go + */ async simpleInputProcessor(firstLine: string, iterator: IterableIterator | AsyncIterableIterator) { const slice = [firstLine]; for await (const line of iterator) { @@ -180,6 +197,9 @@ export abstract class UnipeptSubcommand { await this.processBatch(slice); } + /** + * Appends the error message to today's log file and prints a notice with the log file's path to stderr + */ async saveError(message: string) { const errorPath = this.errorFilePath(); mkdir(path.dirname(errorPath), { recursive: true }); @@ -187,6 +207,11 @@ export abstract class UnipeptSubcommand { console.error(`API request failed! log can be found in ${errorPath}`); } + /** + * Uses fetch to get data from the Unipept API. + * Has a retry mechanism that retries the request up to 5 times with a delay of 0-5 seconds. + * In addition, handles failed requests by returning a rejected promise. + */ fetchWithRetry(url: string, options: RequestInit, retries = 5): Promise { return fetch(url, options) .then(response => {