diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts
index 3ba62182..7f4ac200 100644
--- a/lib/commands/unipept.ts
+++ b/lib/commands/unipept.ts
@@ -8,6 +8,7 @@ import { Pept2prot } from './unipept/pept2prot.js';
 import { Pept2taxa } from './unipept/pept2taxa.js';
 import { Peptinfo } from './unipept/peptinfo.js';
 import { Protinfo } from './unipept/protinfo.js';
+import { Taxa2lca } from './unipept/taxa2lca.js';
 
 export class Unipept extends BaseCommand {
 
@@ -35,7 +36,8 @@ The command will give priority to the first way the input is passed, in the orde
       .addCommand(new Pept2prot().command)
       .addCommand(new Pept2taxa().command)
       .addCommand(new Peptinfo().command)
-      .addCommand(new Protinfo().command);
+      .addCommand(new Protinfo().command)
+      .addCommand(new Taxa2lca().command);
   }
 
   async run(args?: string[]) {
diff --git a/lib/commands/unipept/taxa2lca.ts b/lib/commands/unipept/taxa2lca.ts
new file mode 100644
index 00000000..573487d2
--- /dev/null
+++ b/lib/commands/unipept/taxa2lca.ts
@@ -0,0 +1,41 @@
+import { Option } from "commander";
+import { UnipeptSubcommand } from "./unipept_subcommand.js";
+
+/**
+ * Implements the `unipept taxa2lca` subcommand, which takes a list of NCBI
+ * Taxonomy Identifiers and reports their lowest common ancestor.
+ */
+export class Taxa2lca extends UnipeptSubcommand {
+
+  /** Long help text that is attached to the subcommand in the constructor. */
+  readonly description = `The unipept taxa2lca command computes the lowest common ancestor of a given list of NCBI Taxonomy Identifiers. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one NCBI Taxonomy Identifier per line.`;
+
+  /** Configures the summary, help text, options and positional arguments of the subcommand. */
+  constructor() {
+    super("taxa2lca");
+
+    this.command
+      .summary("Compute taxonomic lowest common ancestor for given list of taxa.")
+      .description(this.description)
+      .option("-a, --all", "report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.")
+      .addOption(new Option("-s --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+      .argument("[taxa...]", "optionally, 1 or more NCBI Taxonomy Identifiers")
+      .action((args, options) => this.run(args, options));
+  }
+
+  /**
+   * Not applicable to this command: for taxa2lca, `run` hands the complete
+   * input to `processBatch` as one slice instead of splitting it into batches.
+   *
+   * @throws Error always
+   */
+  defaultBatchSize(): number {
+    throw new Error("Batch size not needed for this command.");
+  }
+}
diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts
index e1aba774..dcd18fbe 100644
--- a/lib/commands/unipept/unipept_subcommand.ts
+++ b/lib/commands/unipept/unipept_subcommand.ts
@@ -70,7 +70,9 @@ export abstract class UnipeptSubcommand {
 
     const iterator = this.getInputIterator(args, options.input as string);
     const firstLine = (await iterator.next()).value;
-    if (firstLine.startsWith(">")) {
+    if (this.command.name() === "taxa2lca") {
+      await this.simpleInputProcessor(firstLine, iterator);
+    } else if (firstLine.startsWith(">")) {
       this.fasta = true;
       await this.fastaInputProcessor(firstLine, iterator);
     } else {
@@ -133,6 +135,22 @@ export abstract class UnipeptSubcommand {
     await this.processBatch(slice, fastaMapper);
   }
 
+  /**
+   * Collects the complete input and processes it as one single slice, without
+   * FASTA handling and without splitting the input into batches.
+   *
+   * @param firstLine the line that was already consumed from the iterator
+   * @param iterator iterator that yields the remaining input lines
+   */
+  async simpleInputProcessor(firstLine: string, iterator: IterableIterator<string> | AsyncIterableIterator<string>): Promise<void> {
+    const slice = [firstLine];
+    for await (const line of iterator) {
+      slice.push(line);
+    }
+    await this.processBatch(slice);
+  }
+
   private constructRequestBody(slice: string[]): URLSearchParams {
     const names = this.getSelectedFields().length === 0 || this.getSelectedFields().some(regex => regex.toString().includes("name") || regex.toString().includes(".*$"));
     return new URLSearchParams({
diff --git a/lib/formatters/csv_formatter.ts b/lib/formatters/csv_formatter.ts
index 822c593e..e674d2ca 100644
--- a/lib/formatters/csv_formatter.ts
+++ b/lib/formatters/csv_formatter.ts
@@ -16,6 +16,10 @@ export class CSVFormatter extends Formatter {
   }
 
   getKeys(data: { [key: string]: unknown }[], fastaMapper?: boolean | undefined): string[] {
+    // Accept a single record as well as an array of records.
+    if (!Array.isArray(data)) {
+      data = [data];
+    }
     return fastaMapper ? ["fasta_header", ...Object.keys(data[0])] : Object.keys(data[0]);
   }
 
diff --git a/lib/formatters/formatter.ts b/lib/formatters/formatter.ts
index d0ca6dca..10d3cefc 100644
--- a/lib/formatters/formatter.ts
+++ b/lib/formatters/formatter.ts
@@ -8,6 +8,10 @@ export abstract class Formatter {
     if (fastaMapper) {
       data = this.integrateFastaHeaders(data as { [key: string]: string }[], fastaMapper);
     }
+    // Accept a single record as well as an array of records before converting.
+    if (!Array.isArray(data)) {
+      data = [data];
+    }
     return this.convert(data, first);
   }
 
diff --git a/tests/commands/unipept/taxa2lca.test.ts b/tests/commands/unipept/taxa2lca.test.ts
new file mode 100644
index 00000000..1f5c6351
--- /dev/null
+++ b/tests/commands/unipept/taxa2lca.test.ts
@@ -0,0 +1,19 @@
+import { jest } from '@jest/globals';
+import { Taxa2lca } from "../../../lib/commands/unipept/taxa2lca";
+
+let output: string[];
+jest
+  .spyOn(process.stdout, "write")
+  .mockImplementation((data: unknown) => { output.push(data as string); return true; });
+
+beforeEach(() => {
+  output = [];
+});
+
+test('test with default args', async () => {
+  const command = new Taxa2lca();
+  await command.run(["216816", "1680"], { header: true, format: "csv" });
+  expect(output[0].startsWith("taxon_id,taxon_name,taxon_rank")).toBeTruthy();
+  expect(output[1].startsWith("1678,Bifidobacterium,genus")).toBeTruthy();
+  expect(output.length).toBe(2);
+});