From 2d98583dd46e1cca1dc81551088c991ccac29c1a Mon Sep 17 00:00:00 2001
From: Bart Mesuere
Date: Tue, 6 Aug 2024 15:35:11 +0200
Subject: [PATCH] add taxonomy command

Add the `unipept taxonomy` subcommand (lib/commands/unipept/taxonomy.ts),
register it on the top-level `unipept` command and cover it with tests.
Also rename the positional argument of `taxa2lca` from "[proteins...]" to
"[taxonids...]", because that command takes taxon ids.

---
Review notes (free text between the "---" marker and the first "diff --git"
line is not part of the patch content):

* taxonomy.ts: the positional argument had been copy-pasted as
  "[peptides...]" / "optionally, 1 or more peptides". The command takes NCBI
  taxon ids, so it now reads "[taxonids...]" / "optionally, 1 or more taxon
  ids", matching the taxa2lca.ts change below. The blob id on the taxonomy.ts
  "index" line predates this fix.
* Layout: line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch (2-space indentation assumed).
  NOTE(review): verify context-line whitespace against the repository, or
  apply with `git apply --ignore-whitespace`.
* NOTE(review): the "<fields...>" placeholder in the "--select" option strings
  and the author e-mail address appear to have been stripped from that copy.
  The placeholder was restored from the option's description (it takes a
  value and may be repeated); confirm the exact spelling. The e-mail address
  is unknown and must be restored before using `git am`.

 lib/commands/unipept.ts                 |  4 ++-
 lib/commands/unipept/taxa2lca.ts        |  2 +-
 lib/commands/unipept/taxonomy.ts        | 33 +++++++++++++++++++++++++
 tests/commands/unipept/taxonomy.test.ts | 27 ++++++++++++++++++++
 4 files changed, 64 insertions(+), 2 deletions(-)
 create mode 100644 lib/commands/unipept/taxonomy.ts
 create mode 100644 tests/commands/unipept/taxonomy.test.ts

diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts
index 7f4ac200..0585138f 100644
--- a/lib/commands/unipept.ts
+++ b/lib/commands/unipept.ts
@@ -9,6 +9,7 @@ import { Pept2taxa } from './unipept/pept2taxa.js';
 import { Peptinfo } from './unipept/peptinfo.js';
 import { Protinfo } from './unipept/protinfo.js';
 import { Taxa2lca } from './unipept/taxa2lca.js';
+import { Taxonomy } from './unipept/taxonomy.js';
 
 export class Unipept extends BaseCommand {
 
@@ -37,7 +38,8 @@ The command will give priority to the first way the input is passed, in the orde
       .addCommand(new Pept2taxa().command)
       .addCommand(new Peptinfo().command)
       .addCommand(new Protinfo().command)
-      .addCommand(new Taxa2lca().command);
+      .addCommand(new Taxa2lca().command)
+      .addCommand(new Taxonomy().command);
   }
 
   async run(args?: string[]) {
diff --git a/lib/commands/unipept/taxa2lca.ts b/lib/commands/unipept/taxa2lca.ts
index 573487d2..5b9a5e6c 100644
--- a/lib/commands/unipept/taxa2lca.ts
+++ b/lib/commands/unipept/taxa2lca.ts
@@ -19,7 +19,7 @@ The command will give priority to the first way NCBI Taxonomy Identifiers are pa
       .description(this.description)
       .option("-a, --all", "report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.")
       .addOption(new Option("-s --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
-      .argument("[proteins...]", "optionally, 1 or more UniProt ids")
+      .argument("[taxonids...]", "optionally, 1 or more taxon ids")
       .action((args, options) => this.run(args, options));
   }
 
diff --git a/lib/commands/unipept/taxonomy.ts b/lib/commands/unipept/taxonomy.ts
new file mode 100644
index 00000000..ae8426a1
--- /dev/null
+++ b/lib/commands/unipept/taxonomy.ts
@@ -0,0 +1,33 @@
+import { Option } from "commander";
+import { UnipeptSubcommand } from "./unipept_subcommand.js";
+
+export class Taxonomy extends UnipeptSubcommand {
+
+  readonly description = `The unipept taxonomy command yields information from the Unipept Taxonomy records for a given list of NCBI Taxonomy Identifiers. The Unipept Taxonomy is a cleaned up version of the NCBI Taxonomy, and its records are also records of the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way taxon id's are passed, in the order as listed above. Text files and standard input should have one taxon id per line.`;
+
+  constructor() {
+    super("taxonomy");
+
+    this.command
+      .summary("Fetch taxonomic information from Unipept Taxonomy.")
+      .description(this.description)
+      .option("-a, --all", "report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.")
+      .addOption(new Option("-s --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+      .argument("[taxonids...]", "optionally, 1 or more taxon ids")
+      .action((args, options) => this.run(args, options));
+  }
+
+  requiredFields(): string[] {
+    return ["taxon_id"];
+  }
+
+  defaultBatchSize(): number {
+    return 100;
+  }
+}
diff --git a/tests/commands/unipept/taxonomy.test.ts b/tests/commands/unipept/taxonomy.test.ts
new file mode 100644
index 00000000..afaec21e
--- /dev/null
+++ b/tests/commands/unipept/taxonomy.test.ts
@@ -0,0 +1,27 @@
+import { jest } from '@jest/globals';
+import { Taxonomy } from "../../../lib/commands/unipept/taxonomy";
+
+let output: string[];
+jest
+  .spyOn(process.stdout, "write")
+  .mockImplementation((data: unknown) => { output.push(data as string); return true; });
+
+beforeEach(() => {
+  output = [];
+});
+
+test('test with default args', async () => {
+  const command = new Taxonomy();
+  await command.run(["216816"], { header: true, format: "csv" });
+  expect(output[0].startsWith("taxon_id,taxon_name,taxon_rank")).toBeTruthy();
+  expect(output[1].startsWith("216816,Bifidobacterium longum,species")).toBeTruthy();
+  expect(output.length).toBe(2);
+});
+
+test('test with fasta', async () => {
+  const command = new Taxonomy();
+  await command.run([">test", "216816"], { header: true, format: "csv" });
+  expect(output[0].startsWith("fasta_header,taxon_id,taxon_name,taxon_rank")).toBeTruthy();
+  expect(output[1].startsWith(">test,216816,Bifidobacterium longum,species")).toBeTruthy();
+  expect(output.length).toBe(2);
+});