From ce55c09eacada8b7f8e6a0ea3263b06c3db4bbfe Mon Sep 17 00:00:00 2001
From: Bart Mesuere
Date: Tue, 6 Aug 2024 14:41:25 +0200
Subject: [PATCH] add protinfo

---
 lib/commands/unipept.ts                 |  4 ++-
 lib/commands/unipept/protinfo.ts        | 41 +++++++++++++++++++++++++
 tests/commands/unipept/protinfo.test.ts | 30 ++++++++++++++++++
 3 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 lib/commands/unipept/protinfo.ts
 create mode 100644 tests/commands/unipept/protinfo.test.ts

diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts
index 098f15cd..3ba62182 100644
--- a/lib/commands/unipept.ts
+++ b/lib/commands/unipept.ts
@@ -7,6 +7,7 @@ import { Pept2lca } from './unipept/pept2lca.js';
 import { Pept2prot } from './unipept/pept2prot.js';
 import { Pept2taxa } from './unipept/pept2taxa.js';
 import { Peptinfo } from './unipept/peptinfo.js';
+import { Protinfo } from './unipept/protinfo.js';
 
 export class Unipept extends BaseCommand {
 
@@ -33,7 +34,8 @@ The command will give priority to the first way the input is passed, in the orde
       .addCommand(new Pept2lca().command)
       .addCommand(new Pept2prot().command)
       .addCommand(new Pept2taxa().command)
-      .addCommand(new Peptinfo().command);
+      .addCommand(new Peptinfo().command)
+      .addCommand(new Protinfo().command);
   }
 
   async run(args?: string[]) {
diff --git a/lib/commands/unipept/protinfo.ts b/lib/commands/unipept/protinfo.ts
new file mode 100644
index 00000000..22b0ca62
--- /dev/null
+++ b/lib/commands/unipept/protinfo.ts
@@ -0,0 +1,41 @@
+import { Option } from "commander";
+import { UnipeptSubcommand } from "./unipept_subcommand.js";
+
+/**
+ * The `unipept protinfo` subcommand: takes UniProt ids and reports the
+ * functional and taxonomic information Unipept returns for them.
+ */
+export class Protinfo extends UnipeptSubcommand {
+
+  /** Long help text that the constructor attaches to the command. */
+  readonly description = `For each UniProt id the unipept protinfo command retrieves from Unipept the functional information and the NCBI id. The command expects a list of UniProt ids that are passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way protein ids are passed, in the order as listed above. Text files and standard input should have one UniProt id per line.`;
+
+  /** Registers the summary, help text, -s/--select option, positional ids and action handler. */
+  constructor() {
+    super("protinfo");
+
+    this.command
+      .summary("Fetch functional and taxonomic information of UniProt ids")
+      .description(this.description)
+      // `<fields...>` makes --select accept one or more values; without a placeholder commander treats the flag as a boolean.
+      .addOption(new Option("-s --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+      .argument("[proteins...]", "optionally, 1 or more UniProt ids")
+      .action((args, options) => this.run(args, options));
+  }
+
+  /** NOTE(review): presumably the fields that stay in the output whatever --select asks for; confirm against UnipeptSubcommand. */
+  requiredFields(): string[] {
+    return ["protein"];
+  }
+
+  /** NOTE(review): presumably the number of ids sent per request; confirm against UnipeptSubcommand. */
+  defaultBatchSize(): number {
+    return 1000;
+  }
+}
diff --git a/tests/commands/unipept/protinfo.test.ts b/tests/commands/unipept/protinfo.test.ts
new file mode 100644
index 00000000..561804f8
--- /dev/null
+++ b/tests/commands/unipept/protinfo.test.ts
@@ -0,0 +1,30 @@
+import { jest } from '@jest/globals';
+import { Protinfo } from "../../../lib/commands/unipept/protinfo";
+
+// Everything written to stdout is collected here so the tests can inspect it.
+let output: string[];
+jest
+  .spyOn(process.stdout, "write")
+  .mockImplementation((data: unknown) => { output.push(data as string); return true; });
+
+// Start every test with an empty capture buffer.
+beforeEach(() => {
+  output = [];
+});
+
+test('test with default args', async () => {
+  const command = new Protinfo();
+  await command.run(["P78330"], { header: true, format: "csv" });
+  expect(output[0].startsWith("protein,taxon_id,taxon_name,taxon_rank,ec_number,go_term,ipr_code")).toBeTruthy();
+  expect(output[1].startsWith("P78330,9606,Homo sapiens")).toBeTruthy();
+  expect(output.length).toBe(2);
+});
+
+// An input entry starting with ">" is expected to come back in a leading fasta_header column.
+test('test with fasta', async () => {
+  const command = new Protinfo();
+  await command.run([">test", "P78330"], { header: true, format: "csv" });
+  expect(output[0].startsWith("fasta_header,protein,taxon_id,taxon_name,taxon_rank,ec_number,go_term,ipr_code")).toBeTruthy();
+  expect(output[1].startsWith(">test,P78330,9606,Homo sapiens")).toBeTruthy();
+  expect(output.length).toBe(2);
+});