From 362789f1ac23e9e22df8f834ce8c0f6820faeef0 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 14:28:44 +0200 Subject: [PATCH] add pept2taxa --- lib/commands/unipept.ts | 4 ++- lib/commands/unipept/pept2interpro.ts | 2 +- lib/commands/unipept/pept2prot.ts | 2 +- lib/commands/unipept/pept2taxa.ts | 34 ++++++++++++++++++++++ lib/commands/unipept/unipept_subcommand.ts | 6 ++++ tests/commands/unipept/pept2taxa.test.ts | 27 +++++++++++++++++ 6 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 lib/commands/unipept/pept2taxa.ts create mode 100644 tests/commands/unipept/pept2taxa.test.ts diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts index deaf4ef1..39ba2faa 100644 --- a/lib/commands/unipept.ts +++ b/lib/commands/unipept.ts @@ -5,6 +5,7 @@ import { Pept2go } from './unipept/pept2go.js'; import { Pept2interpro } from './unipept/pept2interpro.js'; import { Pept2lca } from './unipept/pept2lca.js'; import { Pept2prot } from './unipept/pept2prot.js'; +import { Pept2taxa } from './unipept/pept2taxa.js'; export class Unipept extends BaseCommand { @@ -29,7 +30,8 @@ The command will give priority to the first way the input is passed, in the orde .addCommand(new Pept2go().command) .addCommand(new Pept2interpro().command) .addCommand(new Pept2lca().command) - .addCommand(new Pept2prot().command); + .addCommand(new Pept2prot().command) + .addCommand(new Pept2taxa().command); } async run(args?: string[]) { diff --git a/lib/commands/unipept/pept2interpro.ts b/lib/commands/unipept/pept2interpro.ts index f13a03d0..55b9e766 100644 --- a/lib/commands/unipept/pept2interpro.ts +++ b/lib/commands/unipept/pept2interpro.ts @@ -15,7 +15,7 @@ The command will give priority to the first way tryptic peptides are passed, in super("pept2interpro"); this.command - .summary("Fetch GO terms of UniProt entries that match tryptic peptides.") + .summary("Fetch InterPro entries of UniProt entries that match tryptic peptides.") .description(this.description) 
.option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides") .option("-a, --all", "Also return the names of the InterPro entries. Note that this may have a performance penalty.") diff --git a/lib/commands/unipept/pept2prot.ts b/lib/commands/unipept/pept2prot.ts index 90f55e36..02f880d4 100644 --- a/lib/commands/unipept/pept2prot.ts +++ b/lib/commands/unipept/pept2prot.ts @@ -18,7 +18,7 @@ The command will give priority to the first way tryptic peptides are passed, in .summary("Fetch UniProt entries that match tryptic peptides.") .description(this.description) .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides") - .option("-a, --all", "Also return the names of the EC numbers. Note that this may have a performance penalty.") + .option("-a, --all", "report all information fields of UniProt entries available in Unipept. Note that this may have a performance penalty.") .addOption(new Option("-s --select ", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.")) .argument("[peptides...]", "optionally, 1 or more peptides") .action((args, options) => this.run(args, options)); diff --git a/lib/commands/unipept/pept2taxa.ts b/lib/commands/unipept/pept2taxa.ts new file mode 100644 index 00000000..19e3adfd --- /dev/null +++ b/lib/commands/unipept/pept2taxa.ts @@ -0,0 +1,34 @@ +import { Option } from "commander"; +import { UnipeptSubcommand } from "./unipept_subcommand.js"; + +/** The "pept2taxa" subcommand: configures the inherited commander command (summary, help text, options, positional peptides) and forwards invocations to the inherited run(). */ export class Pept2taxa extends UnipeptSubcommand { + + /** Long help text; handed to commander through .description() in the constructor. */ readonly description = `For each tryptic peptide the unipept pept2taxa command retrieves from Unipept the set of taxa from all UniProt entries whose protein sequence contains an exact match to the tryptic peptide. 
The command expects a list of tryptic peptides that are passed + +- as separate command line arguments +- in a text file that is passed as an argument to the -i option +- to standard input + +The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`; + + /** Names the subcommand "pept2taxa" and registers its summary, description, options and the optional peptide arguments. */ constructor() { + super("pept2taxa"); + + this.command + .summary("Fetch taxa of UniProt entries that match tryptic peptides.") + .description(this.description) + .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides") + .option("-a, --all", "report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.") + /* The <fields...> placeholder makes --select take values and accumulate over repeated -s options; without a placeholder commander defines a boolean flag. NOTE(review): assumes the base class reads options.select as a string array - confirm. */ .addOption(new Option("-s --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.")) + .argument("[peptides...]", "optionally, 1 or more peptides") + .action((args, options) => this.run(args, options)); + } + + /** Returns ["peptide"]. NOTE(review): presumably the fields the base class always keeps in the output, whatever --select says - confirm in UnipeptSubcommand. */ requiredFields(): string[] { + return ["peptide"]; + } + + /** Returns 5. NOTE(review): presumably the number of peptides sent per request - confirm in UnipeptSubcommand. */ defaultBatchSize(): number { + return 5; + } +} diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts index c5dfc4b4..e1aba774 100644 --- a/lib/commands/unipept/unipept_subcommand.ts +++ b/lib/commands/unipept/unipept_subcommand.ts @@ -60,6 +60,12 @@ export abstract class UnipeptSubcommand { this.formatter = FormatterFactory.getFormatter(this.options.format); if (this.options.output) { this.outputStream = createWriteStream(this.options.output); + } else { + process.stdout.on("error", (err) => { + if (err.code === "EPIPE") { + process.exit(0); + } + }) } const iterator = this.getInputIterator(args, options.input as string); diff --git a/tests/commands/unipept/pept2taxa.test.ts b/tests/commands/unipept/pept2taxa.test.ts new file mode 100644 index 00000000..cbfb72c7 --- /dev/null 
+++ b/tests/commands/unipept/pept2taxa.test.ts @@ -0,0 +1,27 @@ +import { jest } from '@jest/globals'; +import { Pept2taxa } from "../../../lib/commands/unipept/pept2taxa"; + +let output: string[]; /* chunks handed to process.stdout.write, in call order; emptied before every test */ +jest /* swap process.stdout.write for a mock that records each chunk in output and reports success */ + .spyOn(process.stdout, "write") + .mockImplementation((data: unknown) => { output.push(data as string); return true; }); + +beforeEach(() => { + output = []; +}); + +test('test with default args', async () => { /* a single peptide argument with CSV output: expects one header chunk followed by exactly one row chunk */ + const command = new Pept2taxa(); + await command.run(["AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("peptide,taxon_id,taxon_name,taxon_rank")).toBeTruthy(); + expect(output[1].startsWith("AALTER,41,Stigmatella aurantiaca,species")).toBeTruthy(); /* NOTE(review): the expected taxon presumably comes from live Unipept data - confirm whether this test needs network access */ + expect(output.length).toBe(2); +}); + +test('test with fasta', async () => { /* a ">test" argument ahead of the peptide: the header gains a leading fasta_header column and the row starts with ">test" */ + const command = new Pept2taxa(); + await command.run([">test", "AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("fasta_header,peptide,taxon_id,taxon_name,taxon_rank")).toBeTruthy(); + expect(output[1].startsWith(">test,AALTER,41,Stigmatella aurantiaca,species")).toBeTruthy(); + expect(output.length).toBe(2); +});