diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts
index 39ba2faa..098f15cd 100644
--- a/lib/commands/unipept.ts
+++ b/lib/commands/unipept.ts
@@ -6,6 +6,7 @@ import { Pept2interpro } from './unipept/pept2interpro.js';
 import { Pept2lca } from './unipept/pept2lca.js';
 import { Pept2prot } from './unipept/pept2prot.js';
 import { Pept2taxa } from './unipept/pept2taxa.js';
+import { Peptinfo } from './unipept/peptinfo.js';
 
 export class Unipept extends BaseCommand {
 
@@ -31,7 +32,8 @@ The command will give priority to the first way the input is passed, in the orde
       .addCommand(new Pept2interpro().command)
       .addCommand(new Pept2lca().command)
       .addCommand(new Pept2prot().command)
-      .addCommand(new Pept2taxa().command);
+      .addCommand(new Pept2taxa().command)
+      .addCommand(new Peptinfo().command);
   }
 
   async run(args?: string[]) {
diff --git a/lib/commands/unipept/peptinfo.ts b/lib/commands/unipept/peptinfo.ts
new file mode 100644
index 00000000..62cd9cd0
--- /dev/null
+++ b/lib/commands/unipept/peptinfo.ts
@@ -0,0 +1,45 @@
+import { Option } from "commander";
+import { UnipeptSubcommand } from "./unipept_subcommand.js";
+
+/**
+ * `unipept peptinfo` subcommand: registers its help text, options and peptide
+ * arguments on the underlying commander command and forwards invocations to `run`.
+ */
+export class Peptinfo extends UnipeptSubcommand {
+
+  /** Long-form help text passed to the command's `description`. */
+  readonly description = `For each tryptic peptide the unipept peptinfo command retrieves from Unipept the functional information and the lowest common ancestor of the set of taxa from all UniProt entries whose protein sequence contains an exact match to the tryptic peptide. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of tryptic peptides that are passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`;
+
+  /** Names the subcommand "peptinfo" and configures its summary, options, arguments and action. */
+  constructor() {
+    super("peptinfo");
+
+    this.command
+      .summary("Fetch functional information and the taxonomic lowest common ancestor of UniProt entries that match tryptic peptides.")
+      .description(this.description)
+      .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides")
+      .option("-a, --all", "report the names of the functional annotations and all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.")
+      // Without a value placeholder commander registers --select as a boolean flag,
+      // which contradicts the help text (a list of field names, repeatable option).
+      // NOTE(review): the variadic `<fields...>` placeholder is assumed; confirm it matches the sibling subcommands.
+      .addOption(new Option("-s --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+      .argument("[peptides...]", "optionally, 1 or more peptides")
+      .action((args, options) => this.run(args, options));
+  }
+
+  /** @returns the required field names for this subcommand: only "peptide". */
+  requiredFields(): string[] {
+    return ["peptide"];
+  }
+
+  /** @returns the default batch size: 100 when the `all` option is set, 1000 otherwise. */
+  defaultBatchSize(): number {
+    return this.options.all ? 100 : 1000;
+  }
+}
diff --git a/tests/commands/unipept/peptinfo.test.ts b/tests/commands/unipept/peptinfo.test.ts
new file mode 100644
index 00000000..1e4aa65d
--- /dev/null
+++ b/tests/commands/unipept/peptinfo.test.ts
@@ -0,0 +1,28 @@
+import { jest } from '@jest/globals';
+import { Peptinfo } from "../../../lib/commands/unipept/peptinfo";
+
+// Collects every chunk written to stdout so the tests can inspect the emitted rows.
+let output: string[];
+jest
+  .spyOn(process.stdout, "write")
+  .mockImplementation((data: unknown) => { output.push(data as string); return true; });
+
+beforeEach(() => {
+  output = [];
+});
+
+test('test with default args', async () => {
+  const command = new Peptinfo();
+  await command.run(["AALTER"], { header: true, format: "csv" });
+  expect(output[0].startsWith("peptide,total_protein_count,taxon_id,taxon_name,taxon_rank,ec_number,ec_protein_count,go_term,go_protein_count,ipr_code,ipr_protein_count")).toBeTruthy();
+  expect(output[1].startsWith("AALTER,3310,1,root")).toBeTruthy();
+  expect(output.length).toBe(2);
+});
+
+test('test with fasta', async () => {
+  const command = new Peptinfo();
+  await command.run([">test", "AALTER"], { header: true, format: "csv" });
+  expect(output[0].startsWith("fasta_header,peptide,total_protein_count,taxon_id,taxon_name,taxon_rank,ec_number,ec_protein_count,go_term,go_protein_count,ipr_code,ipr_protein_count")).toBeTruthy();
+  expect(output[1].startsWith(">test,AALTER,3310,1,root")).toBeTruthy();
+  expect(output.length).toBe(2);
+});