From c3126fdbd4086a9d3c556b7f04799c001ebf766b Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 11:00:47 +0200 Subject: [PATCH 01/14] implement pept2ec --- lib/commands/unipept.ts | 2 ++ lib/commands/unipept/pept2ec.ts | 38 +++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 lib/commands/unipept/pept2ec.ts diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts index 24c1b29a..c25d2bdd 100644 --- a/lib/commands/unipept.ts +++ b/lib/commands/unipept.ts @@ -1,4 +1,5 @@ import { BaseCommand } from './base_command.js'; +import { Pept2ec } from './unipept/pept2ec.js'; import { Pept2lca } from './unipept/pept2lca.js'; export class Unipept extends BaseCommand { @@ -19,6 +20,7 @@ The command will give priority to the first way the input is passed, in the orde this.program .summary("Command line interface to Unipept web services.") .description(this.description) + .addCommand(new Pept2ec().command) .addCommand(new Pept2lca().command); } diff --git a/lib/commands/unipept/pept2ec.ts b/lib/commands/unipept/pept2ec.ts new file mode 100644 index 00000000..437a311b --- /dev/null +++ b/lib/commands/unipept/pept2ec.ts @@ -0,0 +1,38 @@ +import { Option } from "commander"; +import { UnipeptSubcommand } from "./unipept_subcommand.js"; + +export class Pept2ec extends UnipeptSubcommand { + + readonly description = `For each tryptic peptide the unipept pept2ec command retrieves from Unipept the set of EC numbers from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed + +- as separate command line arguments +- in a text file that is passed as an argument to the -i option +- to standard input + +The command will give priority to the first way tryptic peptides are passed, in the order as listed above. 
Text files and standard input should have one tryptic peptide per line.`; + + constructor() { + super("pept2ec"); + + this.command + .summary("Fetch EC numbers of UniProt entries that match tryptic peptides.") + .description(this.description) + .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides") + .option("-a, --all", "Also return the names of the EC numbers. Note that this may have a performance penalty.") + .addOption(new Option("-s --select ", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.")) + .argument("[peptides...]", "optionally, 1 or more peptides") + .action((args, options) => this.run(args, options)); + } + + requiredFields(): string[] { + return ["peptide"]; + } + + defaultBatchSize(): number { + if (this.options.all) { + return 100; + } else { + return 1000; + } + } +} From 931f810863bbfc61b06b69355214bfc882660ce1 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 11:39:31 +0200 Subject: [PATCH 02/14] add pept2lca test --- lib/commands/unipept/unipept_subcommand.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts index 56f9e1fc..c5dfc4b4 100644 --- a/lib/commands/unipept/unipept_subcommand.ts +++ b/lib/commands/unipept/unipept_subcommand.ts @@ -43,7 +43,7 @@ export abstract class UnipeptSubcommand { command.option("-q, --quiet", "disable service messages"); command.option("-i, --input ", "read input from file"); command.option("-o, --output ", "write output to file"); - command.addOption(new Option("-f, --format ", "define the output format").choices(UnipeptSubcommand.VALID_FORMATS).default("json")); + command.addOption(new Option("-f, --format ", "define the output format").choices(UnipeptSubcommand.VALID_FORMATS).default("csv")); command.option("--host ", "specify the server running 
the Unipept web service"); // internal options @@ -53,7 +53,7 @@ export abstract class UnipeptSubcommand { return command; } - async run(args: string[], options: { input?: string }): Promise { + async run(args: string[], options: { [key: string]: unknown }): Promise { this.options = options; this.host = this.getHost(); this.url = `${this.host}/api/v2/${this.name}.json`; @@ -62,7 +62,7 @@ export abstract class UnipeptSubcommand { this.outputStream = createWriteStream(this.options.output); } - const iterator = this.getInputIterator(args, options.input); + const iterator = this.getInputIterator(args, options.input as string); const firstLine = (await iterator.next()).value; if (firstLine.startsWith(">")) { this.fasta = true; From d5446279e231aa870c092fd0c71512fe87deb917 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 12:23:14 +0200 Subject: [PATCH 03/14] add pept2ec test --- eslint.config.js | 1 + lib/formatters/csv_formatter.ts | 23 ++++++++++++++++++--- tests/commands/unipept/pept2ec.test.ts | 27 +++++++++++++++++++++++++ tests/commands/unipept/pept2lca.test.ts | 27 +++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 3 deletions(-) create mode 100644 tests/commands/unipept/pept2ec.test.ts create mode 100644 tests/commands/unipept/pept2lca.test.ts diff --git a/eslint.config.js b/eslint.config.js index 3f8a7e2e..195212d1 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -10,6 +10,7 @@ export default [ { rules: { "@typescript-eslint/no-unused-vars": ["error", { argsIgnorePattern: "^_" }], + "@typescript-eslint/ban-ts-comment": "off", }, ignores: ["dist/"] } diff --git a/lib/formatters/csv_formatter.ts b/lib/formatters/csv_formatter.ts index 39b8657f..7cd36212 100644 --- a/lib/formatters/csv_formatter.ts +++ b/lib/formatters/csv_formatter.ts @@ -1,10 +1,11 @@ +import { get } from "http"; import { Formatter } from "./formatter.js"; import { stringify } from "csv-stringify/sync"; export class CSVFormatter extends Formatter { 
header(sampleData: { [key: string]: string }[], fastaMapper?: boolean | undefined): string { - return stringify([this.getKeys(sampleData, fastaMapper)]); + return stringify([this.getKeys(this.flatten(sampleData), fastaMapper)]); } footer(): string { @@ -12,10 +13,26 @@ export class CSVFormatter extends Formatter { } convert(data: object[]): string { - return stringify(data); + return stringify(this.flatten(data as { [key: string]: unknown }[])); } - getKeys(data: { [key: string]: string }[], fastaMapper?: boolean | undefined): string[] { + getKeys(data: { [key: string]: unknown }[], fastaMapper?: boolean | undefined): string[] { return fastaMapper ? ["fasta_header", ...Object.keys(data[0])] : Object.keys(data[0]); } + + flatten(data: { [key: string]: unknown }[]): { [key: string]: unknown }[] { + if (this.getKeys(data).includes("ec")) { + // @ts-ignore + const keys = Object.keys(data[0].ec[0]); + data.forEach(row => { + keys.forEach(key => { + const newKey = key.startsWith("ec") ? key : `ec_${key}`; + // @ts-ignore + row[newKey] = row.ec.map(e => e[key]).join(" "); + }); + delete row.ec; + }); + } + return data; + } } diff --git a/tests/commands/unipept/pept2ec.test.ts b/tests/commands/unipept/pept2ec.test.ts new file mode 100644 index 00000000..ee789bed --- /dev/null +++ b/tests/commands/unipept/pept2ec.test.ts @@ -0,0 +1,27 @@ +import { jest } from '@jest/globals'; +import { Pept2ec } from "../../../lib/commands/unipept/pept2ec"; + +let output: string[]; +const writeSpy = jest + .spyOn(process.stdout, "write") + .mockImplementation((data: unknown) => { output.push(data as string); return true; }); + +beforeEach(() => { + output = []; +}); + +test('test with default args', async () => { + const command = new Pept2ec(); + await command.run(["AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("peptide,total_protein_count,ec_number,ec_protein_count")).toBeTruthy(); + expect(output[1].startsWith("AALTER,3310,2.3.2.27 3.1.3.3")).toBeTruthy(); + 
expect(output.length).toBe(2); +}); + +test('test with fasta', async () => { + const command = new Pept2ec(); + await command.run([">test", "AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("fasta_header,peptide,total_protein_count,ec_number,ec_protein_count")).toBeTruthy(); + expect(output[1].startsWith(">test,AALTER,3310,2.3.2.27 3.1.3.3")).toBeTruthy(); + expect(output.length).toBe(2); +}); diff --git a/tests/commands/unipept/pept2lca.test.ts b/tests/commands/unipept/pept2lca.test.ts new file mode 100644 index 00000000..0b18c6ac --- /dev/null +++ b/tests/commands/unipept/pept2lca.test.ts @@ -0,0 +1,27 @@ +import { jest } from '@jest/globals'; +import { Pept2lca } from "../../../lib/commands/unipept/pept2lca"; + +let output: string[]; +const writeSpy = jest + .spyOn(process.stdout, "write") + .mockImplementation((data: unknown) => { output.push(data as string); return true; }); + +beforeEach(() => { + output = []; +}); + +test('test with default args', async () => { + const command = new Pept2lca(); + await command.run(["AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("peptide,taxon_id")).toBeTruthy(); + expect(output[1].startsWith("AALTER,1,root,no rank")).toBeTruthy(); + expect(output.length).toBe(2); +}); + +test('test with fasta', async () => { + const command = new Pept2lca(); + await command.run([">test", "AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("fasta_header,peptide,taxon_id")).toBeTruthy(); + expect(output[1].startsWith(">test,AALTER,1,root,no rank")).toBeTruthy(); + expect(output.length).toBe(2); +}); From d7ed283d5071ea839e7d4a3902764d9e22bb7fa3 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 12:25:48 +0200 Subject: [PATCH 04/14] fix linter --- lib/formatters/csv_formatter.ts | 1 - lib/formatters/to_xml.ts | 1 - tests/commands/unipept/pept2ec.test.ts | 2 +- tests/commands/unipept/pept2lca.test.ts | 2 +- 4 files changed, 2 insertions(+), 4 deletions(-) 
diff --git a/lib/formatters/csv_formatter.ts b/lib/formatters/csv_formatter.ts index 7cd36212..3b661207 100644 --- a/lib/formatters/csv_formatter.ts +++ b/lib/formatters/csv_formatter.ts @@ -1,4 +1,3 @@ -import { get } from "http"; import { Formatter } from "./formatter.js"; import { stringify } from "csv-stringify/sync"; diff --git a/lib/formatters/to_xml.ts b/lib/formatters/to_xml.ts index fb10ead4..c806e4f2 100644 --- a/lib/formatters/to_xml.ts +++ b/lib/formatters/to_xml.ts @@ -1,4 +1,3 @@ -// eslint-disable-next-line @typescript-eslint/ban-ts-comment // @ts-nocheck // This file was taken from https://github.com/kawanet/to-xml and modified to have a specific output for arrays. diff --git a/tests/commands/unipept/pept2ec.test.ts b/tests/commands/unipept/pept2ec.test.ts index ee789bed..87283aa7 100644 --- a/tests/commands/unipept/pept2ec.test.ts +++ b/tests/commands/unipept/pept2ec.test.ts @@ -2,7 +2,7 @@ import { jest } from '@jest/globals'; import { Pept2ec } from "../../../lib/commands/unipept/pept2ec"; let output: string[]; -const writeSpy = jest +jest .spyOn(process.stdout, "write") .mockImplementation((data: unknown) => { output.push(data as string); return true; }); diff --git a/tests/commands/unipept/pept2lca.test.ts b/tests/commands/unipept/pept2lca.test.ts index 0b18c6ac..881d6fa5 100644 --- a/tests/commands/unipept/pept2lca.test.ts +++ b/tests/commands/unipept/pept2lca.test.ts @@ -2,7 +2,7 @@ import { jest } from '@jest/globals'; import { Pept2lca } from "../../../lib/commands/unipept/pept2lca"; let output: string[]; -const writeSpy = jest +jest .spyOn(process.stdout, "write") .mockImplementation((data: unknown) => { output.push(data as string); return true; }); From 42fe0bf8f90ea610234818b7b688085e5f098e06 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 13:53:32 +0200 Subject: [PATCH 05/14] add pept2funct --- lib/commands/unipept.ts | 2 ++ lib/commands/unipept/pept2funct.ts | 38 +++++++++++++++++++++++ lib/formatters/csv_formatter.ts 
| 24 +++++++------- tests/commands/unipept/pept2funct.test.ts | 27 ++++++++++++++++ 4 files changed, 80 insertions(+), 11 deletions(-) create mode 100644 lib/commands/unipept/pept2funct.ts create mode 100644 tests/commands/unipept/pept2funct.test.ts diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts index c25d2bdd..525b6321 100644 --- a/lib/commands/unipept.ts +++ b/lib/commands/unipept.ts @@ -1,5 +1,6 @@ import { BaseCommand } from './base_command.js'; import { Pept2ec } from './unipept/pept2ec.js'; +import { Pept2funct } from './unipept/pept2funct.js'; import { Pept2lca } from './unipept/pept2lca.js'; export class Unipept extends BaseCommand { @@ -21,6 +22,7 @@ The command will give priority to the first way the input is passed, in the orde .summary("Command line interface to Unipept web services.") .description(this.description) .addCommand(new Pept2ec().command) + .addCommand(new Pept2funct().command) .addCommand(new Pept2lca().command); } diff --git a/lib/commands/unipept/pept2funct.ts b/lib/commands/unipept/pept2funct.ts new file mode 100644 index 00000000..ae11ff57 --- /dev/null +++ b/lib/commands/unipept/pept2funct.ts @@ -0,0 +1,38 @@ +import { Option } from "commander"; +import { UnipeptSubcommand } from "./unipept_subcommand.js"; + +export class Pept2funct extends UnipeptSubcommand { + + readonly description = `For each tryptic peptide the unipept pept2funct command retrieves from Unipept the set of EC numbers and GO terms from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed + +- as separate command line arguments +- in a text file that is passed as an argument to the -i option +- to standard input + +The command will give priority to the first way tryptic peptides are passed, in the order as listed above. 
Text files and standard input should have one tryptic peptide per line.`; + + constructor() { + super("pept2funct"); + + this.command + .summary("Fetch EC numbers, GO terms and InterPro codes of UniProt entries that match tryptic peptides.") + .description(this.description) + .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides") + .option("-a, --all", "Also return the names of the EC numbers, GO terms and InterPro codes. Note that this may have a performance penalty.") + .addOption(new Option("-s --select ", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.")) + .argument("[peptides...]", "optionally, 1 or more peptides") + .action((args, options) => this.run(args, options)); + } + + requiredFields(): string[] { + return ["peptide"]; + } + + defaultBatchSize(): number { + if (this.options.all) { + return 100; + } else { + return 1000; + } + } +} diff --git a/lib/formatters/csv_formatter.ts b/lib/formatters/csv_formatter.ts index 3b661207..822c593e 100644 --- a/lib/formatters/csv_formatter.ts +++ b/lib/formatters/csv_formatter.ts @@ -20,18 +20,20 @@ export class CSVFormatter extends Formatter { } flatten(data: { [key: string]: unknown }[]): { [key: string]: unknown }[] { - if (this.getKeys(data).includes("ec")) { - // @ts-ignore - const keys = Object.keys(data[0].ec[0]); - data.forEach(row => { - keys.forEach(key => { - const newKey = key.startsWith("ec") ? key : `ec_${key}`; - // @ts-ignore - row[newKey] = row.ec.map(e => e[key]).join(" "); + const prefixes = ["ec", "go", "ipr"]; + prefixes.forEach(prefix => { + if (this.getKeys(data).includes(prefix)) {// @ts-ignore + const keys = Object.keys(data[0][prefix][0]); + data.forEach(row => { + keys.forEach(key => { + const newKey = key.startsWith(prefix) ? 
key : `${prefix}_${key}`; + // @ts-ignore + row[newKey] = row[prefix].map(e => e[key]).join(" "); + }); + delete row[prefix]; }); - delete row.ec; - }); - } + } + }); return data; } } diff --git a/tests/commands/unipept/pept2funct.test.ts b/tests/commands/unipept/pept2funct.test.ts new file mode 100644 index 00000000..97d21b7e --- /dev/null +++ b/tests/commands/unipept/pept2funct.test.ts @@ -0,0 +1,27 @@ +import { jest } from '@jest/globals'; +import { Pept2funct } from "../../../lib/commands/unipept/pept2funct"; + +let output: string[]; +jest + .spyOn(process.stdout, "write") + .mockImplementation((data: unknown) => { output.push(data as string); return true; }); + +beforeEach(() => { + output = []; +}); + +test('test with default args', async () => { + const command = new Pept2funct(); + await command.run(["AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("peptide,total_protein_count,ec_number,ec_protein_count,go_term,go_protein_count,ipr_code,ipr_protein_count")).toBeTruthy(); + expect(output[1].startsWith("AALTER,3310")).toBeTruthy(); + expect(output.length).toBe(2); +}); + +test('test with fasta', async () => { + const command = new Pept2funct(); + await command.run([">test", "AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("fasta_header,peptide,total_protein_count,ec_number,ec_protein_count,go_term,go_protein_count,ipr_code,ipr_protein_count")).toBeTruthy(); + expect(output[1].startsWith(">test,AALTER,3310")).toBeTruthy(); + expect(output.length).toBe(2); +}); From d7fffcd300241d9d89c5b62af5f0f597ec142f78 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 13:57:16 +0200 Subject: [PATCH 06/14] add pept2go --- lib/commands/unipept.ts | 2 ++ lib/commands/unipept/pept2go.ts | 38 ++++++++++++++++++++++++++ tests/commands/unipept/pept2go.test.ts | 27 ++++++++++++++++++ 3 files changed, 67 insertions(+) create mode 100644 lib/commands/unipept/pept2go.ts create mode 100644 
tests/commands/unipept/pept2go.test.ts diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts index 525b6321..b70718f6 100644 --- a/lib/commands/unipept.ts +++ b/lib/commands/unipept.ts @@ -1,6 +1,7 @@ import { BaseCommand } from './base_command.js'; import { Pept2ec } from './unipept/pept2ec.js'; import { Pept2funct } from './unipept/pept2funct.js'; +import { Pept2go } from './unipept/pept2go.js'; import { Pept2lca } from './unipept/pept2lca.js'; export class Unipept extends BaseCommand { @@ -23,6 +24,7 @@ The command will give priority to the first way the input is passed, in the orde .description(this.description) .addCommand(new Pept2ec().command) .addCommand(new Pept2funct().command) + .addCommand(new Pept2go().command) .addCommand(new Pept2lca().command); } diff --git a/lib/commands/unipept/pept2go.ts b/lib/commands/unipept/pept2go.ts new file mode 100644 index 00000000..8f91128d --- /dev/null +++ b/lib/commands/unipept/pept2go.ts @@ -0,0 +1,38 @@ +import { Option } from "commander"; +import { UnipeptSubcommand } from "./unipept_subcommand.js"; + +export class Pept2go extends UnipeptSubcommand { + + readonly description = `For each tryptic peptide the unipept pept2ec command retrieves from Unipept the set of GO terms from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed + +- as separate command line arguments +- in a text file that is passed as an argument to the -i option +- to standard input + +The command will give priority to the first way tryptic peptides are passed, in the order as listed above. 
Text files and standard input should have one tryptic peptide per line.`; + + constructor() { + super("pept2go"); + + this.command + .summary("Fetch GO terms of UniProt entries that match tryptic peptides.") + .description(this.description) + .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides") + .option("-a, --all", "Also return the names of the GO terms. Note that this may have a performance penalty.") + .addOption(new Option("-s --select ", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.")) + .argument("[peptides...]", "optionally, 1 or more peptides") + .action((args, options) => this.run(args, options)); + } + + requiredFields(): string[] { + return ["peptide"]; + } + + defaultBatchSize(): number { + if (this.options.all) { + return 100; + } else { + return 1000; + } + } +} diff --git a/tests/commands/unipept/pept2go.test.ts b/tests/commands/unipept/pept2go.test.ts new file mode 100644 index 00000000..08775db1 --- /dev/null +++ b/tests/commands/unipept/pept2go.test.ts @@ -0,0 +1,27 @@ +import { jest } from '@jest/globals'; +import { Pept2go } from "../../../lib/commands/unipept/pept2go"; + +let output: string[]; +jest + .spyOn(process.stdout, "write") + .mockImplementation((data: unknown) => { output.push(data as string); return true; }); + +beforeEach(() => { + output = []; +}); + +test('test with default args', async () => { + const command = new Pept2go(); + await command.run(["AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("peptide,total_protein_count,go_term,go_protein_count")).toBeTruthy(); + expect(output[1].startsWith("AALTER,3310,GO:0003677")).toBeTruthy(); + expect(output.length).toBe(2); +}); + +test('test with fasta', async () => { + const command = new Pept2go(); + await command.run([">test", "AALTER"], { header: true, format: "csv" }); + 
expect(output[0].startsWith("fasta_header,peptide,total_protein_count,go_term,go_protein_count")).toBeTruthy(); + expect(output[1].startsWith(">test,AALTER,3310,GO:0003677")).toBeTruthy(); + expect(output.length).toBe(2); +}); From 07d13683e0dad3e76d6dfbb1b5e307e45fb3d499 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 14:01:15 +0200 Subject: [PATCH 07/14] add pept2interpro --- lib/commands/unipept.ts | 2 ++ lib/commands/unipept/pept2go.ts | 2 +- lib/commands/unipept/pept2interpro.ts | 38 ++++++++++++++++++++ tests/commands/unipept/pept2interpro.test.ts | 27 ++++++++++++++ 4 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 lib/commands/unipept/pept2interpro.ts create mode 100644 tests/commands/unipept/pept2interpro.test.ts diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts index b70718f6..3c299050 100644 --- a/lib/commands/unipept.ts +++ b/lib/commands/unipept.ts @@ -2,6 +2,7 @@ import { BaseCommand } from './base_command.js'; import { Pept2ec } from './unipept/pept2ec.js'; import { Pept2funct } from './unipept/pept2funct.js'; import { Pept2go } from './unipept/pept2go.js'; +import { Pept2interpro } from './unipept/pept2interpro.js'; import { Pept2lca } from './unipept/pept2lca.js'; export class Unipept extends BaseCommand { @@ -25,6 +26,7 @@ The command will give priority to the first way the input is passed, in the orde .addCommand(new Pept2ec().command) .addCommand(new Pept2funct().command) .addCommand(new Pept2go().command) + .addCommand(new Pept2interpro().command) .addCommand(new Pept2lca().command); } diff --git a/lib/commands/unipept/pept2go.ts b/lib/commands/unipept/pept2go.ts index 8f91128d..c14fcf15 100644 --- a/lib/commands/unipept/pept2go.ts +++ b/lib/commands/unipept/pept2go.ts @@ -3,7 +3,7 @@ import { UnipeptSubcommand } from "./unipept_subcommand.js"; export class Pept2go extends UnipeptSubcommand { - readonly description = `For each tryptic peptide the unipept pept2ec command retrieves from Unipept the 
set of GO terms from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed + readonly description = `For each tryptic peptide the unipept pept2go command retrieves from Unipept the set of GO terms from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed - as separate command line arguments - in a text file that is passed as an argument to the -i option diff --git a/lib/commands/unipept/pept2interpro.ts b/lib/commands/unipept/pept2interpro.ts new file mode 100644 index 00000000..f13a03d0 --- /dev/null +++ b/lib/commands/unipept/pept2interpro.ts @@ -0,0 +1,38 @@ +import { Option } from "commander"; +import { UnipeptSubcommand } from "./unipept_subcommand.js"; + +export class Pept2interpro extends UnipeptSubcommand { + + readonly description = `For each tryptic peptide the unipept pept2interpro command retrieves from Unipept the set of InterPro entries from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed + +- as separate command line arguments +- in a text file that is passed as an argument to the -i option +- to standard input + +The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`; + + constructor() { + super("pept2interpro"); + + this.command + .summary("Fetch GO terms of UniProt entries that match tryptic peptides.") + .description(this.description) + .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides") + .option("-a, --all", "Also return the names of the InterPro entries. 
Note that this may have a performance penalty.") + .addOption(new Option("-s --select ", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.")) + .argument("[peptides...]", "optionally, 1 or more peptides") + .action((args, options) => this.run(args, options)); + } + + requiredFields(): string[] { + return ["peptide"]; + } + + defaultBatchSize(): number { + if (this.options.all) { + return 100; + } else { + return 1000; + } + } +} diff --git a/tests/commands/unipept/pept2interpro.test.ts b/tests/commands/unipept/pept2interpro.test.ts new file mode 100644 index 00000000..5523e7fb --- /dev/null +++ b/tests/commands/unipept/pept2interpro.test.ts @@ -0,0 +1,27 @@ +import { jest } from '@jest/globals'; +import { Pept2interpro } from "../../../lib/commands/unipept/pept2interpro"; + +let output: string[]; +jest + .spyOn(process.stdout, "write") + .mockImplementation((data: unknown) => { output.push(data as string); return true; }); + +beforeEach(() => { + output = []; +}); + +test('test with default args', async () => { + const command = new Pept2interpro(); + await command.run(["AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("peptide,total_protein_count,ipr_code,ipr_protein_count")).toBeTruthy(); + expect(output[1].startsWith("AALTER,3310,IPR003613")).toBeTruthy(); + expect(output.length).toBe(2); +}); + +test('test with fasta', async () => { + const command = new Pept2interpro(); + await command.run([">test", "AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("fasta_header,peptide,total_protein_count,ipr_code,ipr_protein_count")).toBeTruthy(); + expect(output[1].startsWith(">test,AALTER,3310,IPR003613")).toBeTruthy(); + expect(output.length).toBe(2); +}); From bb4ed7561b7a6c94ba6254319d836f574c04e2b9 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 14:13:48 +0200 Subject: [PATCH 08/14] add pept2prot 
--- lib/commands/unipept.ts | 4 ++- lib/commands/unipept/pept2prot.ts | 38 ++++++++++++++++++++++++ tests/commands/unipept/pept2prot.test.ts | 27 +++++++++++++++++ 3 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 lib/commands/unipept/pept2prot.ts create mode 100644 tests/commands/unipept/pept2prot.test.ts diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts index 3c299050..deaf4ef1 100644 --- a/lib/commands/unipept.ts +++ b/lib/commands/unipept.ts @@ -4,6 +4,7 @@ import { Pept2funct } from './unipept/pept2funct.js'; import { Pept2go } from './unipept/pept2go.js'; import { Pept2interpro } from './unipept/pept2interpro.js'; import { Pept2lca } from './unipept/pept2lca.js'; +import { Pept2prot } from './unipept/pept2prot.js'; export class Unipept extends BaseCommand { @@ -27,7 +28,8 @@ The command will give priority to the first way the input is passed, in the orde .addCommand(new Pept2funct().command) .addCommand(new Pept2go().command) .addCommand(new Pept2interpro().command) - .addCommand(new Pept2lca().command); + .addCommand(new Pept2lca().command) + .addCommand(new Pept2prot().command); } async run(args?: string[]) { diff --git a/lib/commands/unipept/pept2prot.ts b/lib/commands/unipept/pept2prot.ts new file mode 100644 index 00000000..90f55e36 --- /dev/null +++ b/lib/commands/unipept/pept2prot.ts @@ -0,0 +1,38 @@ +import { Option } from "commander"; +import { UnipeptSubcommand } from "./unipept_subcommand.js"; + +export class Pept2prot extends UnipeptSubcommand { + + readonly description = `For each tryptic peptide the unipept pept2prot command retrieves from Unipept all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. 
The command expects a list of tryptic peptides that are passed + +- as separate command line arguments +- in a text file that is passed as an argument to the -i option +- to standard input + +The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`; + + constructor() { + super("pept2prot"); + + this.command + .summary("Fetch UniProt entries that match tryptic peptides.") + .description(this.description) + .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides") + .option("-a, --all", "Also return the names of the EC numbers. Note that this may have a performance penalty.") + .addOption(new Option("-s --select ", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.")) + .argument("[peptides...]", "optionally, 1 or more peptides") + .action((args, options) => this.run(args, options)); + } + + requiredFields(): string[] { + return ["peptide"]; + } + + defaultBatchSize(): number { + if (this.options.all) { + return 5; + } else { + return 10; + } + } +} diff --git a/tests/commands/unipept/pept2prot.test.ts b/tests/commands/unipept/pept2prot.test.ts new file mode 100644 index 00000000..768ff50e --- /dev/null +++ b/tests/commands/unipept/pept2prot.test.ts @@ -0,0 +1,27 @@ +import { jest } from '@jest/globals'; +import { Pept2prot } from "../../../lib/commands/unipept/pept2prot"; + +let output: string[]; +jest + .spyOn(process.stdout, "write") + .mockImplementation((data: unknown) => { output.push(data as string); return true; }); + +beforeEach(() => { + output = []; +}); + +test('test with default args', async () => { + const command = new Pept2prot(); + await command.run(["AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("peptide,uniprot_id,protein_name,taxon_id,protein")).toBeTruthy(); + 
expect(output[1].startsWith("AALTER,P78330")).toBeTruthy(); + expect(output.length).toBe(2); +}); + +test('test with fasta', async () => { + const command = new Pept2prot(); + await command.run([">test", "AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("fasta_header,peptide,uniprot_id,protein_name,taxon_id,protein")).toBeTruthy(); + expect(output[1].startsWith(">test,AALTER,P78330")).toBeTruthy(); + expect(output.length).toBe(2); +}); From 362789f1ac23e9e22df8f834ce8c0f6820faeef0 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 14:28:44 +0200 Subject: [PATCH 09/14] add pept2taxa --- lib/commands/unipept.ts | 4 ++- lib/commands/unipept/pept2interpro.ts | 2 +- lib/commands/unipept/pept2prot.ts | 2 +- lib/commands/unipept/pept2taxa.ts | 34 ++++++++++++++++++++++ lib/commands/unipept/unipept_subcommand.ts | 6 ++++ tests/commands/unipept/pept2taxa.test.ts | 27 +++++++++++++++++ 6 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 lib/commands/unipept/pept2taxa.ts create mode 100644 tests/commands/unipept/pept2taxa.test.ts diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts index deaf4ef1..39ba2faa 100644 --- a/lib/commands/unipept.ts +++ b/lib/commands/unipept.ts @@ -5,6 +5,7 @@ import { Pept2go } from './unipept/pept2go.js'; import { Pept2interpro } from './unipept/pept2interpro.js'; import { Pept2lca } from './unipept/pept2lca.js'; import { Pept2prot } from './unipept/pept2prot.js'; +import { Pept2taxa } from './unipept/pept2taxa.js'; export class Unipept extends BaseCommand { @@ -29,7 +30,8 @@ The command will give priority to the first way the input is passed, in the orde .addCommand(new Pept2go().command) .addCommand(new Pept2interpro().command) .addCommand(new Pept2lca().command) - .addCommand(new Pept2prot().command); + .addCommand(new Pept2prot().command) + .addCommand(new Pept2taxa().command); } async run(args?: string[]) { diff --git a/lib/commands/unipept/pept2interpro.ts 
b/lib/commands/unipept/pept2interpro.ts index f13a03d0..55b9e766 100644 --- a/lib/commands/unipept/pept2interpro.ts +++ b/lib/commands/unipept/pept2interpro.ts @@ -15,7 +15,7 @@ The command will give priority to the first way tryptic peptides are passed, in super("pept2interpro"); this.command - .summary("Fetch GO terms of UniProt entries that match tryptic peptides.") + .summary("Fetch InterPro entries of UniProt entries that match tryptic peptides.") .description(this.description) .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides") .option("-a, --all", "Also return the names of the InterPro entries. Note that this may have a performance penalty.") diff --git a/lib/commands/unipept/pept2prot.ts b/lib/commands/unipept/pept2prot.ts index 90f55e36..02f880d4 100644 --- a/lib/commands/unipept/pept2prot.ts +++ b/lib/commands/unipept/pept2prot.ts @@ -18,7 +18,7 @@ The command will give priority to the first way tryptic peptides are passed, in .summary("Fetch UniProt entries that match tryptic peptides.") .description(this.description) .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides") - .option("-a, --all", "Also return the names of the EC numbers. Note that this may have a performance penalty.") + .option("-a, --all", "report all information fields of UniProt entries available in Unipept. Note that this may have a performance penalty.") .addOption(new Option("-s --select ", "select the information fields to return. Selected fields are passed as a comma separated list of field names. 
Multiple -s (or --select) options may be used.")) .argument("[peptides...]", "optionally, 1 or more peptides") .action((args, options) => this.run(args, options)); diff --git a/lib/commands/unipept/pept2taxa.ts b/lib/commands/unipept/pept2taxa.ts new file mode 100644 index 00000000..19e3adfd --- /dev/null +++ b/lib/commands/unipept/pept2taxa.ts @@ -0,0 +1,34 @@ +import { Option } from "commander"; +import { UnipeptSubcommand } from "./unipept_subcommand.js"; + +export class Pept2taxa extends UnipeptSubcommand { + + readonly description = `For each tryptic peptide the unipept pept2taxa command retrieves from Unipept the set of taxa from all UniProt entries whose protein sequence contains an exact match to the tryptic peptide. The command expects a list of tryptic peptides that are passed + +- as separate command line arguments +- in a text file that is passed as an argument to the -i option +- to standard input + +The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`; + + constructor() { + super("pept2taxa"); + + this.command + .summary("Fetch taxa of UniProt entries that match tryptic peptides.") + .description(this.description) + .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides") + .option("-a, --all", "report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.") + .addOption(new Option("-s --select ", "select the information fields to return. Selected fields are passed as a comma separated list of field names. 
Multiple -s (or --select) options may be used.")) + .argument("[peptides...]", "optionally, 1 or more peptides") + .action((args, options) => this.run(args, options)); + } + + requiredFields(): string[] { + return ["peptide"]; + } + + defaultBatchSize(): number { + return 5; + } +} diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts index c5dfc4b4..e1aba774 100644 --- a/lib/commands/unipept/unipept_subcommand.ts +++ b/lib/commands/unipept/unipept_subcommand.ts @@ -60,6 +60,12 @@ export abstract class UnipeptSubcommand { this.formatter = FormatterFactory.getFormatter(this.options.format); if (this.options.output) { this.outputStream = createWriteStream(this.options.output); + } else { + process.stdout.on("error", (err) => { + if (err.code === "EPIPE") { + process.exit(0); + } + }) } const iterator = this.getInputIterator(args, options.input as string); diff --git a/tests/commands/unipept/pept2taxa.test.ts b/tests/commands/unipept/pept2taxa.test.ts new file mode 100644 index 00000000..cbfb72c7 --- /dev/null +++ b/tests/commands/unipept/pept2taxa.test.ts @@ -0,0 +1,27 @@ +import { jest } from '@jest/globals'; +import { Pept2taxa } from "../../../lib/commands/unipept/pept2taxa"; + +let output: string[]; +jest + .spyOn(process.stdout, "write") + .mockImplementation((data: unknown) => { output.push(data as string); return true; }); + +beforeEach(() => { + output = []; +}); + +test('test with default args', async () => { + const command = new Pept2taxa(); + await command.run(["AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("peptide,taxon_id,taxon_name,taxon_rank")).toBeTruthy(); + expect(output[1].startsWith("AALTER,41,Stigmatella aurantiaca,species")).toBeTruthy(); + expect(output.length).toBe(2); +}); + +test('test with fasta', async () => { + const command = new Pept2taxa(); + await command.run([">test", "AALTER"], { header: true, format: "csv" }); + 
expect(output[0].startsWith("fasta_header,peptide,taxon_id,taxon_name,taxon_rank")).toBeTruthy(); + expect(output[1].startsWith(">test,AALTER,41,Stigmatella aurantiaca,species")).toBeTruthy(); + expect(output.length).toBe(2); +}); From 49d282875e5d5cfc8a1544b2b25b0f24af4365d5 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 14:34:22 +0200 Subject: [PATCH 10/14] add peptinfo --- lib/commands/unipept.ts | 4 ++- lib/commands/unipept/peptinfo.ts | 38 +++++++++++++++++++++++++ tests/commands/unipept/peptinfo.test.ts | 27 ++++++++++++++++++ 3 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 lib/commands/unipept/peptinfo.ts create mode 100644 tests/commands/unipept/peptinfo.test.ts diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts index 39ba2faa..098f15cd 100644 --- a/lib/commands/unipept.ts +++ b/lib/commands/unipept.ts @@ -6,6 +6,7 @@ import { Pept2interpro } from './unipept/pept2interpro.js'; import { Pept2lca } from './unipept/pept2lca.js'; import { Pept2prot } from './unipept/pept2prot.js'; import { Pept2taxa } from './unipept/pept2taxa.js'; +import { Peptinfo } from './unipept/peptinfo.js'; export class Unipept extends BaseCommand { @@ -31,7 +32,8 @@ The command will give priority to the first way the input is passed, in the orde .addCommand(new Pept2interpro().command) .addCommand(new Pept2lca().command) .addCommand(new Pept2prot().command) - .addCommand(new Pept2taxa().command); + .addCommand(new Pept2taxa().command) + .addCommand(new Peptinfo().command); } async run(args?: string[]) { diff --git a/lib/commands/unipept/peptinfo.ts b/lib/commands/unipept/peptinfo.ts new file mode 100644 index 00000000..62cd9cd0 --- /dev/null +++ b/lib/commands/unipept/peptinfo.ts @@ -0,0 +1,38 @@ +import { Option } from "commander"; +import { UnipeptSubcommand } from "./unipept_subcommand.js"; + +export class Peptinfo extends UnipeptSubcommand { + + readonly description = `For each tryptic peptide the unipept peptinfo command retrieves 
from Unipept the functional information and the lowest common ancestor of the set of taxa from all UniProt entries whose protein sequence contains an exact match to the tryptic peptide. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of tryptic peptides that are passed + +- as separate command line arguments +- in a text file that is passed as an argument to the -i option +- to standard input + +The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`; + + constructor() { + super("peptinfo"); + + this.command + .summary("Fetch functional information and the taxonomic lowest common ancestor of UniProt entries that match tryptic peptides.") + .description(this.description) + .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides") + .option("-a, --all", "report the names of the functional annotations and all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.") + .addOption(new Option("-s --select ", "select the information fields to return. Selected fields are passed as a comma separated list of field names. 
Multiple -s (or --select) options may be used.")) + .argument("[peptides...]", "optionally, 1 or more peptides") + .action((args, options) => this.run(args, options)); + } + + requiredFields(): string[] { + return ["peptide"]; + } + + defaultBatchSize(): number { + if (this.options.all) { + return 100; + } else { + return 1000; + } + } +} diff --git a/tests/commands/unipept/peptinfo.test.ts b/tests/commands/unipept/peptinfo.test.ts new file mode 100644 index 00000000..1e4aa65d --- /dev/null +++ b/tests/commands/unipept/peptinfo.test.ts @@ -0,0 +1,27 @@ +import { jest } from '@jest/globals'; +import { Peptinfo } from "../../../lib/commands/unipept/peptinfo"; + +let output: string[]; +jest + .spyOn(process.stdout, "write") + .mockImplementation((data: unknown) => { output.push(data as string); return true; }); + +beforeEach(() => { + output = []; +}); + +test('test with default args', async () => { + const command = new Peptinfo(); + await command.run(["AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("peptide,total_protein_count,taxon_id,taxon_name,taxon_rank,ec_number,ec_protein_count,go_term,go_protein_count,ipr_code,ipr_protein_count")).toBeTruthy(); + expect(output[1].startsWith("AALTER,3310,1,root")).toBeTruthy(); + expect(output.length).toBe(2); +}); + +test('test with fasta', async () => { + const command = new Peptinfo(); + await command.run([">test", "AALTER"], { header: true, format: "csv" }); + expect(output[0].startsWith("fasta_header,peptide,total_protein_count,taxon_id,taxon_name,taxon_rank,ec_number,ec_protein_count,go_term,go_protein_count,ipr_code,ipr_protein_count")).toBeTruthy(); + expect(output[1].startsWith(">test,AALTER,3310,1,root")).toBeTruthy(); + expect(output.length).toBe(2); +}); From ce55c09eacada8b7f8e6a0ea3263b06c3db4bbfe Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 14:41:25 +0200 Subject: [PATCH 11/14] add protinfo --- lib/commands/unipept.ts | 4 +++- lib/commands/unipept/protinfo.ts | 32 
+++++++++++++++++++++++++ tests/commands/unipept/protinfo.test.ts | 27 +++++++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 lib/commands/unipept/protinfo.ts create mode 100644 tests/commands/unipept/protinfo.test.ts diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts index 098f15cd..3ba62182 100644 --- a/lib/commands/unipept.ts +++ b/lib/commands/unipept.ts @@ -7,6 +7,7 @@ import { Pept2lca } from './unipept/pept2lca.js'; import { Pept2prot } from './unipept/pept2prot.js'; import { Pept2taxa } from './unipept/pept2taxa.js'; import { Peptinfo } from './unipept/peptinfo.js'; +import { Protinfo } from './unipept/protinfo.js'; export class Unipept extends BaseCommand { @@ -33,7 +34,8 @@ The command will give priority to the first way the input is passed, in the orde .addCommand(new Pept2lca().command) .addCommand(new Pept2prot().command) .addCommand(new Pept2taxa().command) - .addCommand(new Peptinfo().command); + .addCommand(new Peptinfo().command) + .addCommand(new Protinfo().command); } async run(args?: string[]) { diff --git a/lib/commands/unipept/protinfo.ts b/lib/commands/unipept/protinfo.ts new file mode 100644 index 00000000..22b0ca62 --- /dev/null +++ b/lib/commands/unipept/protinfo.ts @@ -0,0 +1,32 @@ +import { Option } from "commander"; +import { UnipeptSubcommand } from "./unipept_subcommand.js"; + +export class Protinfo extends UnipeptSubcommand { + + readonly description = `For each UniProt id the unipept protinfo command retrieves from Unipept the functional information and the NCBI id. The command expects a list of UniProt ids that are passed + +- as separate command line arguments +- in a text file that is passed as an argument to the -i option +- to standard input + +The command will give priority to the first way protein id's are passed, in the order as listed above. 
Text files and standard input should have one tryptic peptide per line.`; + + constructor() { + super("protinfo"); + + this.command + .summary("Fetch functional and taxonomic information of UniProt ids") + .description(this.description) + .addOption(new Option("-s --select ", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.")) + .argument("[proteins...]", "optionally, 1 or more UniProt ids") + .action((args, options) => this.run(args, options)); + } + + requiredFields(): string[] { + return ["protein"]; + } + + defaultBatchSize(): number { + return 1000; + } +} diff --git a/tests/commands/unipept/protinfo.test.ts b/tests/commands/unipept/protinfo.test.ts new file mode 100644 index 00000000..561804f8 --- /dev/null +++ b/tests/commands/unipept/protinfo.test.ts @@ -0,0 +1,27 @@ +import { jest } from '@jest/globals'; +import { Protinfo } from "../../../lib/commands/unipept/protinfo"; + +let output: string[]; +jest + .spyOn(process.stdout, "write") + .mockImplementation((data: unknown) => { output.push(data as string); return true; }); + +beforeEach(() => { + output = []; +}); + +test('test with default args', async () => { + const command = new Protinfo(); + await command.run(["P78330"], { header: true, format: "csv" }); + expect(output[0].startsWith("protein,taxon_id,taxon_name,taxon_rank,ec_number,go_term,ipr_code")).toBeTruthy(); + expect(output[1].startsWith("P78330,9606,Homo sapiens")).toBeTruthy(); + expect(output.length).toBe(2); +}); + +test('test with fasta', async () => { + const command = new Protinfo(); + await command.run([">test", "P78330"], { header: true, format: "csv" }); + expect(output[0].startsWith("fasta_header,protein,taxon_id,taxon_name,taxon_rank,ec_number,go_term,ipr_code")).toBeTruthy(); + expect(output[1].startsWith(">test,P78330,9606,Homo sapiens")).toBeTruthy(); + expect(output.length).toBe(2); +}); From 
a58a0d39a30c7678e3ffc283b8255677018b8d4f Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 15:04:53 +0200 Subject: [PATCH 12/14] add taxa2lca --- lib/commands/unipept.ts | 4 ++- lib/commands/unipept/taxa2lca.ts | 29 ++++++++++++++++++++++ lib/commands/unipept/unipept_subcommand.ts | 12 ++++++++- lib/formatters/csv_formatter.ts | 3 +++ lib/formatters/formatter.ts | 3 +++ tests/commands/unipept/taxa2lca.test.ts | 19 ++++++++++++++ 6 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 lib/commands/unipept/taxa2lca.ts create mode 100644 tests/commands/unipept/taxa2lca.test.ts diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts index 3ba62182..7f4ac200 100644 --- a/lib/commands/unipept.ts +++ b/lib/commands/unipept.ts @@ -8,6 +8,7 @@ import { Pept2prot } from './unipept/pept2prot.js'; import { Pept2taxa } from './unipept/pept2taxa.js'; import { Peptinfo } from './unipept/peptinfo.js'; import { Protinfo } from './unipept/protinfo.js'; +import { Taxa2lca } from './unipept/taxa2lca.js'; export class Unipept extends BaseCommand { @@ -35,7 +36,8 @@ The command will give priority to the first way the input is passed, in the orde .addCommand(new Pept2prot().command) .addCommand(new Pept2taxa().command) .addCommand(new Peptinfo().command) - .addCommand(new Protinfo().command); + .addCommand(new Protinfo().command) + .addCommand(new Taxa2lca().command); } async run(args?: string[]) { diff --git a/lib/commands/unipept/taxa2lca.ts b/lib/commands/unipept/taxa2lca.ts new file mode 100644 index 00000000..573487d2 --- /dev/null +++ b/lib/commands/unipept/taxa2lca.ts @@ -0,0 +1,29 @@ +import { Option } from "commander"; +import { UnipeptSubcommand } from "./unipept_subcommand.js"; + +export class Taxa2lca extends UnipeptSubcommand { + + readonly description = `The unipept taxa2lca command computes the lowest common ancestor of a given list of NCBI Taxonomy Identifiers. 
The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed + +- as separate command line arguments +- in a text file that is passed as an argument to the -i option +- to standard input + +The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`; + + constructor() { + super("taxa2lca"); + + this.command + .summary("Compute taxonomic lowest common ancestor for given list of taxa.") + .description(this.description) + .option("-a, --all", "report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.") + .addOption(new Option("-s --select ", "select the information fields to return. Selected fields are passed as a comma separated list of field names. 
Multiple -s (or --select) options may be used.")) + .argument("[proteins...]", "optionally, 1 or more UniProt ids") + .action((args, options) => this.run(args, options)); + } + + defaultBatchSize(): number { + throw new Error("Batch size not needed for this command."); + } +} diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts index e1aba774..dcd18fbe 100644 --- a/lib/commands/unipept/unipept_subcommand.ts +++ b/lib/commands/unipept/unipept_subcommand.ts @@ -70,7 +70,9 @@ export abstract class UnipeptSubcommand { const iterator = this.getInputIterator(args, options.input as string); const firstLine = (await iterator.next()).value; - if (firstLine.startsWith(">")) { + if (this.command.name() === "taxa2lca") { + await this.simpleInputProcessor(firstLine, iterator); + } else if (firstLine.startsWith(">")) { this.fasta = true; await this.fastaInputProcessor(firstLine, iterator); } else { @@ -133,6 +135,14 @@ export abstract class UnipeptSubcommand { await this.processBatch(slice, fastaMapper); } + async simpleInputProcessor(firstLine: string, iterator: IterableIterator | AsyncIterableIterator) { + const slice = [firstLine]; + for await (const line of iterator) { + slice.push(line); + } + await this.processBatch(slice); + } + private constructRequestBody(slice: string[]): URLSearchParams { const names = this.getSelectedFields().length === 0 || this.getSelectedFields().some(regex => regex.toString().includes("name") || regex.toString().includes(".*$")); return new URLSearchParams({ diff --git a/lib/formatters/csv_formatter.ts b/lib/formatters/csv_formatter.ts index 822c593e..e674d2ca 100644 --- a/lib/formatters/csv_formatter.ts +++ b/lib/formatters/csv_formatter.ts @@ -16,6 +16,9 @@ export class CSVFormatter extends Formatter { } getKeys(data: { [key: string]: unknown }[], fastaMapper?: boolean | undefined): string[] { + if (!Array.isArray(data)) { + data = [data]; + } return fastaMapper ? 
["fasta_header", ...Object.keys(data[0])] : Object.keys(data[0]); } diff --git a/lib/formatters/formatter.ts b/lib/formatters/formatter.ts index d0ca6dca..10d3cefc 100644 --- a/lib/formatters/formatter.ts +++ b/lib/formatters/formatter.ts @@ -8,6 +8,9 @@ export abstract class Formatter { if (fastaMapper) { data = this.integrateFastaHeaders(data as { [key: string]: string }[], fastaMapper); } + if (!Array.isArray(data)) { + data = [data]; + } return this.convert(data, first); } diff --git a/tests/commands/unipept/taxa2lca.test.ts b/tests/commands/unipept/taxa2lca.test.ts new file mode 100644 index 00000000..1f5c6351 --- /dev/null +++ b/tests/commands/unipept/taxa2lca.test.ts @@ -0,0 +1,19 @@ +import { jest } from '@jest/globals'; +import { Taxa2lca } from "../../../lib/commands/unipept/taxa2lca"; + +let output: string[]; +jest + .spyOn(process.stdout, "write") + .mockImplementation((data: unknown) => { output.push(data as string); return true; }); + +beforeEach(() => { + output = []; +}); + +test('test with default args', async () => { + const command = new Taxa2lca(); + await command.run(["216816", "1680"], { header: true, format: "csv" }); + expect(output[0].startsWith("taxon_id,taxon_name,taxon_rank")).toBeTruthy(); + expect(output[1].startsWith("1678,Bifidobacterium,genus")).toBeTruthy(); + expect(output.length).toBe(2); +}); From 2d98583dd46e1cca1dc81551088c991ccac29c1a Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 15:35:11 +0200 Subject: [PATCH 13/14] add taxonomy command --- lib/commands/unipept.ts | 4 ++- lib/commands/unipept/taxa2lca.ts | 2 +- lib/commands/unipept/taxonomy.ts | 33 +++++++++++++++++++++++++ tests/commands/unipept/taxonomy.test.ts | 27 ++++++++++++++++++++ 4 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 lib/commands/unipept/taxonomy.ts create mode 100644 tests/commands/unipept/taxonomy.test.ts diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts index 7f4ac200..0585138f 100644 --- 
a/lib/commands/unipept.ts +++ b/lib/commands/unipept.ts @@ -9,6 +9,7 @@ import { Pept2taxa } from './unipept/pept2taxa.js'; import { Peptinfo } from './unipept/peptinfo.js'; import { Protinfo } from './unipept/protinfo.js'; import { Taxa2lca } from './unipept/taxa2lca.js'; +import { Taxonomy } from './unipept/taxonomy.js'; export class Unipept extends BaseCommand { @@ -37,7 +38,8 @@ The command will give priority to the first way the input is passed, in the orde .addCommand(new Pept2taxa().command) .addCommand(new Peptinfo().command) .addCommand(new Protinfo().command) - .addCommand(new Taxa2lca().command); + .addCommand(new Taxa2lca().command) + .addCommand(new Taxonomy().command); } async run(args?: string[]) { diff --git a/lib/commands/unipept/taxa2lca.ts b/lib/commands/unipept/taxa2lca.ts index 573487d2..5b9a5e6c 100644 --- a/lib/commands/unipept/taxa2lca.ts +++ b/lib/commands/unipept/taxa2lca.ts @@ -19,7 +19,7 @@ The command will give priority to the first way NCBI Taxonomy Identifiers are pa .description(this.description) .option("-a, --all", "report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.") .addOption(new Option("-s --select ", "select the information fields to return. Selected fields are passed as a comma separated list of field names. 
Multiple -s (or --select) options may be used.")) - .argument("[proteins...]", "optionally, 1 or more UniProt ids") + .argument("[taxonids...]", "optionally, 1 or more taxon ids") .action((args, options) => this.run(args, options)); } diff --git a/lib/commands/unipept/taxonomy.ts b/lib/commands/unipept/taxonomy.ts new file mode 100644 index 00000000..ae8426a1 --- /dev/null +++ b/lib/commands/unipept/taxonomy.ts @@ -0,0 +1,33 @@ +import { Option } from "commander"; +import { UnipeptSubcommand } from "./unipept_subcommand.js"; + +export class Taxonomy extends UnipeptSubcommand { + + readonly description = `The unipept taxonomy command yields information from the Unipept Taxonomy records for a given list of NCBI Taxonomy Identifiers. The Unipept Taxonomy is a cleaned up version of the NCBI Taxonomy, and its records are also records of the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed + +- as separate command line arguments +- in a text file that is passed as an argument to the -i option +- to standard input + +The command will give priority to the first way taxon id's are passed, in the order as listed above. Text files and standard input should have one taxon id per line.`; + + constructor() { + super("taxonomy"); + + this.command + .summary("Fetch taxonomic information from Unipept Taxonomy.") + .description(this.description) + .option("-a, --all", "report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.") + .addOption(new Option("-s --select ", "select the information fields to return. Selected fields are passed as a comma separated list of field names. 
Multiple -s (or --select) options may be used.")) + .argument("[taxonids...]", "optionally, 1 or more taxon ids") + .action((args, options) => this.run(args, options)); + } + + requiredFields(): string[] { + return ["taxon_id"]; + } + + defaultBatchSize(): number { + return 100; + } +} diff --git a/tests/commands/unipept/taxonomy.test.ts b/tests/commands/unipept/taxonomy.test.ts new file mode 100644 index 00000000..afaec21e --- /dev/null +++ b/tests/commands/unipept/taxonomy.test.ts @@ -0,0 +1,27 @@ +import { jest } from '@jest/globals'; +import { Taxonomy } from "../../../lib/commands/unipept/taxonomy"; + +let output: string[]; +jest + .spyOn(process.stdout, "write") + .mockImplementation((data: unknown) => { output.push(data as string); return true; }); + +beforeEach(() => { + output = []; +}); + +test('test with default args', async () => { + const command = new Taxonomy(); + await command.run(["216816"], { header: true, format: "csv" }); + expect(output[0].startsWith("taxon_id,taxon_name,taxon_rank")).toBeTruthy(); + expect(output[1].startsWith("216816,Bifidobacterium longum,species")).toBeTruthy(); + expect(output.length).toBe(2); +}); + +test('test with fasta', async () => { + const command = new Taxonomy(); + await command.run([">test", "216816"], { header: true, format: "csv" }); + expect(output[0].startsWith("fasta_header,taxon_id,taxon_name,taxon_rank")).toBeTruthy(); + expect(output[1].startsWith(">test,216816,Bifidobacterium longum,species")).toBeTruthy(); + expect(output.length).toBe(2); +}); From e7c2365c560a6cf91d0a2f7cbfc0eacd32c81ab6 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 6 Aug 2024 15:37:40 +0200 Subject: [PATCH 14/14] fix strings --- lib/commands/unipept/protinfo.ts | 2 +- lib/commands/unipept/taxa2lca.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/commands/unipept/protinfo.ts b/lib/commands/unipept/protinfo.ts index 22b0ca62..df8f6f8c 100644 --- a/lib/commands/unipept/protinfo.ts +++ 
b/lib/commands/unipept/protinfo.ts @@ -9,7 +9,7 @@ export class Protinfo extends UnipeptSubcommand { - in a text file that is passed as an argument to the -i option - to standard input -The command will give priority to the first way protein id's are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`; +The command will give priority to the first way protein id's are passed, in the order as listed above. Text files and standard input should have one protein id per line.`; constructor() { super("protinfo"); diff --git a/lib/commands/unipept/taxa2lca.ts b/lib/commands/unipept/taxa2lca.ts index 5b9a5e6c..0dccba06 100644 --- a/lib/commands/unipept/taxa2lca.ts +++ b/lib/commands/unipept/taxa2lca.ts @@ -9,7 +9,7 @@ export class Taxa2lca extends UnipeptSubcommand { - in a text file that is passed as an argument to the -i option - to standard input -The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`; +The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one taxon id per line.`; constructor() { super("taxa2lca");