Skip to content

Commit

Permalink
Merge pull request #176 from unipept/next-prot2pept
Browse files Browse the repository at this point in the history
Port prot2pept to typescript
  • Loading branch information
bmesuere authored Jun 21, 2024
2 parents b071301 + f0346d3 commit ad82132
Show file tree
Hide file tree
Showing 5 changed files with 212 additions and 0 deletions.
6 changes: 6 additions & 0 deletions bin/prot2pept.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env node

import { Prot2pept } from '../lib/commands/prot2pept.js';

// Entry point of the prot2pept executable: build the command and start it right away.
new Prot2pept().run();
79 changes: 79 additions & 0 deletions lib/commands/prot2pept.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import { createInterface } from 'node:readline';
import { BaseCommand } from './base_command.js';

/**
 * Command that reads protein sequences from standard input and writes the peptides obtained by
 * cleaving each sequence according to a (configurable) regular expression to standard output.
 *
 * Two input layouts are supported; the layout is decided by the very first input line:
 *  - plain: every line is one complete protein sequence;
 *  - FASTA: the first line starts with ">". Header lines are echoed unchanged and all sequence
 *    lines between two headers are concatenated into a single protein before splitting.
 */
export class Prot2pept extends BaseCommand {

  readonly description = `The prot2pept command splits each protein sequence into a list of peptides according to a given cleavage-pattern. The command expects a list of protein sequences that are passed to standard input.
The input should have either one protein sequence per line or contain a FASTA formatted list of protein sequences. FASTA headers are preserved in the output, so that peptides can be bundled per protein sequence.
`;

  /**
   * Registers the summary, description and the `--pattern` option on the underlying program.
   *
   * @param options Passed through untouched to the base command constructor.
   */
  constructor(options?: { exitOverride?: boolean, suppressOutput?: boolean, args?: string[] }) {
    super(options);

    this.program
      .summary("Splits each protein sequence into a list of peptides.")
      .description(this.description)
      .option("-p, --pattern <regex>", "specify cleavage-pattern (regex) as the pattern after which the next peptide will be cleaved. By default, it will create tryptic peptides.", "([KR])([^P])")
  }

  /**
   * Reads standard input line by line, splits every protein into peptides and writes the result
   * to standard output.
   *
   * Performance note: Just as with peptfilter, this implementation can be made faster by using line events instead of
   * async iterators.
   */
  async run(): Promise<void> {
    this.parseArguments();

    let pattern: RegExp;
    try {
      // The global flag is mandatory: `replaceAll` rejects non-global regular expressions.
      pattern = new RegExp(this.program.opts().pattern, "g");
    } catch (e) {
      // Stop here: without a valid pattern there is nothing to split.
      return this.program.error(`Your pattern was invalid: ${(e as Error).message}`);
    }

    let fasta = false;
    // Sequence lines collected for the FASTA record that is currently being read.
    let protein: string[] = [];

    // buffering output makes a big difference in performance
    let output: string[] = [];
    let i = 0;

    for await (const line of createInterface({ input: process.stdin })) {
      // Only the first line decides whether the whole input is treated as FASTA.
      if (i === 0 && line.startsWith(">")) {
        fasta = true;
      }

      i++;

      if (fasta) { // if we're in fasta mode, a protein could be split over multiple lines
        if (line.startsWith(">")) { // if we encounter a new header, process the previous protein and output the current header
          if (protein.length > 0) {
            output.push(Prot2pept.splitProtein(protein.join(""), pattern));
          }
          output.push(line.trimEnd());
          protein = [];
        } else {
          protein.push(line.trimEnd());
        }
      } else { // if we're not in fasta mode, each line is a protein sequence
        output.push(Prot2pept.splitProtein(line.trimEnd(), pattern));
      }

      // Flush the buffer every 1000 input lines.
      if (i % 1000 === 0) {
        output.push(""); //add a newline at the end of the buffer without additional string copy
        process.stdout.write(output.join("\n"));
        output = [];
      }
    }

    // In fasta mode the last protein is still pending. Skip it when no sequence lines were
    // collected (input ends on a header), exactly like the in-loop handling of headers does;
    // otherwise a spurious empty line would be written.
    if (fasta && protein.length > 0) {
      output.push(Prot2pept.splitProtein(protein.join(""), pattern));
    }
    output.push("");
    process.stdout.write(output.join("\n"));
  }

  /**
   * Splits one protein sequence into peptides, separated by newlines.
   *
   * Every match of `pattern` is rewritten to "<group 1>\n<group 2>", i.e. the cut is placed
   * between the first and the second capture group. A match also consumes the character behind
   * the cut, so two directly adjacent cleavage sites (e.g. "KR" in "KRK") cannot both be found in
   * one pass. The replacement is therefore applied twice; the second pass may match across a
   * newline that was just inserted, and the resulting double newlines are collapsed afterwards.
   *
   * @param line The protein sequence, without line terminator.
   * @param pattern Global regular expression; the replacement refers to its capture groups 1 and 2.
   * @returns The peptides of the protein joined by "\n".
   */
  static splitProtein(line: string, pattern: RegExp): string {
    const cut = "$1\n$2";
    return line
      .replaceAll(pattern, cut)
      .replaceAll(pattern, cut)
      .replaceAll("\n\n", "\n");
  }
}
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"type": "module",
"bin": {
"peptfilter": "./bin/peptfilter.js",
"prot2pept": "./bin/prot2pept.js",
"uniprot": "./bin/uniprot.js"
},
"scripts": {
Expand All @@ -17,6 +18,7 @@
"test": "NODE_OPTIONS='--experimental-vm-modules --no-warnings' yarn run jest",
"typecheck": "yarn tsc --skipLibCheck --noEmit",
"peptfilter": "yarn run tsx bin/peptfilter.ts",
"prot2pept": "yarn run tsx bin/prot2pept.ts",
"uniprot": "yarn run tsx bin/uniprot.ts"
},
"dependencies": {
Expand Down
124 changes: 124 additions & 0 deletions tests/commands/prot2pept.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import { Prot2pept } from '../../lib/commands/prot2pept';
import { jest } from '@jest/globals';
import * as mock from 'mock-stdin';

// Everything written to stdout / stderr while the current test runs is captured in these arrays
// instead of being printed.
let output: string[];
let error: string[];
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const writeSpy = jest
  .spyOn(process.stdout, "write")
  .mockImplementation((data: unknown) => { output.push(data as string); return true; });
const errorSpy = jest
  .spyOn(process.stderr, "write")
  .mockImplementation((data: unknown) => { error.push(data as string); return true; });

beforeEach(() => {
  output = [];
  error = [];
  // Forget the calls recorded by earlier tests (the mock implementations stay in place), so that
  // `toHaveBeenCalledTimes(0)` only reflects the current test and one failure does not cascade.
  jest.clearAllMocks();
});

// One plain protein: the cut after "R" is made, but not in front of "P".
test('test single line input 1', async () => {
  const input = mock.stdin();

  // Start the command first so it is already listening when the data arrives.
  const pending = new Prot2pept().run();

  input.send("AALTERAALTERPAALTER\n");
  input.end();

  await pending;

  expect(errorSpy).toHaveBeenCalledTimes(0);
  const printed = output.join("").trimEnd();
  expect(printed).toBe("AALTER\nAALTERPAALTER");
});

// Directly adjacent cleavage sites ("KRK") must all be cut, while "KP" stays intact.
test('test single line input 2', async () => {
  const input = mock.stdin();

  // Start the command first so it is already listening when the data arrives.
  const pending = new Prot2pept().run();

  input.send("KRKPR\n");
  input.end();

  await pending;

  expect(errorSpy).toHaveBeenCalledTimes(0);
  const printed = output.join("").trimEnd();
  expect(printed).toBe("K\nR\nKPR");
});

// Without a FASTA header every input line is handled as a protein of its own.
test('test multi line input', async () => {
  const input = mock.stdin();

  // Start the command first so it is already listening when the data arrives.
  const pending = new Prot2pept().run();

  for (const chunk of ["AALTERAALTERPAALTER\n", "AALTERAA\n"]) {
    input.send(chunk);
  }
  input.end();

  await pending;

  expect(errorSpy).toHaveBeenCalledTimes(0);
  const printed = output.join("").trimEnd();
  expect(printed).toBe("AALTER\nAALTERPAALTER\nAALTER\nAA");
});

// FASTA input with a single-line sequence: the header is echoed in front of the peptides.
test('test fasta input 1', async () => {
  const input = mock.stdin();

  // Start the command first so it is already listening when the data arrives.
  const pending = new Prot2pept().run();

  input.send(">AKA\nAALTERAALTERPAALTER\n");
  input.end();

  await pending;

  expect(errorSpy).toHaveBeenCalledTimes(0);
  const printed = output.join("").trimEnd();
  expect(printed).toBe(">AKA\nAALTER\nAALTERPAALTER");
});

// FASTA input whose sequence is wrapped over several lines: the lines are glued together
// before the protein is split.
test('test fasta input 2', async () => {
  const input = mock.stdin();

  // Start the command first so it is already listening when the data arrives.
  const pending = new Prot2pept().run();

  input.send(">AKA\nAAL\nT\nERAALTER\nP\nAALTER\n");
  input.end();

  await pending;

  expect(errorSpy).toHaveBeenCalledTimes(0);
  const printed = output.join("").trimEnd();
  expect(printed).toBe(">AKA\nAALTER\nAALTERPAALTER");
});

// Two FASTA records (the second with an empty header) and no trailing newline: a header ends
// the running protein, so sequence lines are never joined across records.
test('test fasta input 3', async () => {
  const input = mock.stdin();

  // Start the command first so it is already listening when the data arrives.
  const pending = new Prot2pept().run();

  input.send(">AKA\nAAL\nT\n>\nERAALTER\nP\nAALTER");
  input.end();

  await pending;

  expect(errorSpy).toHaveBeenCalledTimes(0);
  const printed = output.join("").trimEnd();
  expect(printed).toBe(">AKA\nAALT\n>\nER\nAALTERPAALTER");
});

// A user supplied pattern replaces the default one: cut after K/R unless an "A" follows.
test('test custom pattern', async () => {
  const input = mock.stdin();

  // Start the command first so it is already listening when the data arrives.
  const customArgs = ["--pattern", "([KR])([^A])"];
  const pending = new Prot2pept({ args: customArgs }).run();

  input.send("AALTERAALTERPAALTER\n");
  input.end();

  await pending;

  expect(errorSpy).toHaveBeenCalledTimes(0);
  const printed = output.join("").trimEnd();
  expect(printed).toBe("AALTERAALTER\nPAALTER");
});
1 change: 1 addition & 0 deletions tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@
"module": "NodeNext",
"strict": true,
"resolveJsonModule": true,
"target": "esnext"
}
}

0 comments on commit ad82132

Please sign in to comment.