Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port peptfilter to typescript #175

Merged
merged 6 commits into from
Jun 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions bin/peptfilter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env node

import { Peptfilter } from '../lib/commands/peptfilter.js';
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this import from a "js" file? Shouldn't this be "ts"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can make .js, .ts, or no extension at all work with the right settings. I would have preferred using no extension, but I couldn't get it to work under all conditions: compiling with the TypeScript compiler itself, running the compiled output, running it with yarn, and running it through the tests. Importing with the .js extension is the only way I managed to get it to work under all of those conditions.


// Entry point of the peptfilter executable: build the command and run it against
// process.argv / standard input.
const command = new Peptfilter();

// `run()` is async; handle a rejection explicitly instead of leaving a floating
// promise, and signal the failure to the calling shell through the exit code.
command.run().catch((error: unknown) => {
    console.error(error);
    process.exitCode = 1;
});
2 changes: 1 addition & 1 deletion jest.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ const config: Config = {
// notifyMode: "failure-change",

// A preset that is used as a base for Jest's configuration
// preset: undefined,
preset: 'ts-jest/presets/default-esm',

// Run tests from one or more projects
// projects: undefined,
Expand Down
7 changes: 4 additions & 3 deletions lib/commands/base_command.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Command } from "commander";
import { version } from '../../package.json';
import { readFileSync } from "fs";

/**
* This is a base class which provides a common interface for all commands.
Expand All @@ -11,8 +11,10 @@ import { version } from '../../package.json';
export abstract class BaseCommand {
public program: Command;
args: string[] | undefined;
version: string;

constructor(options?: { exitOverride?: boolean, suppressOutput?: boolean, args?: string[] }) {
this.version = JSON.parse(readFileSync(new URL("../../package.json", import.meta.url), "utf8")).version;
this.program = this.create(options);
this.args = options?.args;
}
Expand All @@ -37,8 +39,7 @@ export abstract class BaseCommand {
writeErr: () => { }
});
}

program.version(version);
program.version(this.version);

return program;
}
Expand Down
67 changes: 67 additions & 0 deletions lib/commands/peptfilter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import { createInterface } from 'node:readline';
import { BaseCommand } from './base_command.js';

/**
 * Implements the `peptfilter` command: reads peptides from standard input (one per
 * line) and writes to standard output only those that satisfy the length, "lacks"
 * and "contains" criteria. Lines starting with `>` (FASTA headers) are always
 * passed through unchanged.
 */
export class Peptfilter extends BaseCommand {

    /** Number of input lines that are processed between two flushes of the output buffer. */
    private static readonly FLUSH_INTERVAL = 1000;

    readonly description = `The peptfilter command filters a list of peptides according to specific criteria. The command expects a list of peptides that are passed to standard input.

The input should have one peptide per line. FASTA headers are preserved in the output, so that peptides remain bundled.`;

    /**
     * Registers the summary, description and filter options on the underlying program.
     *
     * @param options Forwarded unchanged to the base command constructor.
     */
    constructor(options?: { exitOverride?: boolean, suppressOutput?: boolean, args?: string[] }) {
        super(options);

        this.program
            .summary("Filter peptides based on specific criteria.")
            .description(this.description)
            .option("--minlen <length>", "only retain peptides having at least this many amino acids", (d) => Peptfilter.parseLength(d, "--minlen"), 5)
            .option("--maxlen <length>", "only retain peptides having at most this many amino acids", (d) => Peptfilter.parseLength(d, "--maxlen"), 50)
            .option("-l, --lacks <amino acids>", "only retain peptides that lack all of the specified amino acids", (d) => d.split(""))
            .option("-c, --contains <amino acids>", "only retain peptides that contain all of the specified amino acids", (d) => d.split(""));
    }

    /**
     * Parses the command line arguments, then streams standard input line by line and
     * writes every retained line to standard output.
     *
     * Performance note: this implementation takes 4 seconds to run on swissprot. It can be made faster by using line events instead of
     * async iterators. This alternative implementation runs in 2.5 seconds. However, I decided that the async iterator implementation is
     * both more readable and more in line with the implementation of the other commands.
     *
     * @returns A promise that resolves once all input has been read and the last buffer has been written.
     */
    async run(): Promise<void> {
        this.parseArguments();

        const options = this.program.opts();
        const minLen: number = options.minlen;
        const maxLen: number = options.maxlen;
        const lacks: string[] = options.lacks ?? [];
        const contains: string[] = options.contains ?? [];

        // buffering output makes a big difference in performance
        let output: string[] = [];
        let i = 0;

        for await (const line of createInterface({ input: process.stdin })) {
            i++;
            if (line.startsWith(">")) { // pass through FASTA headers
                output.push(line);
            } else if (Peptfilter.checkLength(line, minLen, maxLen) && Peptfilter.checkLacks(line, lacks) && Peptfilter.checkContains(line, contains)) {
                output.push(line);
            }
            if (i % Peptfilter.FLUSH_INTERVAL === 0) {
                output.push(""); // add a newline at the end of the buffer without additional string copy
                process.stdout.write(output.join("\n"));
                output = [];
            }
        }

        // flush whatever is left; the trailing empty entry terminates the last line with a newline
        output.push("");
        process.stdout.write(output.join("\n"));
    }

    /**
     * Checks whether the length of a peptide lies within the given bounds.
     *
     * @param line The peptide to check.
     * @param minLen Minimum allowed length (inclusive).
     * @param maxlen Maximum allowed length (inclusive).
     * @returns `true` if `minLen <= line.length <= maxlen`.
     */
    static checkLength(line: string, minLen: number, maxlen: number): boolean {
        return line.length >= minLen && line.length <= maxlen;
    }

    /**
     * Checks that a peptide contains none of the given amino acids.
     *
     * @param line The peptide to check.
     * @param lacks Amino acids that must not occur; an empty list always passes.
     * @returns `true` if none of the entries of `lacks` occurs in `line`.
     */
    static checkLacks(line: string, lacks: readonly string[]): boolean {
        return lacks.every((aa: string) => !line.includes(aa));
    }

    /**
     * Checks that a peptide contains all of the given amino acids.
     *
     * @param line The peptide to check.
     * @param contains Amino acids that must all occur; an empty list always passes.
     * @returns `true` if every entry of `contains` occurs in `line`.
     */
    static checkContains(line: string, contains: readonly string[]): boolean {
        return contains.every((aa: string) => line.includes(aa));
    }

    /**
     * Converts the raw value of a length option to a number.
     *
     * A value that does not start with a number would otherwise become `NaN`, which makes
     * every length comparison false and silently filters out all peptides.
     *
     * @param value Raw option value as given on the command line.
     * @param flag Name of the option, only used in the error message.
     * @returns The parsed base-10 integer.
     * @throws Error if `value` cannot be parsed as an integer.
     */
    private static parseLength(value: string, flag: string): number {
        const parsed = parseInt(value, 10);
        if (Number.isNaN(parsed)) {
            throw new Error(`option '${flag}' expects an integer, but got '${value}'`);
        }
        return parsed;
    }
}
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
"private": false,
"type": "module",
"bin": {
"peptfilter": "./bin/peptfilter.js",
"uniprot": "./bin/uniprot.js"
},
"scripts": {
"build": "yarn run tsc",
"lint": "yarn run eslint",
"test": "yarn run jest",
"test": "NODE_OPTIONS='--experimental-vm-modules --no-warnings' yarn run jest",
"typecheck": "yarn tsc --skipLibCheck --noEmit",
"peptfilter": "yarn run tsx bin/peptfilter.ts",
"uniprot": "yarn run tsx bin/uniprot.ts"
},
"dependencies": {
Expand Down
99 changes: 99 additions & 0 deletions tests/commands/peptfilter.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import { Peptfilter } from '../../lib/commands/peptfilter';
import { jest } from '@jest/globals';
import * as mock from 'mock-stdin';

// Chunks captured from the stubbed process.stdout.write and process.stderr.write.
// Both arrays are re-created before every test by the beforeEach hook below.
let output: string[];
let error: string[];
// Replace process.stdout.write with a stub that records each chunk in `output`
// and returns true. The spy handle itself is not referenced by the tests, hence
// the lint suppression on the next line.
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const writeSpy = jest
.spyOn(process.stdout, "write")
.mockImplementation((data: unknown) => { output.push(data as string); return true; });
// Same for process.stderr.write, recording into `error`; the tests assert on this
// spy's call count to verify that nothing was written to standard error.
const errorSpy = jest
.spyOn(process.stderr, "write")
.mockImplementation((data: unknown) => { error.push(data as string); return true; });

// Start every test with empty capture buffers.
beforeEach(() => {
output = [];
error = [];
});

test('test length filter', async () => {
    const peptide = 'AALER';

    // Each row is [minLen, maxLen, expected]; both bounds are inclusive.
    const cases: [number, number, boolean][] = [
        // min length
        [4, 10, true],
        [5, 10, true],
        [6, 10, false],
        // max length
        [1, 4, false],
        [1, 5, true],
        [1, 6, true],
    ];

    for (const [minLen, maxLen, expected] of cases) {
        expect(Peptfilter.checkLength(peptide, minLen, maxLen)).toBe(expected);
    }
});

test('test lacks filter', async () => {
    const peptide = 'AALER';

    // Each row is [forbidden amino acids, expected]; the peptide passes only if none of them occurs.
    const cases: [string, boolean][] = [
        ['', true],
        ['BCD', true],
        ['A', false],
        ['AE', false],
    ];

    for (const [aminoAcids, expected] of cases) {
        expect(Peptfilter.checkLacks(peptide, aminoAcids.split(""))).toBe(expected);
    }
});

test('test contains filter', async () => {
    const peptide = 'AALER';

    // Each row is [required amino acids, expected]; the peptide passes only if all of them occur.
    const cases: [string, boolean][] = [
        ['', true],
        ['A', true],
        ['AE', true],
        ['BCD', false],
        ['AB', false],
    ];

    for (const [aminoAcids, expected] of cases) {
        expect(Peptfilter.checkContains(peptide, aminoAcids.split(""))).toBe(expected);
    }
});

test('test default filter from stdin', async () => {
    // With the default options (minlen 5, maxlen 50) only the 5-residue peptide survives.
    const stdin = mock.stdin();

    try {
        const command = new Peptfilter();
        const run = command.run();

        stdin.send("AAAA\n");
        stdin.send("AAAAA\n");
        stdin.end();

        await run;
    } finally {
        // put the real process.stdin back so the mock cannot leak into other tests
        stdin.restore();
    }

    expect(errorSpy).toHaveBeenCalledTimes(0);
    expect(output.join("").trimEnd().split("\n").length).toBe(1);
});

test('test if it passes fasta from stdin', async () => {
    // The FASTA header must be kept even though it is shorter than minlen; the
    // 3-residue peptide is dropped by the default length filter.
    const stdin = mock.stdin();

    try {
        const command = new Peptfilter();
        const run = command.run();

        stdin.send(">AA\n");
        stdin.send("AAA\n");
        stdin.end();

        await run;
    } finally {
        // put the real process.stdin back so the mock cannot leak into other tests
        stdin.restore();
    }

    expect(errorSpy).toHaveBeenCalledTimes(0);
    expect(output.join("").trimEnd().split("\n").length).toBe(1);
    expect(output[0]).toBe(">AA\n");
});

test('test complex example from stdin', async () => {
    // Combines all four options; only "CCCCCA" has a valid length, lacks B and contains A.
    const stdin = mock.stdin();

    try {
        const command = new Peptfilter({ args: ["--minlen", "4", "--maxlen", "10", "--lacks", "B", "--contains", "A"] });
        const run = command.run();

        stdin.send("A\n");           // too short
        stdin.send("AAAAAAAAAAA\n"); // too long
        stdin.send("AAAAB\n");       // contains the forbidden B
        stdin.send("BBBBB\n");       // contains B and lacks A
        stdin.send("CCCCC\n");       // lacks the required A
        stdin.send("CCCCCA\n");      // satisfies every criterion
        stdin.end();

        await run;
    } finally {
        // put the real process.stdin back so the mock cannot leak into other tests
        stdin.restore();
    }

    expect(errorSpy).toHaveBeenCalledTimes(0);
    expect(output.join("").trimEnd().split("\n").length).toBe(1);
    expect(output[0]).toBe("CCCCCA\n");
});
1 change: 1 addition & 0 deletions tests/commands/uniprot.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Uniprot } from '../../lib/commands/uniprot';
import { jest } from '@jest/globals';
import * as mock from 'mock-stdin';

let output: string[];
Expand Down