Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fasta support #179

Merged
merged 3 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 52 additions & 15 deletions lib/commands/unipept/unipept_subcommand.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,17 @@ export abstract class UnipeptSubcommand {
selectedFields?: RegExp[];
fasta: boolean;

// we must save this to be able to close it properly in tests
private streamInterface?: Interface;

/**
 * Initialises the state shared by all subcommands.
 *
 * The version is read from the `package.json` located at `../../../package.json`
 * relative to this module and is embedded in the user agent string. The command
 * object is built through `this.create(name)` and FASTA mode starts switched off.
 *
 * @param name name stored on the instance and handed to `create`
 */
constructor(name: string) {
  this.name = name;

  const packageJsonUrl = new URL("../../../package.json", import.meta.url);
  const packageInfo = JSON.parse(readFileSync(packageJsonUrl, "utf8"));
  this.user_agent = `unipept-cli/${packageInfo.version}`;

  this.command = this.create(name);
  this.fasta = false;
}

/**
 * Has to be implemented by every concrete subcommand.
 *
 * NOTE(review): presumably the number of inputs sent per request when no
 * explicit batch size is configured — confirm against the code that sets
 * `batchSize`.
 */
abstract defaultBatchSize(): number;

requiredFields(): string[] {
Expand Down Expand Up @@ -58,19 +62,17 @@ export abstract class UnipeptSubcommand {
this.outputStream = createWriteStream(this.options.output);
}

let slice = [];

for await (const input of this.getInputIterator(args, options.input)) {
slice.push(input);
if (slice.length >= this.batchSize) {
await this.processBatch(slice);
slice = [];
}
const iterator = this.getInputIterator(args, options.input);
const firstLine = (await iterator.next()).value;
if (firstLine.startsWith(">")) {
this.fasta = true;
await this.fastaInputProcessor(firstLine, iterator);
} else {
await this.normalInputProcessor(firstLine, iterator);
}
await this.processBatch(slice);
}

async processBatch(slice: string[]): Promise<void> {
async processBatch(slice: string[], fastaMapper?: { [key: string]: string }): Promise<void> {
if (!this.formatter) throw new Error("Formatter not set");

const r = await fetch(this.url as string, {
Expand All @@ -87,11 +89,44 @@ export abstract class UnipeptSubcommand {
this.outputStream.write(this.formatter.header(result, this.fasta));
}

this.outputStream.write(this.formatter.format(result, this.fasta, this.firstBatch));
this.outputStream.write(this.formatter.format(result, fastaMapper, this.firstBatch));

if (this.firstBatch) this.firstBatch = false;
}

/**
 * Handles plain (non-FASTA) input.
 *
 * Lines are gathered into a batch, starting with `firstLine`. As soon as the
 * batch holds at least `this.batchSize` lines it is passed to `processBatch`
 * and a new, empty batch is started. Once the input is exhausted the remaining
 * batch is passed on as well, even when it is empty.
 *
 * @param firstLine line that was already taken from the input by the caller
 * @param iterator  iterator over the remaining input lines
 */
async normalInputProcessor(firstLine: string, iterator: IterableIterator<string> | AsyncIterableIterator<string>) {
  let pending: string[] = [firstLine];

  for await (const entry of iterator) {
    pending.push(entry);

    const batchIsFull = pending.length >= this.batchSize;
    if (batchIsFull) {
      await this.processBatch(pending);
      pending = [];
    }
  }

  // whatever did not fill a complete batch still has to be sent
  await this.processBatch(pending);
}

/**
 * Handles FASTA input: a line starting with ">" is a header, every other line
 * is a sequence that belongs to the most recently seen header.
 *
 * Sequences are gathered into batches of `this.batchSize` and passed to
 * `processBatch` together with a map from sequence to header. Once the input
 * is exhausted the remaining batch is passed on as well, even when it is empty.
 *
 * The map can hold only one header per sequence. A sequence that shows up again
 * under a different header within the same batch would therefore overwrite the
 * earlier association and the earlier occurrence would be reported with the
 * wrong header. To prevent this, the pending batch is sent off before such a
 * repeated sequence is recorded.
 *
 * @param firstLine first line of the input, used as the initial header
 * @param iterator  iterator over the remaining input lines
 */
async fastaInputProcessor(firstLine: string, iterator: IterableIterator<string> | AsyncIterableIterator<string>) {
  let currentFastaHeader = firstLine;
  let slice: string[] = [];
  let fastaMapper: { [key: string]: string } = {};

  for await (const line of iterator) {
    if (line.startsWith(">")) {
      currentFastaHeader = line;
      continue;
    }

    // own-property check so that inherited names such as "constructor" never count as a recorded sequence
    const recordedUnderOtherHeader =
      Object.prototype.hasOwnProperty.call(fastaMapper, line) && fastaMapper[line] !== currentFastaHeader;
    if (recordedUnderOtherHeader) {
      // flush first, otherwise the assignment below would replace the earlier header
      await this.processBatch(slice, fastaMapper);
      slice = [];
      fastaMapper = {};
    }

    fastaMapper[line] = currentFastaHeader;
    slice.push(line);
    if (slice.length >= this.batchSize) {
      await this.processBatch(slice, fastaMapper);
      slice = [];
      fastaMapper = {};
    }
  }

  await this.processBatch(slice, fastaMapper);
}

private constructRequestBody(slice: string[]): URLSearchParams {
const names = this.getSelectedFields().length === 0 || this.getSelectedFields().some(regex => regex.toString().includes("name") || regex.toString().includes(".*$"));
return new URLSearchParams({
Expand Down Expand Up @@ -128,13 +163,15 @@ export abstract class UnipeptSubcommand {
* - if an input file is given, use the file
* - otherwise, use standard input
*/
private getInputIterator(args: string[], input?: string): string[] | Interface {
private getInputIterator(args: string[], input?: string): IterableIterator<string> | AsyncIterableIterator<string> {
if (args.length > 0) {
return args;
return args.values();
} else if (input) {
return createInterface({ input: createReadStream(input) });
this.streamInterface = createInterface({ input: createReadStream(input) });
return this.streamInterface[Symbol.asyncIterator]();
} else {
return createInterface({ input: process.stdin })
this.streamInterface = createInterface({ input: process.stdin });
return this.streamInterface[Symbol.asyncIterator]();
}
}

Expand Down
11 changes: 7 additions & 4 deletions lib/formatters/formatter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,18 @@ export abstract class Formatter {
abstract footer(): string;
abstract convert(data: object[], first?: boolean): string;

format(data: object[], fastaMapper?: boolean, first?: boolean): string {
format(data: object[], fastaMapper?: { [key: string]: string }, first?: boolean): string {
if (fastaMapper) {
data = this.integrateFastaHeaders(data, fastaMapper);
data = this.integrateFastaHeaders(data as { [key: string]: string }[], fastaMapper);
}
return this.convert(data, first);
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
integrateFastaHeaders(data: object[], fastaMapper: boolean): object[] {
/**
 * Adds a leading `fasta_header` field to every entry of `data`.
 *
 * The header is looked up in `fastaMapper` using the value of the entry's first
 * property. The name of that property is taken from the first entry and reused
 * for all entries. The array is updated in place and returned.
 *
 * @param data        entries to annotate; an empty array is returned unchanged
 * @param fastaMapper maps the value of an entry's first property to its FASTA header
 * @returns `data`, with every entry replaced by a copy that starts with `fasta_header`
 */
integrateFastaHeaders(data: { [key: string]: string }[], fastaMapper: { [key: string]: string }): object[] {
  // without entries there is no first entry to read the key from; Object.keys(undefined) would throw
  if (data.length === 0) {
    return data;
  }

  const key = Object.keys(data[0])[0];
  data.forEach((entry, i) => {
    data[i] = Object.assign({ fasta_header: fastaMapper[entry[key]] }, entry);
  });
  return data;
}
}
4 changes: 2 additions & 2 deletions lib/formatters/to_xml.ts
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,6 @@ function _isArray(array) {
return array instanceof Array;
}

export function toXML(value: object, replacer?: function, space?: number | string): string {
return _toXML(value, replacer, space);
/**
 * Exported wrapper that converts `value` by delegating to `_toXML`.
 *
 * @param value object to convert
 * @returns the string produced by `_toXML`
 */
export function toXML(value: object): string {
  const xml = _toXML(value);
  return xml;
}
18 changes: 8 additions & 10 deletions tests/commands/unipept/unipept_subcommand.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { Interface } from 'readline';
import { Pept2lca } from '../../../lib/commands/unipept/pept2lca';

test('test command setup', () => {
Expand Down Expand Up @@ -28,19 +27,18 @@ test('test correct inputIterator', async () => {
const command = new Pept2lca();

// should be stdin
let input = command["getInputIterator"]([]) as Interface;
expect(input).toBeInstanceOf(Interface);
input.close();
let input = command["getInputIterator"]([]) as AsyncIterableIterator<string>;
expect(typeof input[Symbol.asyncIterator]).toBe("function");
command['streamInterface']?.close();

// should be a (non-existent) file and error
input = command["getInputIterator"]([], "filename") as Interface;
input.on("error", (e) => {
expect(e.toString()).toMatch(/no such file/);
});
input = command["getInputIterator"]([], "filename") as AsyncIterableIterator<string>;
expect(typeof input[Symbol.asyncIterator]).toBe("function");
await expect(async () => { await input.next() }).rejects.toThrow(/no such file/);

// should be array
const inputArray = command["getInputIterator"](["A", "B"]);
expect(inputArray).toBeInstanceOf(Array);
const inputArray = command["getInputIterator"](["A", "B"]) as IterableIterator<string>;
expect(typeof inputArray[Symbol.iterator]).toBe("function");
});

test('test selected fields parsing', () => {
Expand Down
8 changes: 4 additions & 4 deletions tests/formatters/csv_formatter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ test('test convert', () => {
});

test('test format with fasta', () => {
  // maps the value 5 to the header ">test"
  const headersByKey = { 5: ">test" };
  const input = [TestObject.testObject(), TestObject.testObject()];

  // both rows are expected to start with the header and to end with a newline
  const expected = `>test,${TestObject.asCsv()}\n` + `>test,${TestObject.asCsv()}\n`;

  const actual = formatter.format(input, headersByKey, false);
  expect(actual).toBe(expected);
});
12 changes: 12 additions & 0 deletions tests/formatters/formatter.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { FormatterFactory } from "../../lib/formatters/formatter_factory";
import { TestObject } from "./test_object";

test('test integrate fasta headers', async () => {
  const csvFormatter = FormatterFactory.getFormatter("csv");
  // maps the value 5 to the header ">test"
  const headersByKey = { 5: ">test" };
  const input = [TestObject.testObject(), TestObject.testObject()];

  // every entry is expected to come back with a leading fasta_header field
  const withHeader = () => Object.assign({ fasta_header: ">test" }, TestObject.testObject());
  const expected = [withHeader(), withHeader()];

  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
  // @ts-ignore
  expect(csvFormatter.integrateFastaHeaders(input, headersByKey)).toEqual(expected);
});
8 changes: 4 additions & 4 deletions tests/formatters/json_formatter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ test('test convert', () => {
});

test('test format with fasta', () => {
  // maps the value 5 to the header ">test"
  const headersByKey = { 5: ">test" };
  const input = [TestObject.testObject()];
  const expected = '{"fasta_header":">test","integer":5,"string":"string","list":["a",2,false]}';

  const actual = formatter.format(input, headersByKey, true);
  expect(actual).toBe(expected);
});
8 changes: 4 additions & 4 deletions tests/formatters/xml_formatter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ test('test convert', () => {
});

test('test format with fasta', () => {
  // maps the value 5 to the header ">test"
  const headersByKey = { 5: ">test" };
  const input = [TestObject.testObject()];

  // the header is expected as the first, XML-escaped child of <result>
  const expected = `<result><fasta_header>&gt;test</fasta_header>${TestObject.asXml()}</result>`;

  const actual = formatter.format(input, headersByKey, true);
  expect(actual).toBe(expected);
});