Skip to content

Commit

Permalink
Merge pull request #179 from unipept/next-unipept-fasta
Browse files Browse the repository at this point in the history
Add fasta support
  • Loading branch information
bmesuere authored Aug 5, 2024
2 parents c9aa3aa + b21872d commit eac4282
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 43 deletions.
67 changes: 52 additions & 15 deletions lib/commands/unipept/unipept_subcommand.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,17 @@ export abstract class UnipeptSubcommand {
selectedFields?: RegExp[];
fasta: boolean;

// we must save this to be able to close it properly in tests
private streamInterface?: Interface;

/**
 * Sets up the shared state of a subcommand.
 *
 * The user agent is derived from the `version` field of the `package.json`
 * located three directories above this module.
 *
 * @param name - Name of the subcommand; stored and passed on to `create`.
 */
constructor(name: string) {
  this.name = name;

  const manifestUrl = new URL("../../../package.json", import.meta.url);
  const manifest = JSON.parse(readFileSync(manifestUrl, "utf8"));
  this.user_agent = `unipept-cli/${manifest.version}`;

  this.command = this.create(name);
  // FASTA mode is off until the input proves otherwise.
  this.fasta = false;
}

/**
 * Must be implemented by every concrete subcommand.
 *
 * NOTE(review): presumably the fallback for `batchSize` when no batch size
 * is configured — confirm against the option handling, which is not visible here.
 */
abstract defaultBatchSize(): number;

requiredFields(): string[] {
Expand Down Expand Up @@ -58,19 +62,17 @@ export abstract class UnipeptSubcommand {
this.outputStream = createWriteStream(this.options.output);
}

let slice = [];

for await (const input of this.getInputIterator(args, options.input)) {
slice.push(input);
if (slice.length >= this.batchSize) {
await this.processBatch(slice);
slice = [];
}
const iterator = this.getInputIterator(args, options.input);
const firstLine = (await iterator.next()).value;
if (firstLine.startsWith(">")) {
this.fasta = true;
await this.fastaInputProcessor(firstLine, iterator);
} else {
await this.normalInputProcessor(firstLine, iterator);
}
await this.processBatch(slice);
}

async processBatch(slice: string[]): Promise<void> {
async processBatch(slice: string[], fastaMapper?: { [key: string]: string }): Promise<void> {
if (!this.formatter) throw new Error("Formatter not set");

const r = await fetch(this.url as string, {
Expand All @@ -87,11 +89,44 @@ export abstract class UnipeptSubcommand {
this.outputStream.write(this.formatter.header(result, this.fasta));
}

this.outputStream.write(this.formatter.format(result, this.fasta, this.firstBatch));
this.outputStream.write(this.formatter.format(result, fastaMapper, this.firstBatch));

if (this.firstBatch) this.firstBatch = false;
}

/**
 * Handles plain (non-FASTA) input: groups the lines into batches and hands
 * each batch to `processBatch`.
 *
 * @param firstLine - The line the caller already pulled from `iterator`
 *   to detect the input format; it becomes the first item of the first batch.
 * @param iterator - Yields the remaining input lines.
 */
async normalInputProcessor(firstLine: string, iterator: IterableIterator<string> | AsyncIterableIterator<string>) {
  let pending: string[] = [firstLine];

  for await (const entry of iterator) {
    pending.push(entry);
    if (pending.length < this.batchSize) continue;

    await this.processBatch(pending);
    pending = [];
  }

  // Whatever is left over is always flushed, even when nothing is left.
  await this.processBatch(pending);
}

/**
 * Handles FASTA input: lines starting with `>` are headers, every other line
 * is batched and sent to `processBatch` together with a mapping from each
 * line to the header that most recently preceded it.
 *
 * @param firstLine - The header line the caller already pulled from
 *   `iterator` to detect the input format.
 * @param iterator - Yields the remaining input lines.
 */
async fastaInputProcessor(firstLine: string, iterator: IterableIterator<string> | AsyncIterableIterator<string>) {
  let activeHeader = firstLine;
  let batch: string[] = [];
  let headerByLine: { [key: string]: string } = {};

  for await (const entry of iterator) {
    if (entry.startsWith(">")) {
      // A new header applies to all lines that follow it.
      activeHeader = entry;
      continue;
    }

    headerByLine[entry] = activeHeader;
    batch.push(entry);

    if (batch.length >= this.batchSize) {
      await this.processBatch(batch, headerByLine);
      // Start the next batch with fresh containers.
      batch = [];
      headerByLine = {};
    }
  }

  // The remainder is always flushed, even when it is empty.
  await this.processBatch(batch, headerByLine);
}

private constructRequestBody(slice: string[]): URLSearchParams {
const names = this.getSelectedFields().length === 0 || this.getSelectedFields().some(regex => regex.toString().includes("name") || regex.toString().includes(".*$"));
return new URLSearchParams({
Expand Down Expand Up @@ -128,13 +163,15 @@ export abstract class UnipeptSubcommand {
* - if an input file is given, use the file
* - otherwise, use standard input
*/
private getInputIterator(args: string[], input?: string): string[] | Interface {
private getInputIterator(args: string[], input?: string): IterableIterator<string> | AsyncIterableIterator<string> {
if (args.length > 0) {
return args;
return args.values();
} else if (input) {
return createInterface({ input: createReadStream(input) });
this.streamInterface = createInterface({ input: createReadStream(input) });
return this.streamInterface[Symbol.asyncIterator]();
} else {
return createInterface({ input: process.stdin })
this.streamInterface = createInterface({ input: process.stdin });
return this.streamInterface[Symbol.asyncIterator]();
}
}

Expand Down
11 changes: 7 additions & 4 deletions lib/formatters/formatter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,18 @@ export abstract class Formatter {
abstract footer(): string;
abstract convert(data: object[], first?: boolean): string;

format(data: object[], fastaMapper?: boolean, first?: boolean): string {
format(data: object[], fastaMapper?: { [key: string]: string }, first?: boolean): string {
if (fastaMapper) {
data = this.integrateFastaHeaders(data, fastaMapper);
data = this.integrateFastaHeaders(data as { [key: string]: string }[], fastaMapper);
}
return this.convert(data, first);
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
integrateFastaHeaders(data: object[], fastaMapper: boolean): object[] {
/**
 * Adds a `fasta_header` property to every result entry.
 *
 * The name of the first property of the first entry is used as the lookup
 * field: each entry's value for that field is looked up in `fastaMapper` to
 * find its header. The entries of `data` are replaced in place.
 *
 * @param data - Result entries to annotate; may be empty.
 * @param fastaMapper - Maps a lookup-field value to its FASTA header.
 * @returns The same `data` array, now holding the annotated entries.
 */
integrateFastaHeaders(data: { [key: string]: string }[], fastaMapper: { [key: string]: string }): object[] {
  const first = data[0];
  // An empty batch has no first entry to read the lookup field from;
  // `Object.keys(undefined)` would throw a TypeError.
  if (first === undefined) {
    return data;
  }

  const key = Object.keys(first)[0];
  data.forEach((entry, i) => {
    data[i] = Object.assign({ fasta_header: fastaMapper[entry[key]] }, entry);
  });
  return data;
}
}
4 changes: 2 additions & 2 deletions lib/formatters/to_xml.ts
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,6 @@ function _isArray(array) {
return array instanceof Array;
}

export function toXML(value: object, replacer?: function, space?: number | string): string {
return _toXML(value, replacer, space);
/**
 * Converts `value` to its XML string form by delegating to `_toXML`,
 * passing only the value itself.
 *
 * @param value - The object to convert.
 * @returns The string produced by `_toXML`.
 */
export function toXML(value: object): string {
return _toXML(value);
}
18 changes: 8 additions & 10 deletions tests/commands/unipept/unipept_subcommand.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { Interface } from 'readline';
import { Pept2lca } from '../../../lib/commands/unipept/pept2lca';

test('test command setup', () => {
Expand Down Expand Up @@ -28,19 +27,18 @@ test('test correct inputIterator', async () => {
const command = new Pept2lca();

// should be stdin
let input = command["getInputIterator"]([]) as Interface;
expect(input).toBeInstanceOf(Interface);
input.close();
let input = command["getInputIterator"]([]) as AsyncIterableIterator<string>;
expect(typeof input[Symbol.asyncIterator]).toBe("function");
command['streamInterface']?.close();

// should be a (non-existent) file and error
input = command["getInputIterator"]([], "filename") as Interface;
input.on("error", (e) => {
expect(e.toString()).toMatch(/no such file/);
});
input = command["getInputIterator"]([], "filename") as AsyncIterableIterator<string>;
expect(typeof input[Symbol.asyncIterator]).toBe("function");
await expect(async () => { await input.next() }).rejects.toThrow(/no such file/);

// should be array
const inputArray = command["getInputIterator"](["A", "B"]);
expect(inputArray).toBeInstanceOf(Array);
const inputArray = command["getInputIterator"](["A", "B"]) as IterableIterator<string>;
expect(typeof inputArray[Symbol.iterator]).toBe("function");
});

test('test selected fields parsing', () => {
Expand Down
8 changes: 4 additions & 4 deletions tests/formatters/csv_formatter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ test('test convert', () => {
});

test('test format with fasta', () => {
//const fasta = [['>test', '5']];
//const object = [TestObject.testObject(), TestObject.testObject()];
//const csv = [`>test,${TestObject.asCsv()}`, TestObject.asCsv(), ""].join("\n");
//expect(formatter.format(object, fasta, false)).toBe(csv);
const fasta = { 5: ">test" };
const object = [TestObject.testObject(), TestObject.testObject()];
const csv = [`>test,${TestObject.asCsv()}`, `>test,${TestObject.asCsv()}`, ""].join("\n");
expect(formatter.format(object, fasta, false)).toBe(csv);
});
12 changes: 12 additions & 0 deletions tests/formatters/formatter.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { FormatterFactory } from "../../lib/formatters/formatter_factory";
import { TestObject } from "./test_object";

// `integrateFastaHeaders` should add a `fasta_header` property to every entry,
// found by looking up the entry's first field value in the header mapping.
test('test integrate fasta headers', async () => {
  const csvFormatter = FormatterFactory.getFormatter("csv");
  const headerLookup = { 5: ">test" };
  const entries = [TestObject.testObject(), TestObject.testObject()];
  const expected = [TestObject.testObject(), TestObject.testObject()]
    .map(entry => Object.assign({ fasta_header: ">test" }, entry));

  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
  // @ts-ignore
  const actual = csvFormatter.integrateFastaHeaders(entries, headerLookup);
  expect(actual).toEqual(expected);
});
8 changes: 4 additions & 4 deletions tests/formatters/json_formatter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ test('test convert', () => {
});

test('test format with fasta', () => {
//const fasta = [['>test', '5']];
//const object = [TestObject.testObject()];
//const json = '{"fasta_header":">test","integer":5,"string":"string","list":["a",2,false]}';
//expect(formatter.format(object, fasta, true)).toBe(json);
const fasta = { 5: ">test" };
const object = [TestObject.testObject()];
const json = '{"fasta_header":">test","integer":5,"string":"string","list":["a",2,false]}';
expect(formatter.format(object, fasta, true)).toBe(json);
});
8 changes: 4 additions & 4 deletions tests/formatters/xml_formatter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ test('test convert', () => {
});

test('test format with fasta', () => {
//const fasta = [['>test', '5']];
//const object = [TestObject.testObject()];
//const json = '{"fasta_header":">test","integer":5,"string":"string","list":["a",2,false]}';
//expect(formatter.format(object, fasta, true)).toBe(json);
const fasta = { 5: ">test" };
const object = [TestObject.testObject()];
const xml = `<result><fasta_header>&gt;test</fasta_header>${TestObject.asXml()}</result>`;
expect(formatter.format(object, fasta, true)).toBe(xml);
});

0 comments on commit eac4282

Please sign in to comment.