Skip to content

Commit

Permalink
Merge branch 'input-blocks'
Browse files Browse the repository at this point in the history
* input-blocks:
  write to buffers in chunks
  • Loading branch information
Felix Van der Jeugt committed Aug 4, 2021
2 parents 9d59aac + b032ce8 commit 76e2bc3
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 53 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "frag_gene_scan_rs"
version = "0.3.1"
version = "0.3.2"
authors = ["Felix Van der Jeugt <[email protected]>"]
edition = "2018"

Expand Down
76 changes: 60 additions & 16 deletions src/bin/FragGeneScanRs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,30 +180,74 @@ fn run<R: Read + Send, W: Write + Send>(
let aastream = aastream.map(Mutex::new);
let metastream = metastream.map(Mutex::new);
let dnastream = dnastream.map(Mutex::new);
fasta::Reader::new(inputseqs)
.into_records()
Chunked::new(100, fasta::Reader::new(inputseqs).into_records())
.par_bridge()
.map(|record| {
let fasta::OwnedRecord { mut head, seq } = record?;
head = head.into_iter().take_while(u8::is_ascii_graphic).collect();
let nseq: Vec<Nuc> = seq.into_iter().map(Nuc::from).collect();
let read_prediction = viterbi(
&global,
&locals[count_cg_content(&nseq)],
head,
nseq,
whole_genome,
);
.map(|recordvec| {
let mut metabuf = Vec::new();
let mut dnabuf = Vec::new();
let mut aabuf = Vec::new();
for record in recordvec {
let fasta::OwnedRecord { mut head, seq } = record?;
head = head.into_iter().take_while(u8::is_ascii_graphic).collect();
let nseq: Vec<Nuc> = seq.into_iter().map(Nuc::from).collect();
let read_prediction = viterbi(
&global,
&locals[count_cg_content(&nseq)],
head,
nseq,
whole_genome,
);
if metastream.is_some() {
read_prediction.meta(&mut metabuf)?;
}
if dnastream.is_some() {
read_prediction.dna(&mut dnabuf, formatted)?;
}
if aastream.is_some() {
read_prediction.protein(&mut aabuf, whole_genome)?;
}
}
if let Some(metastream) = &metastream {
read_prediction.print_meta(&mut *metastream.lock().unwrap())?; // TODO lock together content
metastream.lock().unwrap().write_all(&metabuf)?;
}
if let Some(dnastream) = &dnastream {
read_prediction.print_dna(&mut *dnastream.lock().unwrap(), formatted)?;
dnastream.lock().unwrap().write_all(&dnabuf)?;
}
if let Some(aastream) = &aastream {
read_prediction.print_protein(whole_genome, &mut *aastream.lock().unwrap())?;
aastream.lock().unwrap().write_all(&aabuf)?;
}
Ok(())
})
.collect()
}

struct Chunked<I: Iterator> {
size: usize,
iterator: I,
}

impl<I: Iterator> Chunked<I> {
fn new(size: usize, iterator: I) -> Self {
Chunked { size, iterator }
}
}

impl<I: Iterator> Iterator for Chunked<I> {
type Item = Vec<I::Item>;

fn next(&mut self) -> Option<Self::Item> {
let mut items = Vec::with_capacity(self.size);
for _ in 0..self.size {
if let Some(item) = self.iterator.next() {
items.push(item);
} else {
break;
}
}
if items.is_empty() {
None
} else {
Some(items)
}
}
}
56 changes: 20 additions & 36 deletions src/gene.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
use std::fs::File;
use std::io;
use std::io::Write;

extern crate thiserror;
use thiserror::Error;

Expand All @@ -21,30 +17,26 @@ impl ReadPrediction {
}
}

pub fn print_meta(&self, file: &mut File) -> Result<(), GeneError> {
pub fn meta(&self, buf: &mut Vec<u8>) -> Result<(), GeneError> {
if !self.genes.is_empty() {
file.write_all(&format!(">{}\n", std::str::from_utf8(&self.head)?).into_bytes())?;
buf.append(&mut format!(">{}\n", std::str::from_utf8(&self.head)?).into_bytes())
}
for gene in &self.genes {
gene.print_meta(file)?;
gene.meta(buf);
}
Ok(())
}

pub fn print_dna(&self, file: &mut File, formatted: bool) -> Result<(), GeneError> {
pub fn dna(&self, buf: &mut Vec<u8>, formatted: bool) -> Result<(), GeneError> {
for gene in &self.genes {
gene.print_dna(file, &self.head, formatted)?;
gene.dna(buf, &self.head, formatted)?;
}
Ok(())
}

pub fn print_protein<W: Write>(
&self,
whole_genome: bool,
file: &mut W,
) -> Result<(), GeneError> {
pub fn protein(&self, buf: &mut Vec<u8>, whole_genome: bool) -> Result<(), GeneError> {
for gene in &self.genes {
gene.print_protein(file, &self.head, whole_genome)?;
gene.protein(buf, &self.head, whole_genome)?;
}
Ok(())
}
Expand All @@ -63,9 +55,9 @@ pub struct Gene {
}

impl Gene {
pub fn print_meta(&self, file: &mut File) -> Result<(), GeneError> {
file.write_all(
&format!(
pub fn meta(&self, buf: &mut Vec<u8>) {
buf.append(
&mut format!(
"{}\t{}\t{}\t{}\t{:.6}\tI:{}\tD:{}\n",
self.metastart,
self.end,
Expand All @@ -82,16 +74,10 @@ impl Gene {
.collect::<String>()
)
.into_bytes(),
)?;
Ok(())
);
}

pub fn print_dna(
&self,
file: &mut File,
head: &Vec<u8>,
formatted: bool,
) -> Result<(), GeneError> {
pub fn dna(&self, buf: &mut Vec<u8>, head: &Vec<u8>, formatted: bool) -> Result<(), GeneError> {
let dna: Vec<u8> = match (self.forward_strand, formatted) {
(true, true) => self.dna.iter().map(|&n| u8::from(n)).collect(),
(true, false) => self
Expand All @@ -110,8 +96,8 @@ impl Gene {
.collect(),
};

file.write_all(
&format!(
buf.append(
&mut format!(
">{}_{}_{}_{}\n{}\n",
std::str::from_utf8(head)?,
self.start,
Expand All @@ -120,14 +106,14 @@ impl Gene {
std::str::from_utf8(&dna)?,
)
.into_bytes(),
)?;
);

Ok(())
}

pub fn print_protein<W: Write>(
pub fn protein(
&self,
file: &mut W,
buf: &mut Vec<u8>,
head: &Vec<u8>,
whole_genome: bool,
) -> Result<(), GeneError> {
Expand Down Expand Up @@ -167,8 +153,8 @@ impl Gene {
}
}

file.write_all(
&format!(
buf.append(
&mut format!(
">{}_{}_{}_{}\n{}\n",
std::str::from_utf8(head)?,
self.start,
Expand All @@ -177,15 +163,13 @@ impl Gene {
std::str::from_utf8(&protein)?,
)
.into_bytes(),
)?;
);
Ok(())
}
}

#[derive(Error, Debug)]
pub enum GeneError {
#[error("could not write to file")]
IoError(#[from] io::Error),
#[error("could not convert header back to UTF-8")]
Utf8Error(#[from] std::str::Utf8Error),
}

0 comments on commit 76e2bc3

Please sign in to comment.