diff --git a/Cargo.toml b/Cargo.toml index 94797d3..32b0248 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "frag_gene_scan_rs" -version = "0.3.1" +version = "0.3.2" authors = ["Felix Van der Jeugt "] edition = "2018" diff --git a/src/bin/FragGeneScanRs.rs b/src/bin/FragGeneScanRs.rs index 29b5f59..b513443 100644 --- a/src/bin/FragGeneScanRs.rs +++ b/src/bin/FragGeneScanRs.rs @@ -180,30 +180,74 @@ fn run( let aastream = aastream.map(Mutex::new); let metastream = metastream.map(Mutex::new); let dnastream = dnastream.map(Mutex::new); - fasta::Reader::new(inputseqs) - .into_records() + Chunked::new(100, fasta::Reader::new(inputseqs).into_records()) .par_bridge() - .map(|record| { - let fasta::OwnedRecord { mut head, seq } = record?; - head = head.into_iter().take_while(u8::is_ascii_graphic).collect(); - let nseq: Vec = seq.into_iter().map(Nuc::from).collect(); - let read_prediction = viterbi( - &global, - &locals[count_cg_content(&nseq)], - head, - nseq, - whole_genome, - ); + .map(|recordvec| { + let mut metabuf = Vec::new(); + let mut dnabuf = Vec::new(); + let mut aabuf = Vec::new(); + for record in recordvec { + let fasta::OwnedRecord { mut head, seq } = record?; + head = head.into_iter().take_while(u8::is_ascii_graphic).collect(); + let nseq: Vec = seq.into_iter().map(Nuc::from).collect(); + let read_prediction = viterbi( + &global, + &locals[count_cg_content(&nseq)], + head, + nseq, + whole_genome, + ); + if metastream.is_some() { + read_prediction.meta(&mut metabuf)?; + } + if dnastream.is_some() { + read_prediction.dna(&mut dnabuf, formatted)?; + } + if aastream.is_some() { + read_prediction.protein(&mut aabuf, whole_genome)?; + } + } if let Some(metastream) = &metastream { - read_prediction.print_meta(&mut *metastream.lock().unwrap())?; // TODO lock together content + metastream.lock().unwrap().write_all(&metabuf)?; } if let Some(dnastream) = &dnastream { - read_prediction.print_dna(&mut *dnastream.lock().unwrap(), formatted)?; + dnastream.lock().unwrap().write_all(&dnabuf)?; } if let Some(aastream) = &aastream { - read_prediction.print_protein(whole_genome, &mut *aastream.lock().unwrap())?; + aastream.lock().unwrap().write_all(&aabuf)?; } Ok(()) }) .collect() } + +struct Chunked { + size: usize, + iterator: I, +} + +impl Chunked { + fn new(size: usize, iterator: I) -> Self { + Chunked { size, iterator } + } +} + +impl Iterator for Chunked { + type Item = Vec; + + fn next(&mut self) -> Option { + let mut items = Vec::with_capacity(self.size); + for _ in 0..self.size { + if let Some(item) = self.iterator.next() { + items.push(item); + } else { + break; + } + } + if items.is_empty() { + None + } else { + Some(items) + } + } +} diff --git a/src/gene.rs b/src/gene.rs index a00cd87..972c9bb 100644 --- a/src/gene.rs +++ b/src/gene.rs @@ -1,7 +1,3 @@ -use std::fs::File; -use std::io; -use std::io::Write; - extern crate thiserror; use thiserror::Error; @@ -21,30 +17,26 @@ impl ReadPrediction { } } - pub fn print_meta(&self, file: &mut File) -> Result<(), GeneError> { + pub fn meta(&self, buf: &mut Vec) -> Result<(), GeneError> { if !self.genes.is_empty() { - file.write_all(&format!(">{}\n", std::str::from_utf8(&self.head)?).into_bytes())?; + buf.append(&mut format!(">{}\n", std::str::from_utf8(&self.head)?).into_bytes()) } for gene in &self.genes { - gene.print_meta(file)?; + gene.meta(buf); } Ok(()) } - pub fn print_dna(&self, file: &mut File, formatted: bool) -> Result<(), GeneError> { + pub fn dna(&self, buf: &mut Vec, formatted: bool) -> Result<(), GeneError> { for gene in &self.genes { - gene.print_dna(file, &self.head, formatted)?; + gene.dna(buf, &self.head, formatted)?; } Ok(()) } - pub fn print_protein( - &self, - whole_genome: bool, - file: &mut W, - ) -> Result<(), GeneError> { + pub fn protein(&self, buf: &mut Vec, whole_genome: bool) -> Result<(), GeneError> { for gene in &self.genes { - gene.print_protein(file, &self.head, whole_genome)?; + gene.protein(buf, &self.head, whole_genome)?; } Ok(()) } @@ -63,9 +55,9 @@ pub struct Gene { } impl Gene { - pub fn print_meta(&self, file: &mut File) -> Result<(), GeneError> { - file.write_all( - &format!( + pub fn meta(&self, buf: &mut Vec) { + buf.append( + &mut format!( "{}\t{}\t{}\t{}\t{:.6}\tI:{}\tD:{}\n", self.metastart, self.end, @@ -82,16 +74,10 @@ impl Gene { .collect::() ) .into_bytes(), - )?; - Ok(()) + ); } - pub fn print_dna( - &self, - file: &mut File, - head: &Vec, - formatted: bool, - ) -> Result<(), GeneError> { + pub fn dna(&self, buf: &mut Vec, head: &Vec, formatted: bool) -> Result<(), GeneError> { let dna: Vec = match (self.forward_strand, formatted) { (true, true) => self.dna.iter().map(|&n| u8::from(n)).collect(), (true, false) => self @@ -110,8 +96,8 @@ impl Gene { .collect(), }; - file.write_all( - &format!( + buf.append( + &mut format!( ">{}_{}_{}_{}\n{}\n", std::str::from_utf8(head)?, self.start, @@ -120,14 +106,14 @@ impl Gene { std::str::from_utf8(&dna)?, ) .into_bytes(), - )?; + ); Ok(()) } - pub fn print_protein( + pub fn protein( &self, - file: &mut W, + buf: &mut Vec, head: &Vec, whole_genome: bool, ) -> Result<(), GeneError> { @@ -167,8 +153,8 @@ impl Gene { } } - file.write_all( - &format!( + buf.append( + &mut format!( ">{}_{}_{}_{}\n{}\n", std::str::from_utf8(head)?, self.start, @@ -177,15 +163,13 @@ impl Gene { std::str::from_utf8(&protein)?, ) .into_bytes(), - )?; + ); Ok(()) } } #[derive(Error, Debug)] pub enum GeneError { - #[error("could not write to file")] - IoError(#[from] io::Error), #[error("could not convert header back to UTF-8")] Utf8Error(#[from] std::str::Utf8Error), }