-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #39 from stijndcl/feature/rust-dat-parser
Rust DAT parser
- Loading branch information
Showing
15 changed files
with
938 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
31 changes: 31 additions & 0 deletions
31
scripts/helper_scripts/unipept-database-rs/src/bin/dat-parser.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
use anyhow::{Context, Result}; | ||
use clap::Parser; | ||
use unipept_database::dat_parser::uniprot_dat_parser; | ||
use unipept_database::dat_parser::utils::write_header; | ||
use unipept_database::uniprot::UniprotType; | ||
|
||
use unipept_database::utils::files::open_sin; | ||
|
||
fn main() -> Result<()> { | ||
let args = Cli::parse(); | ||
let reader = open_sin(); | ||
|
||
write_header(); | ||
let parser = uniprot_dat_parser(reader, args.threads); | ||
|
||
for entry in parser { | ||
entry | ||
.context("Error parsing DAT entry")? | ||
.write(&args.db_type); | ||
} | ||
|
||
Ok(()) | ||
} | ||
|
||
#[derive(Parser, Debug)] | ||
struct Cli { | ||
#[clap(value_enum, short = 't', long, default_value_t = UniprotType::Swissprot)] | ||
db_type: UniprotType, | ||
#[clap(long, default_value_t = 0)] | ||
threads: usize, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
44 changes: 44 additions & 0 deletions
44
scripts/helper_scripts/unipept-database-rs/src/dat_parser/consumer.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
use crate::dat_parser::entry::UniProtDATEntry; | ||
use anyhow::{Context, Result}; | ||
use crossbeam_channel::{Receiver, Sender}; | ||
use std::thread; | ||
use std::thread::JoinHandle; | ||
|
||
/// A worker that parses raw DAT data on a background thread.
///
/// Once started, it keeps reading raw byte chunks from a `Receiver` channel and
/// publishes the outcome of parsing each chunk into a `UniProtDATEntry` on a
/// `Sender` channel.
pub struct Consumer {
    /// Handle of the spawned worker thread; `None` while no thread is attached.
    handle: Option<JoinHandle<()>>,
}
|
||
impl Consumer { | ||
pub fn new() -> Self { | ||
Self { handle: None } | ||
} | ||
|
||
pub fn start(&mut self, receiver: Receiver<Vec<u8>>, sender: Sender<Result<UniProtDATEntry>>) { | ||
self.handle = Some(thread::spawn(move || { | ||
for data in receiver { | ||
// Cut out the \n// at the end | ||
let data_slice = &data[..data.len() - 3]; | ||
let mut lines: Vec<String> = String::from_utf8_lossy(data_slice) | ||
.split('\n') | ||
.map(|x| x.to_string()) | ||
.collect(); | ||
|
||
let entry = | ||
UniProtDATEntry::from_lines(&mut lines).context("Error parsing DAT entry"); | ||
sender | ||
.send(entry) | ||
.context("Error sending parsed DAT entry to receiver channel") | ||
.unwrap(); | ||
} | ||
})); | ||
} | ||
|
||
pub fn join(&mut self) { | ||
if let Some(h) = self.handle.take() { | ||
h.join().unwrap(); | ||
self.handle = None; | ||
} | ||
} | ||
} |
Oops, something went wrong.