Skip to content

Commit

Permalink
Fix linting and formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
stijndcl committed Jan 13, 2024
1 parent c39ef6a commit fa9f0aa
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 25 deletions.
48 changes: 31 additions & 17 deletions scripts/helper_scripts/unipept-database-rs/src/bin/taxa-by-chunk.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::fs::{File, read_dir};
use std::fs::{read_dir, File};
use std::io::{BufRead, BufWriter, Write};
use std::path::{Path, PathBuf};

Expand All @@ -24,7 +24,10 @@ fn main() -> Result<()> {
continue;
}

let taxa_id: u64 = line.trim().parse().with_context(|| format!("Error parsing {line} as an integer"))?;
let taxa_id: u64 = line
.trim()
.parse()
.with_context(|| format!("Error parsing {line} as an integer"))?;
all_taxa.push(taxa_id);
}

Expand All @@ -38,8 +41,10 @@ fn main() -> Result<()> {
}

let base_name = match path.file_name() {
None => {continue;}
Some(n) => n.to_str().context("Error creating string from file path")?
None => {
continue;
}
Some(n) => n.to_str().context("Error creating string from file path")?,
};

if !chunk_file_regex.is_match(base_name) {
Expand All @@ -48,25 +53,34 @@ fn main() -> Result<()> {

// Parse the taxa range out of the filename
let replaced_name = base_name.replace("unipept.", "").replace(".chunk.gz", "");
let range = replaced_name.split_once("-");
let range = replaced_name.split_once('-');
let range = range.with_context(|| format!("Unable to split {replaced_name} on '-'"))?;
let start: u64 = range.0.parse().with_context(|| format!("Error parsing {} as an integer", range.0))?;
let end: u64 = range.1.parse().with_context(|| format!("Error parsing {} as an integer", range.1))?;

let matching_taxa: Vec<&u64> = all_taxa.iter().filter(|&t| start <= *t && *t <= end).collect();
let start: u64 = range
.0
.parse()
.with_context(|| format!("Error parsing {} as an integer", range.0))?;
let end: u64 = range
.1
.parse()
.with_context(|| format!("Error parsing {} as an integer", range.1))?;

let matching_taxa: Vec<&u64> = all_taxa
.iter()
.filter(|&t| start <= *t && *t <= end)
.collect();

// Write matches to a temporary output file
if !matching_taxa.is_empty() {
let mapped_taxa: Vec<String> = matching_taxa.iter().map(|&t| format!("\t{t}$")).collect();
let mapped_taxa: Vec<String> =
matching_taxa.iter().map(|&t| format!("\t{t}$")).collect();
let joined_taxa = mapped_taxa.join("\n");

let temp_file_path = Path::new(&args.temp_dir).join(format!("{base_name}.pattern"));
let temp_file = File::create(&temp_file_path).context("Error creating temporary pattern file")?;
let temp_file =
File::create(&temp_file_path).context("Error creating temporary pattern file")?;
let mut writer = BufWriter::new(temp_file);
write!(
&mut writer,
"{joined_taxa}",
).context("Error writing to temporary pattern file")?;
write!(&mut writer, "{joined_taxa}",)
.context("Error writing to temporary pattern file")?;

// The two unwraps here can't be handled using the ? operator
println!("{}", temp_file_path.into_os_string().into_string().unwrap());
Expand All @@ -83,5 +97,5 @@ struct Cli {
chunk_dir: PathBuf,

#[clap(long)]
temp_dir: PathBuf
}
temp_dir: PathBuf,
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ fn main() -> Result<()> {
let next = TAXA_BOUNDS[idx + 1];
let file_name = format!("unipept.{bound}-{next}.chunk");
let file_path = Path::new(&args.output_dir).join(file_name);
let file_handler = File::create(file_path).with_context(|| format!("Unable to create output file {bound}-{next}"))?;
let file_handler = File::create(file_path)
.with_context(|| format!("Unable to create output file {bound}-{next}"))?;
let writer = BufWriter::new(file_handler);
file_streams.push(writer);
}
Expand All @@ -26,7 +27,9 @@ fn main() -> Result<()> {

// First read the header
let mut header: String = String::new();
reader.read_line(&mut header).context("Error reading header from stdin")?;
reader
.read_line(&mut header)
.context("Error reading header from stdin")?;
write_header(&args.output_dir, header)?;

// Then the rest of the data
Expand All @@ -38,7 +41,10 @@ fn main() -> Result<()> {
}

let spl: Vec<&str> = line.split('\t').collect();
let taxon_id = spl[8].trim().parse::<usize>().with_context(|| format!("Error parsing {} as an integer", spl[8]))?;
let taxon_id = spl[8]
.trim()
.parse::<usize>()
.with_context(|| format!("Error parsing {} as an integer", spl[8]))?;

// Find the index of this taxon id in the array
// Note that this can be sped up using binary search (see Python's bisect.bisect_left),
Expand All @@ -63,15 +69,15 @@ struct Cli {
}

const TAXA_BOUNDS: [usize; 45] = [
0, 550, 1352, 3047, 5580, 8663, 11676, 32473, 40214, 52774, 66656, 86630, 116960, 162147, 210225, 267979, 334819,
408172, 470868, 570509, 673318, 881260, 1046115, 1136135, 1227077, 1300307, 1410620, 1519492, 1650438, 1756149,
1820614, 1871070, 1898104, 1922217, 1978231, 2024617, 2026757, 2035430, 2070414, 2202732, 2382165, 2527964, 2601669,
2706029, 10000000
0, 550, 1352, 3047, 5580, 8663, 11676, 32473, 40214, 52774, 66656, 86630, 116960, 162147,
210225, 267979, 334819, 408172, 470868, 570509, 673318, 881260, 1046115, 1136135, 1227077,
1300307, 1410620, 1519492, 1650438, 1756149, 1820614, 1871070, 1898104, 1922217, 1978231,
2024617, 2026757, 2035430, 2070414, 2202732, 2382165, 2527964, 2601669, 2706029, 10000000,
];

fn write_header(output_dir: &PathBuf, header: String) -> Result<()> {
let file_path = Path::new(output_dir).join("db.header");
let file_handler = File::create(file_path).with_context(|| format!("Unable to create header output file"))?;
let file_handler = File::create(file_path).context("Unable to create header output file")?;
let mut writer = BufWriter::new(file_handler);

write!(&mut writer, "{}", header).context("Error writing header")?;
Expand Down

0 comments on commit fa9f0aa

Please sign in to comment.