Skip to content

Commit

Permalink
working solution for #65
Browse files Browse the repository at this point in the history
  • Loading branch information
donaldcampbelljr committed Jan 9, 2025
1 parent 391ba68 commit 5f5973b
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 11 deletions.
1 change: 1 addition & 0 deletions gtars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ base64-url = "2.0.0"
sha2 = "0.10.7"
md-5 = "0.10.5"
seq_io = "0.3.2"
serde_json = "1.0.135"


[dev-dependencies]
Expand Down
65 changes: 60 additions & 5 deletions gtars/src/uniwig/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use indicatif::ProgressBar;

use rayon::prelude::*;
use std::error::Error;
use std::fs::File;
use std::fs::{remove_file, File};
use std::io::{BufRead, BufReader, BufWriter, Write};

use crate::uniwig::counting::{
Expand Down Expand Up @@ -237,6 +237,8 @@ pub fn uniwig_main(
meta_data_file_names[1] = format!("{}{}.{}", bwfileheader, "end", "meta");
meta_data_file_names[2] = format!("{}{}.{}", bwfileheader, "core", "meta");

let mut npy_meta_data_map: HashMap<String, HashMap<String, i32>> = HashMap::new();

let chrom_sizes = match read_chromosome_sizes(chromsizerefpath) {
// original program gets chromosome size from a .sizes file, e.g. chr1 248956422
// the original program simply pushes 0's until the end of the chromosome length and writes these to file.
Expand All @@ -251,17 +253,17 @@ pub fn uniwig_main(
match input_filetype {
//BED AND NARROWPEAK WORKFLOW
Ok(FileType::BED) | Ok(FileType::NARROWPEAK) => {
// Pare down chromosomes if necessary
let mut final_chromosomes =
get_final_chromosomes(&input_filetype, filepath, &chrom_sizes, score);

// Some housekeeping depending on output type
let og_output_type = output_type; // need this later for conversion
let mut output_type = output_type;
if output_type == "bedgraph" || output_type == "bw" || output_type == "bigwig" {
output_type = "bedGraph" // we must create bedgraphs first before creating bigwig files
}

// Pare down chromosomes if necessary
let mut final_chromosomes =
get_final_chromosomes(&input_filetype, filepath, &chrom_sizes, score);

let bar = ProgressBar::new(final_chromosomes.len() as u64);

// Pool installs iterator via rayon crate
Expand Down Expand Up @@ -587,6 +589,59 @@ pub fn uniwig_main(
);
}
}
// "npy" output: fold the per-count-type `.meta` files into a single JSON metadata file
// named `<bwfileheader>npy_meta.json`, deleting each `.meta` file once it has been read.
"npy" => {
    // Seed one entry per chromosome with the values that do not depend on the count type.
    for chromosome in final_chromosomes.iter() {
        let reported_chrom_size = *chrom_sizes
            .get(&chromosome.chrom)
            .expect("Processing npy metadata: chromosome missing from chromosome sizes");
        npy_meta_data_map.insert(
            chromosome.chrom.clone(),
            HashMap::from([
                ("stepsize".to_string(), stepsize),
                // NOTE(review): `as i32` silently truncates values above i32::MAX;
                // confirm the chromosome size type makes that impossible.
                ("reported_chrom_size".to_string(), reported_chrom_size as i32),
            ]),
        );
    }

    for location in vec_count_type.iter() {
        let temp_meta_file_name = format!("{}{}.{}", bwfileheader, *location, "meta");

        // A count type whose meta file cannot be opened is skipped.
        if let Ok(file) = File::open(&temp_meta_file_name) {
            // The reader is a temporary of the `for` expression, so the file handle is
            // closed when the loop ends, i.e. before the file is removed below.
            for line in BufReader::new(file).lines() {
                let line = line.expect("Processing npy metadata file: Unable to read line");

                // Only the 2nd and 3rd whitespace-separated fields are used; both are
                // expected to be `key=value` pairs (presumably a wiggle-style header such
                // as `fixedStep chrom=chr1 start=5 step=1` — confirm against the writer).
                // Lines with fewer than three fields are ignored.
                let mut fields = line.split_whitespace().skip(1);
                if let (Some(chrom_field), Some(start_field)) = (fields.next(), fields.next()) {
                    let chrom = chrom_field
                        .split('=')
                        .nth(1)
                        .expect("Processing npy metadata file: Missing chromosome in line");
                    let starting_position: i32 = start_field
                        .split('=')
                        .nth(1)
                        .expect("Processing npy metadata file: Missing start position in line")
                        .parse()
                        .expect("Processing npy metadata file: Invalid start position");

                    // Chromosomes that were not seeded above are ignored. When a chromosome
                    // appears on several lines, the last start position wins.
                    if let Some(current_chr_data) = npy_meta_data_map.get_mut(chrom) {
                        current_chr_data.insert(location.to_string(), starting_position);
                    }
                }
            }

            // Remove the file after it is used.
            remove_file(&temp_meta_file_name).unwrap_or_else(|err| {
                panic!(
                    "Processing npy metadata file: Unable to remove {}: {}",
                    temp_meta_file_name, err
                )
            });
        }
    }

    // Write the combined metadata as pretty-printed JSON.
    let json_string = serde_json::to_string_pretty(&npy_meta_data_map)
        .expect("Processing npy metadata: Unable to serialize metadata to JSON");
    let combined_npy_meta_file_path = format!("{}{}.{}", bwfileheader, "npy_meta", "json");
    let mut file = File::create(&combined_npy_meta_file_path).unwrap_or_else(|err| {
        panic!(
            "Processing npy metadata: Unable to create {}: {}",
            combined_npy_meta_file_path, err
        )
    });
    file.write_all(json_string.as_bytes())
        .expect("Processing npy metadata: Unable to write combined metadata file");
}
_ => {}
}
bar.finish();
Expand Down
6 changes: 0 additions & 6 deletions gtars/tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -673,10 +673,8 @@ mod tests {

let tempbedpath = format!("{}{}", path_to_crate, "/tests/data/test5.bed");
let combinedbedpath = tempbedpath.as_str();
//let combinedbedpath = "/home/drc/Downloads/unwig_testing_19dec2024/input/dummy4.bed";

let chromsizerefpath = combinedbedpath;
//let chromsizerefpath = "/home/drc/Downloads/unwig_testing_19dec2024/input/dummy.chrom.sizes";

let tempdir = tempfile::tempdir().unwrap();
let path = PathBuf::from(&tempdir.path());
Expand All @@ -685,12 +683,8 @@ mod tests {
let bwfileheader_path = path.into_os_string().into_string().unwrap();
let bwfileheader = bwfileheader_path.as_str();

//let bwfileheader = "/home/drc/Downloads/unwig_testing_19dec2024/output/npy_output/";
//let bwfileheader = "/home/drc/Downloads/unwig_testing_19dec2024/output/wig_output/";

let smoothsize: i32 = 2;
let output_type = "npy";
//let output_type = "wig";
let filetype = "bed";
let num_threads = 6;
let score = false;
Expand Down

0 comments on commit 5f5973b

Please sign in to comment.