Skip to content

Commit

Permalink
Revert "attempt to use shared hashmap for #65 does not work"
Browse files Browse the repository at this point in the history
This reverts commit 27d52f5.
  • Loading branch information
donaldcampbelljr committed Jan 9, 2025
1 parent 27d52f5 commit 391ba68
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 68 deletions.
1 change: 0 additions & 1 deletion gtars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ base64-url = "2.0.0"
sha2 = "0.10.7"
md-5 = "0.10.5"
seq_io = "0.3.2"
serde_json = "1.0.135"


[dev-dependencies]
Expand Down
64 changes: 15 additions & 49 deletions gtars/src/uniwig/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ use std::str::FromStr;
use std::sync::{Arc, Mutex};
use std::thread;
use tokio::runtime;
use serde_json;

pub mod cli;
pub mod counting;
Expand Down Expand Up @@ -249,43 +248,19 @@ pub fn uniwig_main(
}
};

let mut npy_meta_data: HashMap<String, HashMap<String, i32>> = HashMap::new();
let mut arc_npy_meta_data = Arc::new(Mutex::new(npy_meta_data));
let mut chromosome_data_clone = Arc::clone(&arc_npy_meta_data);

match input_filetype {
//BED AND NARROWPEAK WORKFLOW
Ok(FileType::BED) | Ok(FileType::NARROWPEAK) => {
// Pare down chromosomes if necessary
let mut final_chromosomes =
get_final_chromosomes(&input_filetype, filepath, &chrom_sizes, score);

// Some housekeeping depending on output type
let og_output_type = output_type; // need this later for conversion
let mut output_type = output_type;
if output_type == "bedgraph" || output_type == "bw" || output_type == "bigwig" {
output_type = "bedGraph" // we must create bedgraphs first before creating bigwig files
}

if output_type == "npy"{
// populate hashmap for the npy meta data
let mut arc_npy_meta_data_locked = arc_npy_meta_data.lock().unwrap();
for chromosome in final_chromosomes.iter(){
let chr_name = chromosome.chrom.clone();
let current_chrom_size =
*chrom_sizes.get(&chromosome.chrom).unwrap() as i32;

arc_npy_meta_data_locked.insert(
chr_name,
HashMap::from([
("stepsize".to_string(), stepsize),
("reported_chrom_size".to_string(), current_chrom_size),
]),
);

}

}
// Pare down chromosomes if necessary
let mut final_chromosomes =
get_final_chromosomes(&input_filetype, filepath, &chrom_sizes, score);

let bar = ProgressBar::new(final_chromosomes.len() as u64);

Expand Down Expand Up @@ -373,7 +348,6 @@ pub fn uniwig_main(
"{}{}_{}.{}",
bwfileheader, chrom_name, "start", output_type
);

write_to_npy_file(
&count_result.0,
file_name.clone(),
Expand All @@ -382,8 +356,8 @@ pub fn uniwig_main(
primary_start.0,
smoothsize,
),
&mut chromosome_data_clone,
"start",
stepsize,
meta_data_file_names[0].clone(),
);
}
_ => {
Expand All @@ -400,8 +374,8 @@ pub fn uniwig_main(
primary_start.0,
smoothsize,
),
&mut chromosome_data_clone,
"start",
stepsize,
meta_data_file_names[0].clone(),
);
}
}
Expand Down Expand Up @@ -475,8 +449,8 @@ pub fn uniwig_main(
smoothsize,
0
),
&mut chromosome_data_clone,
"end",
stepsize,
meta_data_file_names[1].clone(),
);
}
_ => {
Expand All @@ -494,8 +468,8 @@ pub fn uniwig_main(
smoothsize,
0
),
&mut chromosome_data_clone,
"end",
stepsize,
meta_data_file_names[1].clone(),
);
}
}
Expand Down Expand Up @@ -562,8 +536,8 @@ pub fn uniwig_main(
primary_start.0,
0,
),
&mut chromosome_data_clone,
"core",
stepsize,
meta_data_file_names[2].clone(),
);
}
_ => {
Expand All @@ -580,8 +554,8 @@ pub fn uniwig_main(
primary_start.0,
0,
),
&mut chromosome_data_clone,
"core",
stepsize,
meta_data_file_names[2].clone(),
);
}
}
Expand Down Expand Up @@ -613,14 +587,6 @@ pub fn uniwig_main(
);
}
}
"npy" => {
//write combined metadata
let json_string = serde_json::to_string_pretty(&npy_meta_data).unwrap();
let combined_npy_meta_file_path = format!("{}{}.{}", bwfileheader, "npy_meta", "json");
let mut file = File::create(combined_npy_meta_file_path).unwrap();
file.write_all(json_string.as_bytes()).unwrap();

}
_ => {}
}
bar.finish();
Expand Down
34 changes: 23 additions & 11 deletions gtars/src/uniwig/writing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,44 @@ use std::fs::{create_dir_all, remove_file, File, OpenOptions};
use std::io::{BufWriter, Write};
use std::path::PathBuf;
use std::{fs, io};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};

/// Write output to npy files AND update the meta_data hashmap
/// Write output to npy files
pub fn write_to_npy_file(
counts: &[u32],
filename: String,
chromname: String,
start_position: i32,
npy_meta_data_map: &mut Arc<Mutex<HashMap<String, HashMap<String, i32>>>>,
out_selection: &str,
stepsize: i32,
metafilename: String,
) {
let mut chromosome_data_guard = npy_meta_data_map.lock().unwrap();

// For future reference: `counts` is a slice (`&[u32]`), not a `Vec<u32>`, so we must call `to_vec` below to get the owned `Vec` that `Array::from_vec` requires
// https://users.rust-lang.org/t/why-does-std-to-vec-exist/45893/9

// Write the NumPy Files
let arr = Array::from_vec(counts.to_vec());
let _ = write_npy(filename, &arr);

// Write to the metadata hashmap
if let Some(current_chr_data) = chromosome_data_guard.get_mut(chromname.as_str()) {
current_chr_data.insert(out_selection.to_string(), start_position);
}
// Write to the metadata file.
// Note: there should be a single metadata file for starts, ends and core

let path = std::path::Path::new(&metafilename).parent().unwrap();
let _ = create_dir_all(path);

let mut file = OpenOptions::new()
.create(true) // Create the file if it doesn't exist
.append(true) // Append data to the existing file if it does exist
.open(metafilename)
.unwrap();

// The original wiggle file header. This can be anything we wish it to be. Currently space delimited.
let mut wig_header = "fixedStep chrom=".to_string()
+ chromname.as_str()
+ " start="
+ start_position.to_string().as_str()
+ " step="
+ stepsize.to_string().as_str();
wig_header.push('\n');
file.write_all(wig_header.as_ref()).unwrap();
}

/// Write either combined bedGraph, wiggle files, and bed files
Expand Down
12 changes: 5 additions & 7 deletions gtars/tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -673,12 +673,10 @@ mod tests {

let tempbedpath = format!("{}{}", path_to_crate, "/tests/data/test5.bed");
let combinedbedpath = tempbedpath.as_str();
let combinedbedpath = "/home/drc/Downloads/unwig_testing_19dec2024/input/dummy3.bed";
//let combinedbedpath = "/home/drc/Downloads/unwig_testing_19dec2024/input/chr1415.bed";
//let combinedbedpath = "/home/drc/Downloads/unwig_testing_19dec2024/input/dummy4.bed";

let chromsizerefpath = combinedbedpath;
let chromsizerefpath = "/home/drc/Downloads/unwig_testing_19dec2024/input/dummy.chrom.sizes";
//let chromsizerefpath = "/home/drc/Downloads/unwig_testing_19dec2024/input/test.chrom.sizes";
//let chromsizerefpath = "/home/drc/Downloads/unwig_testing_19dec2024/input/dummy.chrom.sizes";

let tempdir = tempfile::tempdir().unwrap();
let path = PathBuf::from(&tempdir.path());
Expand All @@ -687,16 +685,16 @@ mod tests {
let bwfileheader_path = path.into_os_string().into_string().unwrap();
let bwfileheader = bwfileheader_path.as_str();

let bwfileheader = "/home/drc/Downloads/unwig_testing_19dec2024/output/npy_output/";
//let bwfileheader = "/home/drc/Downloads/unwig_testing_19dec2024/output/npy_output/";
//let bwfileheader = "/home/drc/Downloads/unwig_testing_19dec2024/output/wig_output/";

let smoothsize: i32 = 10;
let smoothsize: i32 = 2;
let output_type = "npy";
//let output_type = "wig";
let filetype = "bed";
let num_threads = 6;
let score = false;
let stepsize = 5;
let stepsize = 1;
let zoom = 0;
let vec_count_type = vec!["start", "end", "core"];

Expand Down

0 comments on commit 391ba68

Please sign in to comment.