From 391ba686a8adbc924abfb500f8a782af596fd994 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 9 Jan 2025 16:52:03 -0500 Subject: [PATCH] Revert "attempt to use shared hashmap for #65 does not work" This reverts commit 27d52f5995ae9452de13de1f3ed43e195e9c2a99. --- gtars/Cargo.toml | 1 - gtars/src/uniwig/mod.rs | 64 +++++++++---------------------------- gtars/src/uniwig/writing.rs | 34 +++++++++++++------- gtars/tests/test.rs | 12 +++---- 4 files changed, 43 insertions(+), 68 deletions(-) diff --git a/gtars/Cargo.toml b/gtars/Cargo.toml index a5708eb..462af9a 100644 --- a/gtars/Cargo.toml +++ b/gtars/Cargo.toml @@ -32,7 +32,6 @@ base64-url = "2.0.0" sha2 = "0.10.7" md-5 = "0.10.5" seq_io = "0.3.2" -serde_json = "1.0.135" [dev-dependencies] diff --git a/gtars/src/uniwig/mod.rs b/gtars/src/uniwig/mod.rs index 1f728ae..38803c0 100644 --- a/gtars/src/uniwig/mod.rs +++ b/gtars/src/uniwig/mod.rs @@ -34,7 +34,6 @@ use std::str::FromStr; use std::sync::{Arc, Mutex}; use std::thread; use tokio::runtime; -use serde_json; pub mod cli; pub mod counting; @@ -249,17 +248,9 @@ pub fn uniwig_main( } }; - let mut npy_meta_data: HashMap> = HashMap::new(); - let mut arc_npy_meta_data = Arc::new(Mutex::new(npy_meta_data)); - let mut chromosome_data_clone = Arc::clone(&arc_npy_meta_data); - match input_filetype { //BED AND NARROWPEAK WORKFLOW Ok(FileType::BED) | Ok(FileType::NARROWPEAK) => { - // Pare down chromosomes if necessary - let mut final_chromosomes = - get_final_chromosomes(&input_filetype, filepath, &chrom_sizes, score); - // Some housekeeping depending on output type let og_output_type = output_type; // need this later for conversion let mut output_type = output_type; @@ -267,25 +258,9 @@ pub fn uniwig_main( output_type = "bedGraph" // we must create bedgraphs first before creating bigwig files } - if output_type == "npy"{ - // populate hashmap for the npy meta data - let mut arc_npy_meta_data_locked = arc_npy_meta_data.lock().unwrap(); - for chromosome in final_chromosomes.iter(){ - let chr_name = chromosome.chrom.clone(); - let current_chrom_size = - *chrom_sizes.get(&chromosome.chrom).unwrap() as i32; - - arc_npy_meta_data_locked.insert( - chr_name, - HashMap::from([ - ("stepsize".to_string(), stepsize), - ("reported_chrom_size".to_string(), current_chrom_size), - ]), - ); - - } - - } + // Pare down chromosomes if necessary + let mut final_chromosomes = + get_final_chromosomes(&input_filetype, filepath, &chrom_sizes, score); let bar = ProgressBar::new(final_chromosomes.len() as u64); @@ -373,7 +348,6 @@ pub fn uniwig_main( "{}{}_{}.{}", bwfileheader, chrom_name, "start", output_type ); - write_to_npy_file( &count_result.0, file_name.clone(), @@ -382,8 +356,8 @@ pub fn uniwig_main( primary_start.0, smoothsize, ), - &mut chromosome_data_clone, - "start", + stepsize, + meta_data_file_names[0].clone(), ); } _ => { @@ -400,8 +374,8 @@ pub fn uniwig_main( primary_start.0, smoothsize, ), - &mut chromosome_data_clone, - "start", + stepsize, + meta_data_file_names[0].clone(), ); } } @@ -475,8 +449,8 @@ pub fn uniwig_main( smoothsize, 0 ), - &mut chromosome_data_clone, - "end", + stepsize, + meta_data_file_names[1].clone(), ); } _ => { @@ -494,8 +468,8 @@ pub fn uniwig_main( smoothsize, 0 ), - &mut chromosome_data_clone, - "end", + stepsize, + meta_data_file_names[1].clone(), ); } } @@ -562,8 +536,8 @@ pub fn uniwig_main( primary_start.0, 0, ), - &mut chromosome_data_clone, - "core", + stepsize, + meta_data_file_names[2].clone(), ); } _ => { @@ -580,8 +554,8 @@ pub fn uniwig_main( primary_start.0, 0, ), - &mut chromosome_data_clone, - "core", + stepsize, + meta_data_file_names[2].clone(), ); } } @@ -613,14 +587,6 @@ pub fn uniwig_main( ); } } - "npy" => { - //write combined metadata - let json_string = serde_json::to_string_pretty(&npy_meta_data).unwrap(); - let combined_npy_meta_file_path = format!("{}{}.{}", bwfileheader, "npy_meta", "json"); - let mut file = File::create(combined_npy_meta_file_path).unwrap(); - file.write_all(json_string.as_bytes()).unwrap(); - - } _ => {} } bar.finish(); diff --git a/gtars/src/uniwig/writing.rs b/gtars/src/uniwig/writing.rs index 14b82c9..baebb37 100644 --- a/gtars/src/uniwig/writing.rs +++ b/gtars/src/uniwig/writing.rs @@ -8,20 +8,16 @@ use std::fs::{create_dir_all, remove_file, File, OpenOptions}; use std::io::{BufWriter, Write}; use std::path::PathBuf; use std::{fs, io}; -use std::collections::HashMap; -use std::sync::{Arc, Mutex}; -/// Write output to npy files AND update the meta_data hashmap +/// Write output to npy files pub fn write_to_npy_file( counts: &[u32], filename: String, chromname: String, start_position: i32, - npy_meta_data_map: &mut Arc>>>, - out_selection: &str, + stepsize: i32, + metafilename: String, ) { - let mut chromosome_data_guard = npy_meta_data_map.lock().unwrap(); - // For future reference `&Vec` is a SLICE and thus we must use the `to_vec` function below when creating an array // https://users.rust-lang.org/t/why-does-std-to-vec-exist/45893/9 @@ -29,11 +25,27 @@ pub fn write_to_npy_file( let arr = Array::from_vec(counts.to_vec()); let _ = write_npy(filename, &arr); - // Write to the metadata hashmap - if let Some(current_chr_data) = chromosome_data_guard.get_mut(chromname.as_str()) { - current_chr_data.insert(out_selection.to_string(), start_position); - } + // Write to the metadata file. + // Note: there should be a single metadata file for starts, ends and core + + let path = std::path::Path::new(&metafilename).parent().unwrap(); + let _ = create_dir_all(path); + + let mut file = OpenOptions::new() + .create(true) // Create the file if it doesn't exist + .append(true) // Append data to the existing file if it does exist + .open(metafilename) + .unwrap(); + // The original wiggle file header. This can be anything we wish it to be. Currently space delimited. + let mut wig_header = "fixedStep chrom=".to_string() + + chromname.as_str() + + " start=" + + start_position.to_string().as_str() + + " step=" + + stepsize.to_string().as_str(); + wig_header.push('\n'); + file.write_all(wig_header.as_ref()).unwrap(); } /// Write either combined bedGraph, wiggle files, and bed files diff --git a/gtars/tests/test.rs b/gtars/tests/test.rs index 433ce80..e869197 100644 --- a/gtars/tests/test.rs +++ b/gtars/tests/test.rs @@ -673,12 +673,10 @@ mod tests { let tempbedpath = format!("{}{}", path_to_crate, "/tests/data/test5.bed"); let combinedbedpath = tempbedpath.as_str(); - let combinedbedpath = "/home/drc/Downloads/unwig_testing_19dec2024/input/dummy3.bed"; - //let combinedbedpath = "/home/drc/Downloads/unwig_testing_19dec2024/input/chr1415.bed"; + //let combinedbedpath = "/home/drc/Downloads/unwig_testing_19dec2024/input/dummy4.bed"; let chromsizerefpath = combinedbedpath; - let chromsizerefpath = "/home/drc/Downloads/unwig_testing_19dec2024/input/dummy.chrom.sizes"; - //let chromsizerefpath = "/home/drc/Downloads/unwig_testing_19dec2024/input/test.chrom.sizes"; + //let chromsizerefpath = "/home/drc/Downloads/unwig_testing_19dec2024/input/dummy.chrom.sizes"; let tempdir = tempfile::tempdir().unwrap(); let path = PathBuf::from(&tempdir.path()); @@ -687,16 +685,16 @@ mod tests { let bwfileheader_path = path.into_os_string().into_string().unwrap(); let bwfileheader = bwfileheader_path.as_str(); - let bwfileheader = "/home/drc/Downloads/unwig_testing_19dec2024/output/npy_output/"; + //let bwfileheader = "/home/drc/Downloads/unwig_testing_19dec2024/output/npy_output/"; //let bwfileheader = "/home/drc/Downloads/unwig_testing_19dec2024/output/wig_output/"; - let smoothsize: i32 = 10; + let smoothsize: i32 = 2; let output_type = "npy"; //let output_type = "wig"; let filetype = "bed"; let num_threads = 6; let score = false; - let stepsize = 5; + let stepsize = 1; let zoom = 0; let vec_count_type = vec!["start", "end", "core"];