Skip to content

Commit

Permalink
debug
Browse files Browse the repository at this point in the history
  • Loading branch information
tibvdm committed Jun 7, 2024
1 parent 76dda83 commit ea33d3c
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 20 deletions.
8 changes: 4 additions & 4 deletions bitarray/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ impl BitArray {
///
/// A `Result` indicating whether the write operation was successful or not.
pub fn data_to_writer(
data: Vec<i64>,
data: &Vec<i64>,
bits_per_value: usize,
max_capacity: usize,
writer: &mut impl Write
Expand Down Expand Up @@ -314,7 +314,7 @@ mod tests {
let data = vec![0x1234567890, 0xabcdef0123, 0x4567890abc, 0xdef0123456];
let mut writer = Vec::new();

data_to_writer(data, 40, 2, &mut writer).unwrap();
data_to_writer(&data, 40, 2, &mut writer).unwrap();

assert_eq!(
writer,
Expand All @@ -341,7 +341,7 @@ mod tests {
];
let mut writer = Vec::new();

data_to_writer(data, 32, 8, &mut writer).unwrap();
data_to_writer(&data, 32, 8, &mut writer).unwrap();

assert_eq!(
writer,
Expand Down Expand Up @@ -380,7 +380,7 @@ mod tests {
];
let mut writer = Vec::new();

data_to_writer(data, 32, 8, &mut writer).unwrap();
data_to_writer(&data, 32, 8, &mut writer).unwrap();

assert_eq!(
writer,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ module swap cluster/gallade
#PBS -o stdout.$PBS_JOBID
#PBS -e stderr.$PBS_JOBID

prefix="$VSC_DATA_VO/bram/"
prefix="/kyukon/data/gent/vo/000/gvo00038/suffix-array"

# load Rust
module load Rust/1.75.0-GCCcore-12.3.0
Expand All @@ -38,4 +38,4 @@ cd $PBS_O_WORKDIR
cargo build --release

# execute
./target/release/suffixarray_builder -d "$prefix"uniprot_protein_database_minimal.tsv -t "$prefix"taxons.tsv --sparseness-factor 3 --construction-algorithm lib-div-suf-sort -o "$prefix"uniprot_suffix_array_sparse3.bin
./target/release/sa-builder -d "$prefix"/proteins.tsv -t "$prefix"/taxons.tsv -o "$prefix"/sa_sparse3_compressed.bin -s 3 -a lib-div-suf-sort -c
41 changes: 41 additions & 0 deletions sa-builder/build.pbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/bash

#########################################################################################################
### This script is designed to run on the Ghent University HPC
###
### How to use:
###   1) Swap to the high-memory gallade cluster by executing `module swap cluster/gallade`
###   2) Navigate to the root of the project
###   3) Submit the job to the queue with `qsub sa-builder/build.pbs`
#########################################################################################################

# NOTE: every #PBS directive must appear before the first executable command.
# qsub stops scanning for directives as soon as it reaches a line that is not
# blank and not a comment, so anything placed after a command is ignored.

# define requested memory, cpu resources and email notifications
#PBS -m abe
#PBS -l walltime=10:00:00
#PBS -l mem=750gb
# ask for 1 node, 1 cpu (not more needed since we don't have parallelism)
#PBS -l nodes=1:ppn=1
#PBS -N suffix_array_construction_uniprot

# define output and error files
#PBS -o stdout.$PBS_JOBID
#PBS -e stderr.$PBS_JOBID

# go to cluster with high memory (kept after the directives so they are still parsed)
module swap cluster/gallade

# directory that holds the input files and receives the resulting suffix array
prefix="/kyukon/data/gent/vo/000/gvo00038/suffix-array"

# load Rust
module load Rust/1.75.0-GCCcore-12.3.0
module load Clang/16.0.6-GCCcore-12.3.0 # needed to build the bindings from Rust to C
module load CMake/3.26.3-GCCcore-12.3.0

# go to the directory the job was submitted from; abort if it is not reachable
cd "$PBS_O_WORKDIR" || exit 1

# compile; abort on failure so a stale binary from an earlier build is never executed
cargo build --release || exit 1

# execute
./target/release/sa-builder -d "$prefix/proteins.tsv" -t "$prefix/taxons.tsv" -o "$prefix/sa_sparse3.bin" -s 3 -a lib-div-suf-sort
24 changes: 16 additions & 8 deletions sa-builder/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::{
File,
OpenOptions
},
io::Result
io, time::{SystemTime, SystemTimeError, UNIX_EPOCH}
};

use clap::Parser;
Expand Down Expand Up @@ -33,24 +33,27 @@ fn main() {

eprintln!();
eprintln!("📋 Started loading the taxon file...");
let start_taxon_time = get_time_ms().unwrap();
let taxon_id_calculator =
TaxonAggregator::try_from_taxonomy_file(&taxonomy, AggregationMethod::LcaStar)
.unwrap_or_else(|err| eprint_and_exit(err.to_string().as_str()));
eprintln!("✅ Successfully loaded the taxon file!");
eprintln!("✅ Successfully loaded the taxon file in {} seconds!", (get_time_ms().unwrap() - start_taxon_time) / 1000.0);
eprintln!("\tAggregation method: LCA*");

eprintln!();
eprintln!("📋 Started loading the proteins...");
let start_proteins_time = get_time_ms().unwrap();
let mut data =
Proteins::try_from_database_file_without_annotations(&database_file, &taxon_id_calculator)
.unwrap_or_else(|err| eprint_and_exit(err.to_string().as_str()));
eprintln!("✅ Successfully loaded the proteins!");
eprintln!("✅ Successfully loaded the proteins in {} seconds!", (get_time_ms().unwrap() - start_proteins_time) / 1000.0);

eprintln!();
eprintln!("📋 Started building the suffix array...");
let start_ssa_time = get_time_ms().unwrap();
let sa = build_ssa(&mut data, &construction_algorithm, sparseness_factor)
.unwrap_or_else(|err| eprint_and_exit(err.to_string().as_str()));
eprintln!("✅ Successfully built the suffix array!");
eprintln!("✅ Successfully built the suffix array in {} seconds!", (get_time_ms().unwrap() - start_ssa_time) / 1000.0);
eprintln!("\tAmount of items: {}", sa.len());
eprintln!("\tSample rate: {}", sparseness_factor);

Expand All @@ -60,29 +63,30 @@ fn main() {

eprintln!();
eprintln!("📋 Started dumping the suffix array...");
let start_dump_time = get_time_ms().unwrap();

if compress_sa {
let bits_per_value = (data.len() as f64).log2().ceil() as usize;

if let Err(err) =
dump_compressed_suffix_array(sa, sparseness_factor, bits_per_value, &mut file)
dump_compressed_suffix_array(&sa, sparseness_factor, bits_per_value, &mut file)
{
eprint_and_exit(err.to_string().as_str());
};

eprintln!("✅ Successfully dumped the suffix array!");
eprintln!("✅ Successfully dumped the suffix array in {} seconds!", (get_time_ms().unwrap() - start_dump_time) / 1000.0);
eprintln!("\tAmount of bits per item: {}", bits_per_value);
} else {
if let Err(err) = dump_suffix_array(&sa, sparseness_factor, &mut file) {
eprint_and_exit(err.to_string().as_str());
}

eprintln!("✅ Successfully dumped the suffix array!");
eprintln!("✅ Successfully dumped the suffix array in {} seconds!", (get_time_ms().unwrap() - start_dump_time) / 1000.0);
eprintln!("\tAmount of bits per item: 64");
}
}

fn open_file(file: &str) -> Result<File> {
fn open_file(file: &str) -> io::Result<File> {
OpenOptions::new()
.create(true)
.write(true)
Expand All @@ -94,3 +98,7 @@ fn eprint_and_exit(err: &str) -> ! {
eprintln!("{}", err);
std::process::exit(1);
}

/// Returns the current wall-clock time as milliseconds elapsed since the Unix epoch.
///
/// The value is derived from [`SystemTime`], i.e. the system clock, so the
/// difference between two calls may be affected by clock adjustments.
///
/// # Errors
///
/// Returns a [`SystemTimeError`] when the system clock reports a time earlier
/// than [`UNIX_EPOCH`].
pub fn get_time_ms() -> Result<f64, SystemTimeError> {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH)?;
    // Nanosecond count scaled down to (fractional) milliseconds.
    let millis = since_epoch.as_nanos() as f64 * 1e-6;
    Ok(millis)
}
12 changes: 6 additions & 6 deletions sa-compression/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use sa_index::SuffixArray;
///
/// Returns an error if writing to the writer fails.
pub fn dump_compressed_suffix_array(
sa: Vec<i64>,
sa: &Vec<i64>,
sparseness_factor: u8,
bits_per_value: usize,
writer: &mut impl Write
Expand Down Expand Up @@ -146,7 +146,7 @@ mod tests {
let sa = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];

let mut writer = vec![];
dump_compressed_suffix_array(sa, 1, 8, &mut writer).unwrap();
dump_compressed_suffix_array(&sa, 1, 8, &mut writer).unwrap();

assert_eq!(
writer,
Expand All @@ -167,7 +167,7 @@ mod tests {
valid_write_count: 0
};

dump_compressed_suffix_array(vec![], 1, 8, &mut writer).unwrap();
dump_compressed_suffix_array(&vec![], 1, 8, &mut writer).unwrap();
}

#[test]
Expand All @@ -177,7 +177,7 @@ mod tests {
valid_write_count: 1
};

dump_compressed_suffix_array(vec![], 1, 8, &mut writer).unwrap();
dump_compressed_suffix_array(&vec![], 1, 8, &mut writer).unwrap();
}

#[test]
Expand All @@ -187,7 +187,7 @@ mod tests {
valid_write_count: 2
};

dump_compressed_suffix_array(vec![], 1, 8, &mut writer).unwrap();
dump_compressed_suffix_array(&vec![], 1, 8, &mut writer).unwrap();
}

#[test]
Expand All @@ -197,7 +197,7 @@ mod tests {
valid_write_count: 3
};

dump_compressed_suffix_array(vec![1], 1, 8, &mut writer).unwrap();
dump_compressed_suffix_array(&vec![1], 1, 8, &mut writer).unwrap();
}

#[test]
Expand Down

0 comments on commit ea33d3c

Please sign in to comment.