Skip to content

Commit

Permalink
take ownership of text when bitpacking, don't keep both texts in memory
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonVandeVyver committed Oct 25, 2024
1 parent 4fd936f commit 0fc7431
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 17 deletions.
6 changes: 3 additions & 3 deletions libsais64-rs/src/bitpacking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ fn get_rank(c: u8) -> u8 {
pub const BITS_PER_CHAR: usize = 5;

// Bitpack text in a vector of u8 elements. BITS_PER_CHAR * sparseness_factor <= 8.
pub fn bitpack_text_8(text: &[u8], sparseness_factor: usize) -> Vec<u8> {
pub fn bitpack_text_8(text: Vec<u8>, sparseness_factor: usize) -> Vec<u8> {
assert!(BITS_PER_CHAR * sparseness_factor <= 8);

let num_ints = (text.len() + (sparseness_factor - 1)) / sparseness_factor;
Expand Down Expand Up @@ -43,7 +43,7 @@ pub fn bitpack_text_8(text: &[u8], sparseness_factor: usize) -> Vec<u8> {
}

// Bitpack text in a vector of u16 elements. BITS_PER_CHAR * sparseness_factor <= 16.
pub fn bitpack_text_16(text: &[u8], sparseness_factor: usize) -> Vec<u16> {
pub fn bitpack_text_16(text: Vec<u8>, sparseness_factor: usize) -> Vec<u16> {
assert!(BITS_PER_CHAR * sparseness_factor <= 16);

let num_ints = (text.len() + (sparseness_factor - 1)) / sparseness_factor;
Expand Down Expand Up @@ -75,7 +75,7 @@ pub fn bitpack_text_16(text: &[u8], sparseness_factor: usize) -> Vec<u16> {
}

// Bitpack text in a vector of u32 elements. BITS_PER_CHAR * sparseness_factor <= 32.
pub fn bitpack_text_32(text: &[u8], sparseness_factor: usize) -> Vec<u32> {
pub fn bitpack_text_32(text: Vec<u8>, sparseness_factor: usize) -> Vec<u32> {
assert!(BITS_PER_CHAR * sparseness_factor <= 32);

let num_ints = (text.len() + (sparseness_factor - 1)) / sparseness_factor;
Expand Down
10 changes: 4 additions & 6 deletions libsais64-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,17 @@ pub mod bitpacking;
///
/// Returns Ok with the suffix array built over the text if construction succeeds
/// Returns Err with an error message if construction of the suffix array failed
pub fn sais64(text: &Vec<u8>, libsais_sparseness: usize) -> Result<Vec<i64>, &str> {
pub fn sais64(text: Vec<u8>, libsais_sparseness: usize) -> Result<Vec<i64>, &'static str> {
let exit_code;
let mut sa;

let required_bits = libsais_sparseness * BITS_PER_CHAR;
if required_bits <= 8 {
// bitpacked values fit in uint8_t
let packed_text_data;
let packed_text = if libsais_sparseness == 1 {
text
} else {
packed_text_data = bitpack_text_8(text, libsais_sparseness);
&packed_text_data
bitpack_text_8(text, libsais_sparseness)
};

sa = vec![0; packed_text.len()];
Expand Down Expand Up @@ -68,8 +66,8 @@ mod tests {
#[test]
fn check_build_sa_with_libsais64() {
let sparseness_factor = 4;
let mut text = "BANANA-BANANA$".as_bytes().to_vec();
let sa = sais64(&mut text, sparseness_factor);
let text = "BANANA-BANANA$".as_bytes().to_vec();
let sa = sais64(text, sparseness_factor);
let correct_sa: Vec<i64> = vec![12, 8, 0, 4];
assert_eq!(sa, Ok(correct_sa));
}
Expand Down
8 changes: 4 additions & 4 deletions sa-builder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,18 @@ pub enum SAConstructionAlgorithm {
///
/// The errors that occurred during the building of the suffix array itself
pub fn build_ssa(
text: &mut Vec<u8>,
mut text: Vec<u8>,
construction_algorithm: &SAConstructionAlgorithm,
sparseness_factor: u8
) -> Result<Vec<i64>, Box<dyn Error>> {
// translate all L's to I's
translate_l_to_i(text);
translate_l_to_i(&mut text);

// Build the suffix array using the selected algorithm
let mut sa = match construction_algorithm {
SAConstructionAlgorithm::LibSais => libsais64(text, sparseness_factor)?,
SAConstructionAlgorithm::LibDivSufSort => {
libdivsufsort_rs::divsufsort64(text).ok_or("Building suffix array failed")?
libdivsufsort_rs::divsufsort64(&mut text).ok_or("Building suffix array failed")?
}
};

Expand All @@ -71,7 +71,7 @@ pub fn build_ssa(

// Max sparseness for libsais because it creates a bucket for each element of the alphabet (2 ^ (sparseness * bits_per_char) buckets).
const MAX_SPARSENESS: usize = 5;
fn libsais64(text: &Vec<u8>, sparseness_factor: u8) -> Result<Vec<i64>, &str> {
fn libsais64(text: Vec<u8>, sparseness_factor: u8) -> Result<Vec<i64>, &'static str> {
let sparseness_factor = sparseness_factor as usize;

// set libsais_sparseness to the highest sparseness factor that fits in a 32-bit value and that divides the requested sparseness factor
Expand Down
7 changes: 3 additions & 4 deletions sa-builder/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ fn main() {
eprintln!();
eprintln!("📋 Started loading the proteins...");
let start_proteins_time = get_time_ms().unwrap();
let mut data = Proteins::try_from_database_file_uncompressed(&database_file)
let data = Proteins::try_from_database_file_uncompressed(&database_file)
.unwrap_or_else(|err| eprint_and_exit(err.to_string().as_str()));
let bits_per_value = (data.len() as f64).log2().ceil() as usize;
eprintln!(
"✅ Successfully loaded the proteins in {} seconds!",
(get_time_ms().unwrap() - start_proteins_time) / 1000.0
Expand All @@ -31,7 +32,7 @@ fn main() {
eprintln!();
eprintln!("📋 Started building the suffix array...");
let start_ssa_time = get_time_ms().unwrap();
let sa = build_ssa(&mut data, &construction_algorithm, sparseness_factor)
let sa = build_ssa(data, &construction_algorithm, sparseness_factor)
.unwrap_or_else(|err| eprint_and_exit(err.to_string().as_str()));
eprintln!(
"✅ Successfully built the suffix array in {} seconds!",
Expand All @@ -48,8 +49,6 @@ fn main() {
let start_dump_time = get_time_ms().unwrap();

if compress_sa {
let bits_per_value = (data.len() as f64).log2().ceil() as usize;

if let Err(err) = dump_compressed_suffix_array(sa, sparseness_factor, bits_per_value, &mut file) {
eprint_and_exit(err.to_string().as_str());
};
Expand Down

0 comments on commit 0fc7431

Please sign in to comment.