From aa6ef9ffbc4682ebc5cd33d1beb641bc4d20ee7e Mon Sep 17 00:00:00 2001 From: Simon Van de Vyver Date: Thu, 24 Oct 2024 11:17:08 +0200 Subject: [PATCH] add comments + do not bitpack text sparseness is 1 --- libsais64-rs/src/bitpacking.rs | 8 ++++++++ libsais64-rs/src/lib.rs | 14 +++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/libsais64-rs/src/bitpacking.rs b/libsais64-rs/src/bitpacking.rs index 9ee002e..d6d4cc8 100644 --- a/libsais64-rs/src/bitpacking.rs +++ b/libsais64-rs/src/bitpacking.rs @@ -9,8 +9,12 @@ fn get_rank(c: u8) -> u8 { } } +// Amount of bits necessary to represent one character in the protein text. pub const BITS_PER_CHAR: usize = 5; + +// Bitpack text in a vector of u8 elements. BITS_PER_CHAR * sparseness_factor <= 8. pub fn bitpack_text_8(text: &Vec<u8>, sparseness_factor: usize) -> Vec<u8> { + assert!(BITS_PER_CHAR * sparseness_factor <= 8); let num_ints = (text.len() + (sparseness_factor-1)) / sparseness_factor; let mut text_packed = vec![0; num_ints]; @@ -42,7 +46,9 @@ pub fn bitpack_text_8(text: &Vec<u8>, sparseness_factor: usize) -> Vec<u8> { } +// Bitpack text in a vector of u16 elements. BITS_PER_CHAR * sparseness_factor <= 16. pub fn bitpack_text_16(text: &Vec<u8>, sparseness_factor: usize) -> Vec<u16> { + assert!(BITS_PER_CHAR * sparseness_factor <= 16); let num_ints = (text.len() + (sparseness_factor-1)) / sparseness_factor; let mut text_packed = vec![0; num_ints]; @@ -74,7 +80,9 @@ pub fn bitpack_text_16(text: &Vec<u8>, sparseness_factor: usize) -> Vec<u16> { } +// Bitpack text in a vector of u32 elements. BITS_PER_CHAR * sparseness_factor <= 32.
pub fn bitpack_text_32(text: &Vec<u8>, sparseness_factor: usize) -> Vec<u32> { + assert!(BITS_PER_CHAR * sparseness_factor <= 32); let num_ints = (text.len() + (sparseness_factor-1)) / sparseness_factor; let mut text_packed = vec![0; num_ints]; diff --git a/libsais64-rs/src/lib.rs b/libsais64-rs/src/lib.rs index f6b6783..d9eca5a 100644 --- a/libsais64-rs/src/lib.rs +++ b/libsais64-rs/src/lib.rs @@ -8,7 +8,7 @@ include!(concat!(env!("OUT_DIR"), "/bindings.rs")); pub mod bitpacking; -/// Builds the suffix array over the `text` using the libsais64 algorithm +/// Builds the suffix array over the `text` using the libsais algorithm /// /// # Arguments /// * `text` - The text used for suffix array construction @@ -24,7 +24,7 @@ pub fn sais64(text: &Vec<u8>, libsais_sparseness: usize) -> Result<Vec<i64>, &st let required_bits = libsais_sparseness * BITS_PER_CHAR; if required_bits <= 8 { // bitpacked values fit in uint8_t - let packed_text = bitpack_text_8(text, libsais_sparseness); + let packed_text = if libsais_sparseness == 1 { text } else { &bitpack_text_8(text, libsais_sparseness) }; sa = vec![0; packed_text.len()]; exit_code = unsafe { libsais64(packed_text.as_ptr(), sa.as_mut_ptr(), packed_text.len() as i64, 0, null_mut()) }; } else if required_bits <= 16 { @@ -54,14 +54,10 @@ mod tests { #[test] fn check_build_sa_with_libsais64() { - let bits_per_char = 5; let sparseness_factor = 4; - let mut text = [100834, // BANA - 493603, // NA-B - 80975, // ANAN - 65536 // A$ - ].to_vec(); + let mut text = "BANANA-BANANA$".as_bytes().to_vec(); let sa = sais64(&mut text, sparseness_factor); - assert_eq!(sa, Some(vec![12, 8, 0, 4])); + let correct_sa: Vec<i64> = vec![12, 8, 0, 4]; + assert_eq!(sa, Ok(correct_sa)); } }