Skip to content

Commit

Permalink
allow all sparseness factors
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonVandeVyver committed Oct 22, 2024
1 parent ae2e5fa commit f1dc185
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 18 deletions.
20 changes: 3 additions & 17 deletions libsais64-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,30 +17,16 @@ pub mod bitpacking;
///
/// Returns Ok with the suffix array built over the text if construction succeeds
/// Returns Err with an error message if construction of the suffix array failed
pub fn sais64(text: &Vec<u8>, sparseness_factor: u8) -> Result<Vec<i64>, &str> {
let sparseness_factor = sparseness_factor as usize;
let mut libsais_sparseness = sparseness_factor;
let mut sa;
pub fn sais64(text: &Vec<u8>, libsais_sparseness: usize) -> Result<Vec<i64>, &str> {
let exit_code;
let mut sa;

if sparseness_factor * BITS_PER_CHAR <= 16 {
if libsais_sparseness * BITS_PER_CHAR <= 16 {
// bitpacked values fit in uint16_t
let packed_text = bitpack_text_16(text, libsais_sparseness);
sa = vec![0; packed_text.len()];
exit_code = unsafe { libsais16x64(packed_text.as_ptr(), sa.as_mut_ptr(), packed_text.len() as i64, 0, null_mut()) };
} else {
// bitpacked values do not fit in uint16_t, use 32-bit text
// set libsais_sparseness to highest sparseness factor fitting in 32-bit value and sparseness factor divisible by libsais sparseness
// max 28 out of 32 bits used, because a bucket is created for every element of the alfabet 8 * 2^28).
libsais_sparseness = 28 / BITS_PER_CHAR;
while sparseness_factor % libsais_sparseness != 0 && libsais_sparseness * BITS_PER_CHAR > 16 {
libsais_sparseness -= 1;
}

if libsais_sparseness * BITS_PER_CHAR <= 16 {
return Err("invalid sparseness factor");
}

let packed_text = bitpack_text_32(text, libsais_sparseness);
sa = vec![0; packed_text.len()];
let k = 1 << (libsais_sparseness * BITS_PER_CHAR);
Expand Down
26 changes: 25 additions & 1 deletion sa-builder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ pub fn build_ssa(

// Build the suffix array using the selected algorithm
let mut sa = match construction_algorithm {
SAConstructionAlgorithm::LibSais => libsais64_rs::sais64(&text, sparseness_factor)?,
SAConstructionAlgorithm::LibSais => libsais64(&text, sparseness_factor)?,
SAConstructionAlgorithm::LibDivSufSort => libdivsufsort_rs::divsufsort64(text).ok_or("Building suffix array failed")?
};

Expand All @@ -66,6 +66,30 @@ pub fn build_ssa(
Ok(sa)
}

/// Largest sparseness factor that is passed to `libsais64_rs::sais64` directly.
///
/// NOTE(review): this bound was previously computed as `28 / BITS_PER_CHAR` inside
/// `libsais64_rs::sais64` (at most 28 of the 32 bits of a packed symbol are used, because
/// libsais creates a bucket for every symbol of the packed alphabet). Confirm that it still
/// matches `BITS_PER_CHAR` whenever the bit packing changes.
const MAX_SPARSENESS: usize = 5;

/// Builds a sparse suffix array over `text` for an arbitrary sparseness factor.
///
/// The requested factor is split into two parts:
/// * `libsais_sparseness`: the largest divisor of `sparseness_factor` that does not exceed
///   [`MAX_SPARSENESS`]; this part is passed to `libsais64_rs::sais64`.
/// * `sample_rate`: the remaining quotient, applied afterwards through `sample_sa`.
///
/// # Arguments
/// * `text` - The text to build the suffix array over
/// * `sparseness_factor` - The requested sparseness factor, must be at least 1
///
/// # Errors
///
/// Returns an error if `sparseness_factor` is 0, or if `libsais64_rs::sais64` fails.
fn libsais64(text: &Vec<u8>, sparseness_factor: u8) -> Result<Vec<i64>, &str> {
    // 0 is divisible by every candidate, so without this guard it would silently be treated
    // as MAX_SPARSENESS with a sample rate of 0.
    if sparseness_factor == 0 {
        return Err("sparseness factor must be at least 1");
    }
    let sparseness_factor = sparseness_factor as usize;

    // Pick the highest sparseness factor libsais can handle that divides the requested factor.
    // 1 divides everything, so the search always succeeds; `unwrap_or(1)` only documents that.
    let libsais_sparseness = (1..=MAX_SPARSENESS)
        .rev()
        .find(|&candidate| sparseness_factor % candidate == 0)
        .unwrap_or(1);
    let sample_rate = sparseness_factor / libsais_sparseness;
    println!(" Sparseness factor: {}", sparseness_factor);
    println!(" Libsais sparseness factor: {}", libsais_sparseness);
    println!(" Sample rate: {}", sample_rate);

    let mut sa = libsais64_rs::sais64(text, libsais_sparseness)?;

    // Apply the part of the sparseness factor that libsais did not cover.
    // The cast is lossless: sample_rate <= sparseness_factor, which originated from a u8.
    if sample_rate > 1 {
        sample_sa(&mut sa, sample_rate as u8);
    }

    Ok(sa)
}

/// Translate all L's to I's in the given text
///
/// # Arguments
Expand Down

0 comments on commit f1dc185

Please sign in to comment.