Skip to content

Commit

Permalink
use adapted libsais library
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonVandeVyver committed Oct 18, 2024
1 parent 9053d25 commit 0424cd3
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 13 deletions.
2 changes: 1 addition & 1 deletion libsais64-rs/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ fn main() -> Result<(), Box<dyn Error>> {

// Clone the C library
Command::new("git")
.args(["clone", "https://github.com/IlyaGrebnov/libsais.git", "--depth=1"])
.args(["clone", "git@github.com:unipept/unipept-libsais.git", "libsais", "--depth=1"])
.status()
.expect("Failed to clone the libsais repository");

Expand Down
4 changes: 2 additions & 2 deletions libsais64-rs/libsais-wrapper.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "libsais/include/libsais64.h"
#include "libsais/include/libsais16x64.h"


int64_t libsais64(const uint8_t * T, int64_t * SA, int64_t n, int64_t fs, int64_t * freq);
int64_t libsais16x64(const uint16_t * T, int64_t * SA, int64_t n, int64_t fs, int64_t * freq);
5 changes: 3 additions & 2 deletions libsais64-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
use std::ptr::null_mut;
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));

/// Builds the suffix array over the `text` using the libsais64 algorithm
Expand All @@ -13,9 +14,9 @@ include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
///
/// Returns Some with the suffix array built over the text if construction succeeds
/// Returns None if construction of the suffix array failed
pub fn sais64_long(text: &mut Vec<i64>, alphabet_size: i64, sparseness_factor: u8) -> Option<Vec<i64>> {
pub fn sais64(text: &Vec<u16>, sparseness_factor: u8) -> Option<Vec<i64>> {
let mut sa = vec![0; text.len()];
let exit_code = unsafe { libsais64_long(text.as_mut_ptr(), sa.as_mut_ptr(), text.len() as i64, alphabet_size, 0) };
let exit_code = unsafe { libsais16x64(text.as_ptr(), sa.as_mut_ptr(), text.len() as i64, 0, null_mut()) };
if exit_code == 0 {
let sparseness_factor = sparseness_factor as i64;
for elem in sa.iter_mut() {
Expand Down
10 changes: 5 additions & 5 deletions sa-builder/src/bitpacking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ fn get_rank(c: u8) -> u8 {
}

pub const BITS_PER_CHAR: usize = 5;
pub fn bitpack_text(text: &Vec<u8>, sparseness_factor: u8) -> Vec<i64> {
pub fn bitpack_text(text: &Vec<u8>, sparseness_factor: u8) -> Vec<u16> {
let sparseness_factor = sparseness_factor as usize;
let num_ints = (text.len() + (sparseness_factor-1)) / sparseness_factor;
let mut text_packed = vec![0; num_ints];
Expand All @@ -21,19 +21,19 @@ pub fn bitpack_text(text: &Vec<u8>, sparseness_factor: u8) -> Vec<i64> {

for i in 0..(num_ints-1) {
let ti = i * sparseness_factor;
let mut element = 0i64;
let mut element = 0u16;
for j in 0..sparseness_factor {
let rank_c = get_rank(text[ti + j]) as i64;
let rank_c = get_rank(text[ti + j]) as u16;
element |= rank_c << (BITS_PER_CHAR * (sparseness_factor - 1 - j));
}
text_packed[i] = element;
}

// Handle the last element
let mut last_element = 0i64;
let mut last_element = 0u16;
let last_el_start = sparseness_factor * (num_ints - 1);
for i in 0..((text.len() - 1) % sparseness_factor + 1) {
let rank_c = get_rank(text[last_el_start + i]) as i64;
let rank_c = get_rank(text[last_el_start + i]) as u16;
last_element |= rank_c << (BITS_PER_CHAR * (sparseness_factor - 1 - i));
}
text_packed[num_ints - 1] = last_element;
Expand Down
5 changes: 2 additions & 3 deletions sa-builder/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use std::error::Error;
use crate::bitpacking::bitpack_text;
use crate::bitpacking::BITS_PER_CHAR;
use clap::{Parser, ValueEnum};

pub mod bitpacking;
Expand Down Expand Up @@ -59,9 +58,9 @@ pub fn build_ssa(
// Build the suffix array using the selected algorithm
let mut sa = match construction_algorithm {
SAConstructionAlgorithm::LibSais => {
let mut packed_text = bitpack_text(text, sparseness_factor);
let packed_text = bitpack_text(text, sparseness_factor);

libsais64_rs::sais64_long(&mut packed_text, (1 as i64) << (BITS_PER_CHAR * sparseness_factor as usize), sparseness_factor)
libsais64_rs::sais64(&packed_text, sparseness_factor)
},
SAConstructionAlgorithm::LibDivSufSort => libdivsufsort_rs::divsufsort64(text)
}
Expand Down

0 comments on commit 0424cd3

Please sign in to comment.