Skip to content

Commit

Permalink
Try to get more hits
Browse files Browse the repository at this point in the history
  • Loading branch information
tibvdm committed Aug 26, 2024
1 parent 252d9c8 commit 6d8c54e
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 9 deletions.
2 changes: 0 additions & 2 deletions sa-index/src/bounds_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ impl<const K: u32> BoundsCache<K> {
// 20^1 + 20^2 + 20^3 + ... + 20^(K) = (20^(K + 1) - 20) / 19
let capacity = (20_u32.pow(K + 1) - 20) / 19;

eprintln!("Capacity: {}", capacity);

BoundsCache {
bounds: vec![None; capacity as usize],
ascii_array,
Expand Down
26 changes: 19 additions & 7 deletions sa-index/src/sa_searcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,13 +157,11 @@ impl Searcher {

// Calculate stricter starting bounds for the 3-mers
// TODO: IL equality
let bounds = searcher.search_bounds(&kmer);
let bounds = searcher.search_bounds_no_cache(&kmer, (0, searcher.sa.len()));

if let BoundSearchResult::SearchResult((min_bound, max_bound)) = bounds {
let min_bound = if min_bound == 0 { 0 } else { min_bound - 1 };
searcher.kmer_cache.update_kmer(&kmer, (min_bound, max_bound));
} else {
searcher.kmer_cache.update_kmer(&kmer, (0, 0));
}
}

Expand Down Expand Up @@ -248,8 +246,8 @@ impl Searcher {
/// The first element of the returned tuple is true if a match was found
/// The second element of the returned tuple is the index of the minimum or maximum bound
/// for the match (depending on `bound`)
fn binary_search_bound(&self, bound: BoundSearch, search_string: &[u8]) -> (bool, usize) {
let (mut left, mut right) = self.kmer_cache.get_kmer(search_string).unwrap_or((0, self.sa.len()));
fn binary_search_bound(&self, bound: BoundSearch, search_string: &[u8], start_bounds: (usize, usize)) -> (bool, usize) {
let (mut left, mut right) = start_bounds;
let mut lcp_left: usize = 0;
let mut lcp_right: usize = 0;
let mut found = false;
Expand Down Expand Up @@ -301,17 +299,31 @@ impl Searcher {
/// Returns the minimum and maximum bound of all matches in the suffix array, or `NoMatches` if
/// no matches were found
///
/// `NoMatches` is also returned without searching when `search_string` is empty, or when the
/// kmer cache has no entry for the leading (at most 5 byte) prefix of `search_string`.
pub fn search_bounds(&self, search_string: &[u8]) -> BoundSearchResult {
// If the string is empty, we don't need to search as nothing can be matched
if search_string.is_empty() {
return BoundSearchResult::NoMatches;
}

// NOTE(review): this two-argument call looks like the pre-change line of the diff (the
// three-parameter `binary_search_bound` takes explicit start bounds) — confirm it is removed
let (found_min, min_bound) = self.binary_search_bound(Minimum, search_string);
// Do a quick lookup in the kmer cache
// Use the (up to) first 5 characters of the search string as the kmer
// If the kmer is found in the cache, use the bounds from the cache as start bounds
// to find the bounds of the entire string
let max_mer = &search_string[..min(5, search_string.len())];
if let Some(bounds) = self.kmer_cache.get_kmer(max_mer) {
return self.search_bounds_no_cache(search_string, bounds);
}

// No cache entry for the prefix: report no matches instead of searching the whole array
BoundSearchResult::NoMatches
}

/// Searches for the bounds of all matches of `search_string`, passing the caller-supplied
/// `start_bounds` to `binary_search_bound` as the initial `(left, right)` search range.
///
/// Returns `NoMatches` if the minimum-bound search does not find a match; otherwise returns
/// `SearchResult((min_bound, max_bound + 1))`.
pub fn search_bounds_no_cache(&self, search_string: &[u8], start_bounds: (usize, usize)) -> BoundSearchResult {
let (found_min, min_bound) = self.binary_search_bound(Minimum, search_string, start_bounds);

// Without a minimum bound there is no match, so the maximum-bound search is skipped
if !found_min {
return BoundSearchResult::NoMatches;
}

// NOTE(review): the two-argument call below looks like the pre-change line of the diff;
// the three-argument call after it shadows its result — confirm the first one is removed
let (_, max_bound) = self.binary_search_bound(Maximum, search_string);
let (_, max_bound) = self.binary_search_bound(Maximum, search_string, start_bounds);

// `max_bound + 1`: presumably makes the upper bound exclusive — TODO confirm with callers
BoundSearchResult::SearchResult((min_bound, max_bound + 1))
}
Expand Down

0 comments on commit 6d8c54e

Please sign in to comment.