Skip to content

Commit

Permalink
Do not store parameter K as a constant, but a field.
Browse files Browse the repository at this point in the history
  • Loading branch information
tibvdm committed Aug 27, 2024
1 parent 53c9490 commit 54c5ef4
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 29 deletions.
22 changes: 12 additions & 10 deletions sa-index/src/bounds_table.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
pub struct BoundsCache<const K: u32> {
pub struct BoundsCache {
pub bounds: Vec<Option<(usize, usize)>>,
pub base: usize,
pub k: usize,

ascii_array: [usize; 128],
powers_array: [usize; 10],
alphabet: Vec<u8>,
base: usize
alphabet: Vec<u8>
}

impl<const K: u32> BoundsCache<K> {
pub fn new(alphabet: String) -> BoundsCache<K> {
assert!(K < 10, "K must be less than 10");
impl BoundsCache {
pub fn new(alphabet: String, k: usize) -> BoundsCache {
assert!(k < 10, "K must be less than 10");

let alphabet = alphabet.to_uppercase().as_bytes().to_vec();
let base = alphabet.len();
Expand All @@ -25,14 +26,15 @@ impl<const K: u32> BoundsCache<K> {
}

// base^1 + base^2 + base^3 + ... + base^k = (base^(k + 1) - base) / (base - 1)
let capacity = (base.pow(K + 1) - base) / (base - 1);
let capacity = (base.pow(k as u32 + 1) - base) / (base - 1);

BoundsCache {
Self {
bounds: vec![None; capacity],
ascii_array,
powers_array,
alphabet,
base
base,
k
}
}

Expand Down Expand Up @@ -88,7 +90,7 @@ mod tests {

#[test]
fn test_bounds_cache() {
let kmer_cache = BoundsCache::<5>::new("ACDEFGHIKLMNPQRSTVWY".to_string());
let kmer_cache = BoundsCache::new("ACDEFGHIKLMNPQRSTVWY".to_string(), 5);

for i in 0..20_usize.pow(5) {
let kmer = kmer_cache.index_to_kmer(i);
Expand Down
37 changes: 18 additions & 19 deletions sa-index/src/sa_searcher.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use std::{cmp::min, ops::Deref};
use std::str::from_utf8;
use sa_mappings::proteins::{Protein, Proteins};

use crate::{
Expand Down Expand Up @@ -75,9 +74,9 @@ impl PartialEq for SearchAllSuffixesResult {
pub struct SparseSearcher(Searcher);

impl SparseSearcher {
pub fn new(sa: SuffixArray, proteins: Proteins) -> Self {
pub fn new(sa: SuffixArray, proteins: Proteins, k: usize) -> Self {
let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein));
let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein), k);
Self(searcher)
}
}
Expand All @@ -93,9 +92,9 @@ impl Deref for SparseSearcher {
pub struct DenseSearcher(Searcher);

impl DenseSearcher {
pub fn new(sa: SuffixArray, proteins: Proteins) -> Self {
pub fn new(sa: SuffixArray, proteins: Proteins, k: usize) -> Self {
let suffix_index_to_protein = DenseSuffixToProtein::new(&proteins.input_string);
let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein));
let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein), k);
Self(searcher)
}
}
Expand All @@ -122,7 +121,7 @@ impl Deref for DenseSearcher {
/// the functional analysis provided by Unipept
pub struct Searcher {
pub sa: SuffixArray,
pub kmer_cache: BoundsCache<5>,
pub kmer_cache: BoundsCache,
pub proteins: Proteins,
pub suffix_index_to_protein: Box<dyn SuffixToProteinIndex>
}
Expand All @@ -144,15 +143,15 @@ impl Searcher {
/// # Returns
///
/// Returns a new Searcher object
pub fn new(sa: SuffixArray, proteins: Proteins, suffix_index_to_protein: Box<dyn SuffixToProteinIndex>) -> Self {
pub fn new(sa: SuffixArray, proteins: Proteins, suffix_index_to_protein: Box<dyn SuffixToProteinIndex>, k: usize) -> Self {
// Create a KTable with all possible k-mers
let mut kmer_cache = BoundsCache::new("ACDEFGHIKLMNPQRSTVWY".to_string());
let mut kmer_cache = BoundsCache::new("ACDEFGHIKLMNPQRSTVWY".to_string(), k);

Check warning on line 148 in sa-index/src/sa_searcher.rs

View workflow job for this annotation

GitHub Actions / Check + test

variable does not need to be mutable

// Create the Searcher object
let mut searcher = Self { sa, kmer_cache, proteins, suffix_index_to_protein };

// Update the bounds for all k-mers in the KTable
for i in 0..20_usize.pow(5) {
for i in 0..searcher.kmer_cache.base.pow(k as u32) {
let kmer = searcher.kmer_cache.index_to_kmer(i);

// Calculate stricter starting bounds for the k-mers
Expand Down Expand Up @@ -308,7 +307,7 @@ impl Searcher {
// Use the (up to) first k characters of the search string as the kmer
// If the kmer is found in the cache, use the bounds from the cache as start bounds
// to find the bounds of the entire string
let mut max_mer_length = min(5, search_string.len());
let mut max_mer_length = min(self.kmer_cache.k, search_string.len());

Check warning on line 310 in sa-index/src/sa_searcher.rs

View workflow job for this annotation

GitHub Actions / Check + test

variable does not need to be mutable
if let Some(bounds) = self.kmer_cache.get_kmer(&search_string[..max_mer_length]) {
return self.search_bounds_no_cache(search_string, bounds);
}
Expand Down Expand Up @@ -568,7 +567,7 @@ mod tests {
let sa = SuffixArray::Original(vec![19, 10, 2, 13, 9, 8, 11, 5, 0, 3, 12, 15, 6, 1, 4, 17, 14, 16, 7, 18], 1);

let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein));
let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein), 3);

// search bounds 'A'
let bounds_res = searcher.search_bounds(&[b'A']);
Expand All @@ -590,7 +589,7 @@ mod tests {
let sa = SuffixArray::Original(vec![9, 0, 3, 12, 15, 6, 18], 3);

let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein));
let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein), 3);

// search suffix 'VAA'
let found_suffixes = searcher.search_matching_suffixes(&[b'V', b'A', b'A'], usize::MAX, false);
Expand All @@ -607,7 +606,7 @@ mod tests {
let sa = SuffixArray::Original(vec![19, 10, 2, 13, 9, 8, 11, 5, 0, 3, 12, 15, 6, 1, 4, 17, 14, 16, 7, 18], 1);

let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein));
let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein), 3);

let bounds_res = searcher.search_bounds(&[b'I']);
assert_eq!(bounds_res, BoundSearchResult::SearchResult((13, 16)));
Expand All @@ -623,7 +622,7 @@ mod tests {
let sa = SuffixArray::Original(vec![9, 0, 3, 12, 15, 6, 18], 3);

let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein));
let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein), 3);

// search bounds 'RIZ' with equal I and L
let found_suffixes = searcher.search_matching_suffixes(&[b'R', b'I', b'Z'], usize::MAX, true);
Expand All @@ -650,7 +649,7 @@ mod tests {

let sparse_sa = SuffixArray::Original(vec![0, 2, 4], 2);
let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein));
let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein), 3);

// search bounds 'IM' with equal I and L
let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'M'], usize::MAX, true);
Expand All @@ -672,7 +671,7 @@ mod tests {

let sparse_sa = SuffixArray::Original(vec![6, 0, 1, 5, 4, 3, 2], 1);
let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein));
let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein), 3);

let found_suffixes = searcher.search_matching_suffixes(&[b'I'], usize::MAX, true);
assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![2, 3, 4, 5]));
Expand All @@ -693,7 +692,7 @@ mod tests {

let sparse_sa = SuffixArray::Original(vec![6, 5, 4, 3, 2, 1, 0], 1);
let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein));
let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein), 3);

let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'I'], usize::MAX, true);
assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![0, 1, 2, 3, 4]));
Expand All @@ -714,7 +713,7 @@ mod tests {

let sparse_sa = SuffixArray::Original(vec![6, 4, 2, 0], 2);
let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein));
let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein), 3);

// search all places where II is in the string IIIILL, but with a sparse SA
// this way we check if filtering the suffixes works as expected
Expand All @@ -737,7 +736,7 @@ mod tests {

let sparse_sa = SuffixArray::Original(vec![6, 5, 4, 3, 2, 1, 0], 1);
let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein));
let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein), 3);

// search bounds 'II' with equal I and L
let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'I'], usize::MAX, true);
Expand Down

0 comments on commit 54c5ef4

Please sign in to comment.