diff --git a/sharding/src/hash/bkdrsubstr.rs b/sharding/src/hash/bkdrsubstr.rs
new file mode 100644
index 000000000..400559620
--- /dev/null
+++ b/sharding/src/hash/bkdrsubstr.rs
@@ -0,0 +1,83 @@
+use super::DebugName;
+use std::fmt::Display;
+
+/// Hashes only part of the key, with a hash algorithm similar to `bkdrsub`.
+///
+/// Algorithm name: `bkdrsubstr-$markstr`.
+/// The hash key is the part of the key after the first `#` and before the first
+/// occurrence of `$markstr` that follows it.
+/// Example with `bkdrsubstr-(((((`: for the key `abc#123_456(((((789` the hash key
+/// is `123_456`.
+/// Notes: the `#` must be present, otherwise the hash key is empty; `$markstr` may be
+/// absent, in which case everything after the `#` is the hash key.
+#[derive(Clone, Default, Debug)]
+pub struct Bkdrsubstr {
+    len: u8,
+    markerstr: Vec<u8>,
+    name: DebugName,
+}
+
+impl Bkdrsubstr {
+    /// Builds the hasher from an algorithm name of the form `bkdrsubstr-$markstr`.
+    ///
+    /// # Panics
+    /// Panics if `alg` does not split into exactly two parts on
+    /// `HASHER_NAME_DELIMITER`, if the first part is not `bkdrsubstr`, or if the
+    /// marker is longer than 255 bytes.
+    pub fn from(alg: &str) -> Self {
+        let alg_parts: Vec<&str> = alg.split(super::HASHER_NAME_DELIMITER).collect();
+        assert!(alg_parts.len() == 2);
+        assert_eq!(alg_parts[0], "bkdrsubstr");
+        let markerstr = alg_parts[1].as_bytes().to_vec();
+        // The length is stored in a `u8`; reject markers the cast below would truncate.
+        assert!(markerstr.len() <= u8::MAX as usize);
+        Self {
+            len: markerstr.len() as u8,
+            markerstr,
+            name: alg.into(),
+        }
+    }
+}
+
+impl super::Hash for Bkdrsubstr {
+    /// Returns the BKDR hash (seed 131) of the hash key, masked to 31 bits.
+    ///
+    /// An empty marker never matches, so everything after the `#` is hashed.
+    fn hash<S: super::HashKey>(&self, key: &S) -> i64 {
+        const SEED: i32 = 131; // 31 131 1313 13131 131313 etc..
+        const START_CHAR: u8 = b'#';
+
+        let key_len = key.len();
+        let marker_len = usize::from(self.len);
+        // True when the whole marker occurs in `key` at `start`. `<=` lets a marker
+        // that ends exactly at the end of the key match as well.
+        let marker_at = |start: usize| {
+            marker_len > 0
+                && start + marker_len <= key_len
+                && (0..marker_len).all(|k| key.at(start + k) == self.markerstr[k])
+        };
+
+        let mut hash = 0_i32;
+        let mut in_hash_key = false;
+        for i in 0..key_len {
+            let c = key.at(i);
+            if !in_hash_key {
+                // Still looking for the '#'; the '#' itself is not hashed.
+                in_hash_key = c == START_CHAR;
+                continue;
+            }
+            if marker_at(i) {
+                break;
+            }
+            hash = hash.wrapping_mul(SEED).wrapping_add(i32::from(c));
+        }
+
+        i64::from(hash & 0x7FFF_FFFF)
+    }
+}
+
+impl Display for Bkdrsubstr {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:?}", self.name)
+    }
+}
diff --git a/sharding/src/hash/mod.rs b/sharding/src/hash/mod.rs
index a8214ba43..1e92e97fa 100644
--- a/sharding/src/hash/mod.rs
+++ b/sharding/src/hash/mod.rs
@@ -1,6 +1,7 @@
 pub mod bkdr;
 pub mod bkdrabscrc32;
 pub mod bkdrsub;
+pub mod bkdrsubstr;
 pub mod crc32;
 pub mod crc32local;
 pub mod crc64;
@@ -25,8 +26,8 @@ pub use rawsuffix::RawSuffix;

 pub mod crc;

+use self::{bkdrsub::Bkdrsub, bkdrsubstr::Bkdrsubstr, crc64::Crc64, fnv1::Fnv1F32, fnv1::Fnv1aF64};
 use enum_dispatch::enum_dispatch;
-use self::{bkdrsub::Bkdrsub, crc64::Crc64, fnv1::Fnv1F32, fnv1::Fnv1aF64};

 // 占位hash,主要用于兼容服务框架,供mq等业务使用
 pub const HASH_PADDING: &str = "padding";
@@ -71,6 +72,7 @@ pub enum Hasher {
     Bkdr(Bkdr),
     Bkdrsub(Bkdrsub),
     BkdrAbsCrc32(BkdrAbsCrc32), // 混合三种hash:先bkdr,再abs,最后进行crc32计算
+    Bkdrsubstr(Bkdrsubstr), // like Bkdrsub, but the hash key ends at the marker string given after '-' instead of at '_'
     Crc32(Crc32),
     Crc32Short(Crc32Short), // mc short crc32
     Crc32Num(Crc32Num), // crc32 for a hash key whick is a num,
@@ -162,6 +164,7 @@ impl Hasher {
                 _ => Self::Crc32localDelimiter(Crc32localDelimiter::from(alg_lower.as_str())),
             },
             "rawsuffix" => Self::RawSuffix(RawSuffix::from(alg_lower.as_str())),
+            "bkdrsubstr" => Self::Bkdrsubstr(Bkdrsubstr::from(alg_lower.as_str())),
             _ => {
                 log::error!("found unknow hash: {} use crc32 instead", alg);
                 Self::Crc32(Default::default())
diff --git a/tests/src/bkdrsub.rs b/tests/src/bkdrsub.rs
index 732aa4aec..acb97100c 100644
--- a/tests/src/bkdrsub.rs
+++ b/tests/src/bkdrsub.rs
@@ -21,6 +21,35 @@ fn bkdrsub_one() {
     let dist_idx = dist.index(hash1);

     println!("key:{}, hash:{}, idx:{}", key1, hash1, dist_idx);
+    assert_eq!(dist_idx, 905)
+}
+
+#[test]
+fn bkdrsubstr_one() {
+    let hasher = Hasher::from("bkdrsubstr-(((((");
+
+    let key1 = "otdn#1042015:carSubBrand(((((e4ab74c125e9e95edad691ffe9820118";
+    let hash1 = hasher.hash(&key1.as_bytes());
+
+    let shards = 1080;
+    let servers = vec!["padding".to_string(); shards];
+    let dist = Distribute::from("modrange-8640", &servers);
+    let dist_idx = dist.index(hash1);
+
+    println!("key:{}, hash:{}, idx:{}", key1, hash1, dist_idx);
+    assert_eq!(dist_idx, 905)
+}
+
+// The hash key ends at the marker wherever the marker sits, including at the very end.
+#[test]
+fn bkdrsubstr_marker_position() {
+    let hasher = Hasher::from("bkdrsubstr-(((((");
+
+    let expected = hasher.hash(&"abc#123_456".as_bytes());
+    assert_eq!(hasher.hash(&"abc#123_456(((((789".as_bytes()), expected);
+    assert_eq!(hasher.hash(&"abc#123_456(((((".as_bytes()), expected);
+    // Without '#' the hash key is empty, so the hash is 0.
+    assert_eq!(hasher.hash(&"abc123_456".as_bytes()), 0);
 }

 // TODO 临时批量文件的hash、dist校验测试,按需打开
diff --git a/tests/src/layout.rs b/tests/src/layout.rs
index 293972061..2539f66d5 100644
--- a/tests/src/layout.rs
+++ b/tests/src/layout.rs
@@ -48,7 +48,7 @@ fn checkout_basic() {
     assert_eq!(56, size_of::>());
     assert_eq!(40, size_of::());
     assert_eq!(192, size_of::());
-    assert_eq!(24, size_of::());
+    assert_eq!(32, size_of::());
 }

 // 如果要验证 layout-min模式,需要 --features layout-min --release --no-default-features