-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
generate small/fast table for changes_when_{uppercased,lowercased} #4
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,3 +21,6 @@ harness = false | |
[features] | ||
default = [] | ||
nightly = [] | ||
|
||
[workspace] | ||
members = [".", "tabgen"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
mod search; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. All this stuff can be much simplified if I give up on trying to reuse the search.rs file in both the generator and generatee (which is worth doing). That reuse is the reason there are 3 files here; it should just be 1. |
||
mod table; | ||
use table::ChangesWhenTableType; | ||
#[inline] | ||
pub(super) fn changes_when_lowercased(c: char) -> bool { | ||
if c.is_ascii() { | ||
c.is_ascii_uppercase() | ||
} else { | ||
search::changes_when_casemapped_nonascii::</* lowercase = */ true>( | ||
c, | ||
table::CHANGES_WHEN_LOOKUP_TAB, | ||
) | ||
} | ||
} | ||
#[inline] | ||
pub(super) fn changes_when_uppercased(c: char) -> bool { | ||
if c.is_ascii() { | ||
c.is_ascii_lowercase() | ||
} else { | ||
search::changes_when_casemapped_nonascii::</* lowercase = */ false>( | ||
c, | ||
table::CHANGES_WHEN_LOOKUP_TAB, | ||
) | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod test {
    /// Reference answer: `c` changes when its lowercase mapping is not exactly `c`.
    fn changes_when_lowercased_refimpl(c: char) -> bool {
        c.to_lowercase().ne(core::iter::once(c))
    }

    /// Reference answer: `c` changes when its uppercase mapping is not exactly `c`.
    fn changes_when_uppercased_refimpl(c: char) -> bool {
        c.to_uppercase().ne(core::iter::once(c))
    }

    /// Exhaustively compares the table-driven predicates against the reference
    /// implementations for every `char`.
    ///
    /// The check is skipped when the standard library's Unicode version differs
    /// from the one recorded in the generated table, since the two data sets
    /// would then not be comparable.
    #[test]
    fn test_paranoia() {
        let versions_match = core::char::UNICODE_VERSION == super::table::UNICODE_VERSION;
        if !versions_match {
            return;
        }
        for c in '\0'..=char::MAX {
            let expected = (
                changes_when_lowercased_refimpl(c),
                changes_when_uppercased_refimpl(c),
            );
            let actual = (
                super::changes_when_lowercased(c),
                super::changes_when_uppercased(c),
            );
            assert_eq!(expected, actual, "wrong for {:?} (U+{:04x})", c, c as u32);
        }
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
pub(super) fn changes_when_casemapped_nonascii<const MAP_LOWER: bool>( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there are two copies of this file (one in tabgen/src/search.rs) just to shut rust-analyzer up. its absolutely not needed. |
||
needle: char, | ||
tab: &super::ChangesWhenTableType, | ||
) -> bool { | ||
let Some(enc) = find_encoded_case_range(needle, tab) else { | ||
return false; | ||
}; | ||
const RK_UNIFORM_UPPER: u32 = 0; | ||
const RK_UNIFORM_LOWER: u32 = 1; | ||
const RK_ALT_UPPER_LOWER: u32 = 2; | ||
const RK_ALT_LOWER_UPPER: u32 = 3; | ||
const RK_UNIFORM_BOTH: u32 = 4; | ||
|
||
let range_st = enc >> 11; | ||
let range_len = enc & 0xff; | ||
let range_kind = (enc >> 8) & 0x7; | ||
debug_assert!(range_kind <= 4); | ||
let map_lower = MAP_LOWER; | ||
let map_upper = !MAP_LOWER; | ||
match range_kind { | ||
RK_UNIFORM_BOTH => true, | ||
RK_UNIFORM_UPPER => map_upper, | ||
RK_UNIFORM_LOWER => map_lower, | ||
RK_ALT_UPPER_LOWER | RK_ALT_LOWER_UPPER => { | ||
let offset = needle as u32 - range_st; | ||
debug_assert!(offset <= range_len); | ||
let odd = (offset & 1) != 0; | ||
let odd_is_lower = range_kind == RK_ALT_UPPER_LOWER; | ||
if MAP_LOWER { | ||
odd_is_lower == odd | ||
} else { | ||
odd_is_lower == !odd | ||
} | ||
} | ||
rk => { | ||
debug_assert!(false, "bad rangekind {:?}", rk); | ||
false | ||
} | ||
} | ||
} | ||
|
||
pub(super) fn find_encoded_case_range( | ||
needle: char, | ||
ranges: &super::ChangesWhenTableType, | ||
) -> Option<u32> { | ||
let pos = ranges.binary_search_by(|&entry| { | ||
let range_st = entry >> 11; | ||
let range_len = entry & 0xff; | ||
if range_st > (needle as u32) { | ||
core::cmp::Ordering::Greater | ||
} else if (range_st + range_len) <= (needle as u32) { | ||
core::cmp::Ordering::Less | ||
} else { | ||
core::cmp::Ordering::Equal | ||
} | ||
}); | ||
match pos { | ||
Err(_) => None, | ||
Ok(n) => Some(ranges[n]), | ||
} | ||
} |
Large diffs are not rendered by default.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't even remember touching this stuff, but I guess I did.