From 334a9174b9e50baf5e0d5824bb14b0e5f1bb8fb3 Mon Sep 17 00:00:00 2001
From: Jessica Rodriguez
Date: Sat, 30 Nov 2024 11:37:51 -0500
Subject: [PATCH] fix versionsort chunk split on non-ASCII numerics

---
 src/sort.rs                                   | 11 ++++++-
 .../source/versionsort_non_ascii_numerics.rs  | 33 +++++++++++++++++++
 .../target/versionsort_non_ascii_numerics.rs  | 33 +++++++++++++++++++
 3 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 tests/source/versionsort_non_ascii_numerics.rs
 create mode 100644 tests/target/versionsort_non_ascii_numerics.rs

diff --git a/src/sort.rs b/src/sort.rs
index 670f664a119..0cdd937b4f5 100644
--- a/src/sort.rs
+++ b/src/sort.rs
@@ -65,7 +65,7 @@ impl<'a> VersionChunkIter<'a> {
                 break;
             }
 
-            if !c.is_numeric() {
+            if !c.is_ascii_digit() {
                 continue;
             }
 
@@ -283,6 +283,10 @@ mod test {
                 source: "009"
             })
         );
+
+        // '๙' = U+0E59 THAI DIGIT NINE, General Category Nd
+        let mut iter = VersionChunkIter::new("x๙v");
+        assert_eq!(iter.next(), Some(VersionChunk::Str("x๙v")));
     }
 
     #[test]
@@ -297,6 +301,11 @@ mod test {
         input.sort_by(|a, b| version_sort(a, b));
         assert_eq!(input, expected);
 
+        let mut input = vec!["x๙x", "xéx", "x0x"];
+        let expected = vec!["x0x", "xéx", "x๙x"];
+        input.sort_by(|a, b| version_sort(a, b));
+        assert_eq!(input, expected);
+
         let mut input = vec!["applesauce", "apple"];
         let expected = vec!["apple", "applesauce"];
         input.sort_by(|a, b| version_sort(a, b));
diff --git a/tests/source/versionsort_non_ascii_numerics.rs b/tests/source/versionsort_non_ascii_numerics.rs
new file mode 100644
index 00000000000..a12a0361789
--- /dev/null
+++ b/tests/source/versionsort_non_ascii_numerics.rs
@@ -0,0 +1,33 @@
+use std::cmp::Ordering;
+use print๙msg::print as first_print;
+use print0msg::print as second_print;
+use printémsg::print as third_print;
+
+fn main() {
+    first_print();
+    second_print();
+    third_print();
+
+    assert_eq!("print๙msg".cmp("printémsg"), Ordering::Greater);
+}
+
+/// '๙' = 0E59;THAI DIGIT NINE;Nd;
+mod print๙msg {
+    pub fn print() {
+        println!("Non-ASCII Decimal_Number")
+    }
+}
+
+/// '0' = 0030;DIGIT ZERO;Nd;
+mod print0msg {
+    pub fn print() {
+        println!("ASCII Decimal_Number")
+    }
+}
+
+/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll;
+mod printémsg {
+    pub fn print() {
+        println!("Lowercase_Letter")
+    }
+}
\ No newline at end of file
diff --git a/tests/target/versionsort_non_ascii_numerics.rs b/tests/target/versionsort_non_ascii_numerics.rs
new file mode 100644
index 00000000000..f027ea93869
--- /dev/null
+++ b/tests/target/versionsort_non_ascii_numerics.rs
@@ -0,0 +1,33 @@
+use print0msg::print as second_print;
+use printémsg::print as third_print;
+use print๙msg::print as first_print;
+use std::cmp::Ordering;
+
+fn main() {
+    first_print();
+    second_print();
+    third_print();
+
+    assert_eq!("print๙msg".cmp("printémsg"), Ordering::Greater);
+}
+
+/// '๙' = 0E59;THAI DIGIT NINE;Nd;
+mod print๙msg {
+    pub fn print() {
+        println!("Non-ASCII Decimal_Number")
+    }
+}
+
+/// '0' = 0030;DIGIT ZERO;Nd;
+mod print0msg {
+    pub fn print() {
+        println!("ASCII Decimal_Number")
+    }
+}
+
+/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll;
+mod printémsg {
+    pub fn print() {
+        println!("Lowercase_Letter")
+    }
+}