use natord::compare as natord_compare;
use natord::compare_ignore_case as natord_compare_ignore;
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
pub fn nat_lex_cmp(a: &str, b: &str) -> Ordering {
if a.len() == b.len() {
a.cmp(b) } else {
natord_compare(a, b) }
}
pub fn nat_lex_cmp_ignore(a: &str, b: &str) -> Ordering {
if a.len() == b.len() {
let mut a_chars = a.chars();
let mut b_chars = b.chars();
loop {
match (a_chars.next(), b_chars.next()) {
(Some(ca), Some(cb)) => {
let la = ca.to_ascii_lowercase();
let lb = cb.to_ascii_lowercase();
if la != lb {
return la.cmp(&lb);
}
}
(None, None) => break, (Some(_), None) => return Ordering::Greater, (None, Some(_)) => return Ordering::Less, }
}
a.cmp(b)
} else {
natord_compare_ignore(a, b) }
}
pub fn nat_lex_byte_cmp(a: &[u8], b: &[u8]) -> Ordering {
if a.len() == b.len() {
return a.cmp(b); }
let mut i = 0;
let mut j = 0;
while i < a.len() && j < b.len() {
let ca = a[i];
let cb = b[j];
if ca.is_ascii_digit() && cb.is_ascii_digit() {
let start_i = i;
let start_j = j;
while i < a.len() && a[i] == b'0' {
i += 1;
}
while j < b.len() && b[j] == b'0' {
j += 1;
}
let num_start_i = i;
let num_start_j = j;
while i < a.len() && a[i].is_ascii_digit() {
i += 1;
}
while j < b.len() && b[j].is_ascii_digit() {
j += 1;
}
let len_a = i - num_start_i;
let len_b = j - num_start_j;
if len_a != len_b {
return len_a.cmp(&len_b); }
for k in 0..len_a {
let da = a[num_start_i + k];
let db = b[num_start_j + k];
if da != db {
return da.cmp(&db);
}
}
} else {
if ca != cb {
return ca.cmp(&cb); }
i += 1;
j += 1;
}
}
a.cmp(b)
}
pub fn nat_lex_byte_cmp_ignore(a: &[u8], b: &[u8]) -> Ordering {
if a.len() == b.len() {
for idx in 0..a.len() {
let ca_lower = a[idx].to_ascii_lowercase();
let cb_lower = b[idx].to_ascii_lowercase();
if ca_lower != cb_lower {
return ca_lower.cmp(&cb_lower);
}
}
return a.cmp(b);
}
let mut i = 0;
let mut j = 0;
while i < a.len() && j < b.len() {
let ca_lower = a[i].to_ascii_lowercase();
let cb_lower = b[j].to_ascii_lowercase();
if a[i].is_ascii_digit() && b[j].is_ascii_digit() {
let start_i = i; let start_j = j;
while i < a.len() && a[i] == b'0' {
i += 1;
}
while j < b.len() && b[j] == b'0' {
j += 1;
}
let num_start_i = i;
let num_start_j = j;
while i < a.len() && a[i].is_ascii_digit() {
i += 1;
}
while j < b.len() && b[j].is_ascii_digit() {
j += 1;
}
let len_a = i - num_start_i;
let len_b = j - num_start_j;
if len_a != len_b {
return len_a.cmp(&len_b);
}
for k in 0..len_a {
let da = a[num_start_i + k];
let db = b[num_start_j + k];
if da != db {
return da.cmp(&db);
}
}
} else {
if ca_lower != cb_lower {
return ca_lower.cmp(&cb_lower);
}
i += 1;
j += 1;
}
}
a.cmp(b)
}
#[derive(Debug, Eq, Clone)]
pub struct NatLexOrderedString(pub String);
impl Ord for NatLexOrderedString {
fn cmp(&self, other: &Self) -> Ordering {
nat_lex_cmp(&self.0, &other.0)
}
}
impl PartialOrd for NatLexOrderedString {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl PartialEq for NatLexOrderedString {
fn eq(&self, other: &Self) -> bool {
self.cmp(other) == Ordering::Equal
}
}
impl Hash for NatLexOrderedString {
fn hash<H: Hasher>(&self, state: &mut H) {
self.0.hash(state);
}
}
impl From<&str> for NatLexOrderedString {
fn from(value: &str) -> Self {
NatLexOrderedString(value.to_string())
}
}
impl From<Box<str>> for NatLexOrderedString {
fn from(value: Box<str>) -> Self {
NatLexOrderedString(value.into()) }
}
impl From<String> for NatLexOrderedString {
fn from(value: String) -> Self {
NatLexOrderedString(value)
}
}
impl From<NatLexOrderedString> for String {
fn from(value: NatLexOrderedString) -> Self {
value.0
}
}
impl From<NatLexOrderedString> for Box<str> {
fn from(value: NatLexOrderedString) -> Box<str> {
value.0.into_boxed_str()
}
}
pub fn nat_lex_sort<T: AsRef<str>>(slice: &mut [T]) {
slice.sort_by(|a, b| nat_lex_cmp(a.as_ref(), b.as_ref()));
}
pub fn nat_lex_sort_ignore_case<T: AsRef<str>>(slice: &mut [T]) {
slice.sort_by(|a, b| nat_lex_cmp_ignore(a.as_ref(), b.as_ref()));
}
pub fn nat_lex_sort_bytes(slice: &mut [&[u8]]) {
slice.sort_by(|a, b| nat_lex_byte_cmp(a, b));
}
pub fn nat_lex_sort_bytes_ignore_case(slice: &mut [&[u8]]) {
slice.sort_by(|a, b| nat_lex_byte_cmp_ignore(a, b));
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::HashSet;
macro_rules! vec_string {
($($x:expr),* $(,)?) => (vec![$($x.to_string()),*]);
}
macro_rules! vec_bytes {
($($x:expr),* $(,)?) => (vec![$($x.as_bytes()),*]);
}
#[test]
fn test_cmp_fixed_length() {
assert_eq!(nat_lex_cmp("id_abc", "id_abd"), Ordering::Less);
assert_eq!(nat_lex_cmp("id_abd", "id_abc"), Ordering::Greater);
assert_eq!(nat_lex_cmp("id_abc", "id_abc"), Ordering::Equal);
assert_eq!(
nat_lex_cmp("01JN4244RAKWNDR48TXFN2XJCY", "01JN7YC5RTJKNKKWNZ5FT9K2YS"),
Ordering::Less
); }
#[test]
fn test_cmp_variable_length_natural() {
assert_eq!(nat_lex_cmp("file1.txt", "file2.txt"), Ordering::Less);
assert_eq!(nat_lex_cmp("file2.txt", "file10.txt"), Ordering::Less);
assert_eq!(nat_lex_cmp("file10.txt", "file2.txt"), Ordering::Greater);
assert_eq!(nat_lex_cmp("file07.txt", "file7.txt"), Ordering::Less); assert_eq!(nat_lex_cmp("img12.png", "img100.png"), Ordering::Less);
assert_eq!(nat_lex_cmp("item 1", "item 2"), Ordering::Less);
assert_eq!(nat_lex_cmp("item 2", "item 10"), Ordering::Less);
}
#[test]
fn test_cmp_mixed_length_types() {
assert_eq!(nat_lex_cmp("id_abc", "id_10"), Ordering::Greater); assert_eq!(nat_lex_cmp("file2.txt", "file10.txt"), Ordering::Less); assert_eq!(nat_lex_cmp("ID001", "ID002"), Ordering::Less); assert_eq!(nat_lex_cmp("ID001", "file1"), Ordering::Less); }
#[test]
fn test_cmp_leading_zeros_natural() {
assert_eq!(nat_lex_cmp("v01", "v1"), Ordering::Less); assert_eq!(nat_lex_cmp("v1", "v001"), Ordering::Greater); }
#[test]
fn test_cmp_empty_strings() {
assert_eq!(nat_lex_cmp("", ""), Ordering::Equal);
assert_eq!(nat_lex_cmp("a", ""), Ordering::Greater);
assert_eq!(nat_lex_cmp("", "a"), Ordering::Less);
}
#[test]
fn test_cmp_ignore_fixed_length() {
assert_eq!(nat_lex_cmp_ignore("id_abc", "id_abd"), Ordering::Less);
assert_eq!(nat_lex_cmp_ignore("id_ABC", "id_abd"), Ordering::Less); assert_eq!(nat_lex_cmp_ignore("id_abd", "id_ABC"), Ordering::Greater); assert_eq!(nat_lex_cmp_ignore("id_abc", "id_abc"), Ordering::Equal);
assert_eq!(nat_lex_cmp_ignore("id_abc", "id_ABC"), Ordering::Greater);
assert_eq!(nat_lex_cmp("id_ABC", "id_abc"), Ordering::Less); assert_eq!(nat_lex_cmp_ignore("id_ABC", "id_abc"), Ordering::Less); }
#[test]
fn test_cmp_ignore_variable_length_natural() {
assert_eq!(nat_lex_cmp_ignore("file1.txt", "File2.txt"), Ordering::Less);
assert_eq!(
nat_lex_cmp_ignore("FILE2.txt", "file10.txt"),
Ordering::Less
);
assert_eq!(
nat_lex_cmp_ignore("file10.TXT", "FILE2.txt"),
Ordering::Greater
);
assert_eq!(
nat_lex_cmp_ignore("file07.txt", "FILE7.txt"),
Ordering::Less
); assert_eq!(
nat_lex_cmp_ignore("img12.png", "IMG100.png"),
Ordering::Less
);
}
#[test]
fn test_cmp_ignore_mixed_length_types() {
assert_eq!(nat_lex_cmp_ignore("id_abc", "ID_10"), Ordering::Greater, "Natural comparison: 'abc' > '10'");
assert_eq!(nat_lex_cmp_ignore("FILE2.txt", "file10.txt"), Ordering::Less, "Natural comparison: 2 < 10");
assert_eq!(nat_lex_cmp_ignore("ID001", "file100"), Ordering::Greater, "Natural comparison: 1 < 100");
assert_eq!(nat_lex_cmp_ignore("ID001", "id002"), Ordering::Less, "Lexicographical (i): '1' < '2'");
assert_eq!(nat_lex_cmp_ignore("id001", "file1"), Ordering::Greater, "Lexicographical (i): 'i' > 'f'");
assert_eq!(nat_lex_cmp_ignore("ID001", "FILE1"), Ordering::Greater, "Lexicographical (i): 'i' > 'f'");
}
#[test]
fn test_sort_strings_fixed_length() {
let mut keys = vec_string!["01JN7YC5RTJKNKKWNZ5FT9K2YS", "01JN4244RAKWNDR48TXFN2XJCY"];
nat_lex_sort(&mut keys);
assert_eq!(
keys,
vec_string!["01JN4244RAKWNDR48TXFN2XJCY", "01JN7YC5RTJKNKKWNZ5FT9K2YS"]
);
}
#[test]
fn test_sort_strings_variable_length() {
let mut keys = vec_string!["2note.txt", "1note.txt", "10note.txt"];
nat_lex_sort(&mut keys);
assert_eq!(keys, vec_string!["1note.txt", "2note.txt", "10note.txt"]);
}
#[test]
fn test_sort_strings_mixed_keys() {
let mut keys = vec_string![
"hub/note/2note.txt",
"hub/note/01JN7YC5RTJKNKKWNZ5FT9K2YS",
"hub/note/01JN4244RAKWNDR48TXFN2XJCY",
"hub/note/1note.txt",
"hub/note/10note.txt",
];
nat_lex_sort(&mut keys);
assert_eq!(
keys,
vec_string![
"hub/note/01JN4244RAKWNDR48TXFN2XJCY", "hub/note/01JN7YC5RTJKNKKWNZ5FT9K2YS",
"hub/note/1note.txt", "hub/note/2note.txt",
"hub/note/10note.txt",
]
);
}
#[test]
fn test_sort_strings_ignore_case() {
let mut keys = vec_string![
"Hub/Note/2note.txt",
"hub/note/01jn7yc5rtjkknkwnz5ft9k2ys", "hub/note/01JN4244RAKWNDR48TXFN2XJCY", "hub/note/1Note.txt",
"hub/note/10note.TXT",
];
nat_lex_sort_ignore_case(&mut keys);
assert_eq!(
keys,
vec_string![
"hub/note/01JN4244RAKWNDR48TXFN2XJCY",
"hub/note/01jn7yc5rtjkknkwnz5ft9k2ys",
"hub/note/1Note.txt",
"Hub/Note/2note.txt",
"hub/note/10note.TXT",
]
);
}
#[test]
fn test_byte_cmp_basic_numbers() {
assert_eq!(
nat_lex_byte_cmp(b"file7.txt", b"file10.txt"),
Ordering::Less
); assert_eq!(
nat_lex_byte_cmp(b"file10.txt", b"file7.txt"),
Ordering::Greater
); assert_eq!(
nat_lex_byte_cmp(b"file07.txt", b"file7.txt"),
Ordering::Less
); assert_eq!(
nat_lex_byte_cmp(b"img12.png", b"img100.png"),
Ordering::Less
); }
#[test]
fn test_byte_cmp_same_length() {
assert_eq!(
nat_lex_byte_cmp(b"file07.txt", b"file10.txt"),
Ordering::Less
); assert_eq!(nat_lex_byte_cmp(b"abc123def", b"abc124def"), Ordering::Less); assert_eq!(
nat_lex_byte_cmp(b"abc124def", b"abc123def"),
Ordering::Greater
);
assert_eq!(
nat_lex_byte_cmp(b"abc123def", b"abc123def"),
Ordering::Equal
);
}
#[test]
fn test_byte_cmp_mixed_behavior() {
assert_eq!(nat_lex_byte_cmp(b"id_10", b"id_abc"), Ordering::Less); assert_eq!(nat_lex_byte_cmp(b"id_abc", b"id_10"), Ordering::Greater);
assert_eq!(nat_lex_byte_cmp(b"v1.2", b"v1.10"), Ordering::Less);
assert_eq!(nat_lex_byte_cmp(b"v1.2", b"v1.10a"), Ordering::Less); }
#[test]
fn test_byte_cmp_numeric_prefix() {
assert_eq!(nat_lex_byte_cmp(b"1a", b"10a"), Ordering::Less); assert_eq!(nat_lex_byte_cmp(b"10a", b"1a"), Ordering::Greater);
assert_eq!(nat_lex_byte_cmp(b"a1", b"a1a"), Ordering::Less); assert_eq!(nat_lex_byte_cmp(b"a1a", b"a1"), Ordering::Greater);
}
#[test]
fn test_byte_cmp_ignore_case() {
assert_eq!(
nat_lex_byte_cmp_ignore(b"FileA.txt", b"filea.txt"),
Ordering::Less
); assert_eq!(
nat_lex_byte_cmp_ignore(b"filea.txt", b"FileA.txt"),
Ordering::Greater
);
assert_eq!(
nat_lex_byte_cmp_ignore(b"FILEA.TXT", b"fileb.txt"),
Ordering::Less
);
assert_eq!(
nat_lex_byte_cmp_ignore(b"file7.txt", b"FILE10.txt"),
Ordering::Less
); assert_eq!(
nat_lex_byte_cmp_ignore(b"IMG12.png", b"img100.png"),
Ordering::Less
);
assert_eq!(
nat_lex_byte_cmp_ignore(b"File07.txt", b"FILE7.txt"),
Ordering::Greater,
"Nat(i) logic: '07'=='7' num, fallback cmp 'i' > 'I'"
); }
#[test]
fn test_sort_bytes_simple() {
let mut keys = vec_bytes!["2note.txt", "1note.txt", "10note.txt"];
nat_lex_sort_bytes(&mut keys);
assert_eq!(keys, vec_bytes!["1note.txt", "2note.txt", "10note.txt"]);
}
#[test]
fn test_sort_bytes_mixed_keys() {
let mut keys = vec_bytes![
"hub/note/2note.txt",
"hub/note/01JN7YC5RTJKNKKWNZ5FT9K2YS",
"hub/note/01JN4244RAKWNDR48TXFN2XJCY",
"hub/note/1note.txt",
"hub/note/10note.txt",
];
nat_lex_sort_bytes(&mut keys);
let expected = vec_bytes![
"hub/note/01JN4244RAKWNDR48TXFN2XJCY", "hub/note/01JN7YC5RTJKNKKWNZ5FT9K2YS",
"hub/note/1note.txt", "hub/note/2note.txt",
"hub/note/10note.txt",
];
assert_eq!(keys, expected);
}
#[test]
fn test_sort_bytes_ignore_case() {
let mut keys = vec_bytes![
"Hub/Note/2note.txt",
"hub/note/01jn7yc5rtjkknkwnz5ft9k2ys",
"hub/note/01JN4244RAKWNDR48TXFN2XJCY",
"hub/note/1Note.txt",
"hub/note/10note.TXT",
];
nat_lex_sort_bytes_ignore_case(&mut keys);
let expected = vec_bytes![
"hub/note/01JN4244RAKWNDR48TXFN2XJCY", "hub/note/01jn7yc5rtjkknkwnz5ft9k2ys",
"hub/note/1Note.txt", "Hub/Note/2note.txt",
"hub/note/10note.TXT",
];
assert_eq!(keys, expected);
}
#[test]
fn test_ordered_string_sorting() {
let mut vec = vec![
NatLexOrderedString::from("file10.txt"),
NatLexOrderedString::from("file2.txt"), NatLexOrderedString::from("id002"),
NatLexOrderedString::from("id001"), ];
vec.sort();
let expected = vec![
NatLexOrderedString::from("file2.txt"),
NatLexOrderedString::from("file10.txt"),
NatLexOrderedString::from("id001"),
NatLexOrderedString::from("id002"),
];
assert_eq!(vec, expected);
}
#[test]
fn test_ordered_string_hash() {
let s1 = NatLexOrderedString::from("test1");
let s2 = NatLexOrderedString::from("test1");
let s3 = NatLexOrderedString::from("test2");
let s4 = NatLexOrderedString::from("test01"); let s5 = NatLexOrderedString::from("test1");
let mut set = HashSet::new();
assert!(set.insert(s1.clone()));
assert!(!set.insert(s2.clone())); assert!(set.insert(s3.clone()));
assert!(set.insert(s4.clone()));
assert_eq!(s1, s2);
assert_ne!(s1, s3);
assert_ne!(s1, s4); assert_eq!(s1, s5);
let hash = |s: &NatLexOrderedString| -> u64 {
let mut hasher = std::collections::hash_map::DefaultHasher::new();
s.hash(&mut hasher);
hasher.finish()
};
assert_eq!(hash(&s1), hash(&s2));
assert_eq!(hash(&s1), hash(&s5));
assert_ne!(hash(&s1), hash(&s3));
assert_ne!(hash(&s1), hash(&s4)); }
}