use icu_collator::{
options::CollatorOptions, options::Strength, Collator, CollatorBorrowed, CollatorPreferences,
};
use icu_locale_core::Locale;
use std::cmp::Ordering;
use std::fmt;
use std::str::FromStr;
#[derive(Debug)]
pub enum CollateError {
InvalidLocale(String),
Icu(String),
}
impl fmt::Display for CollateError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
CollateError::InvalidLocale(s) => write!(f, "invalid locale: {s}"),
CollateError::Icu(s) => write!(f, "ICU collation error: {s}"),
}
}
}
impl std::error::Error for CollateError {}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CollationStrength {
Primary,
Secondary,
#[default]
Tertiary,
Quaternary,
Identical,
}
impl From<CollationStrength> for Strength {
fn from(s: CollationStrength) -> Self {
match s {
CollationStrength::Primary => Strength::Primary,
CollationStrength::Secondary => Strength::Secondary,
CollationStrength::Tertiary => Strength::Tertiary,
CollationStrength::Quaternary => Strength::Quaternary,
CollationStrength::Identical => Strength::Identical,
}
}
}
pub struct IcuCollator {
inner: CollatorBorrowed<'static>,
}
impl IcuCollator {
pub fn new_for_locale(locale_id: &str) -> Result<Self, CollateError> {
Self::with_strength(locale_id, CollationStrength::Tertiary)
}
pub fn new(locale_id: &str) -> Result<Self, CollateError> {
Self::new_for_locale(locale_id)
}
pub fn with_strength(
locale_id: &str,
strength: CollationStrength,
) -> Result<Self, CollateError> {
let locale =
Locale::from_str(locale_id).map_err(|e| CollateError::InvalidLocale(format!("{e}")))?;
let prefs = CollatorPreferences::from(&locale);
let mut opts = CollatorOptions::default();
opts.strength = Some(Strength::from(strength));
let inner =
Collator::try_new(prefs, opts).map_err(|e| CollateError::Icu(format!("{e}")))?;
Ok(Self { inner })
}
pub fn compare(&self, a: &str, b: &str) -> Ordering {
self.inner.compare(a, b)
}
pub fn sort_key(&self, text: &str) -> Vec<u8> {
let mut key: Vec<u8> = Vec::new();
let _ = self.inner.write_sort_key_to(text, &mut key);
key
}
pub fn compare_sort_keys(a: &[u8], b: &[u8]) -> Ordering {
a.cmp(b)
}
}
impl Default for IcuCollator {
fn default() -> Self {
Self::new("en").unwrap_or_else(|_| {
Self::new("und").expect("root collator must always be constructible")
})
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::cmp::Ordering;
#[test]
fn sort_key_apple_before_banana() {
let collator = IcuCollator::new_for_locale("en").expect("English locale");
let apple = collator.sort_key("apple");
let banana = collator.sort_key("banana");
assert!(
apple < banana,
"\"apple\" sort key should be less than \"banana\" sort key"
);
}
#[test]
fn sort_key_equal_strings() {
let collator = IcuCollator::new_for_locale("en").expect("English locale");
let a = collator.sort_key("hello");
let b = collator.sort_key("hello");
assert_eq!(a, b, "equal strings must have equal sort keys");
}
#[test]
fn sort_key_nonempty_for_nonempty_input() {
let collator = IcuCollator::new_for_locale("en").expect("English locale");
let key = collator.sort_key("a");
assert!(
!key.is_empty(),
"sort key must be non-empty for non-empty input"
);
}
#[test]
fn sort_key_consistent_with_compare() {
let collator = IcuCollator::new_for_locale("en").expect("English locale");
let pairs = [("apple", "banana"), ("cat", "dog"), ("a", "b"), ("z", "zz")];
for (a, b) in pairs {
let cmp = collator.compare(a, b);
let ka = collator.sort_key(a);
let kb = collator.sort_key(b);
let key_cmp = ka.cmp(&kb);
assert_eq!(
cmp, key_cmp,
"compare({a:?}, {b:?}) = {cmp:?} but sort_key ordering = {key_cmp:?}"
);
}
}
#[test]
fn sort_key_empty_string() {
let collator = IcuCollator::new_for_locale("en").expect("English locale");
let key = collator.sort_key("");
let non_empty_key = collator.sort_key("a");
assert!(
key <= non_empty_key,
"sort key of empty string should be ≤ sort key of \"a\""
);
}
#[test]
fn swedish_collation_z_before_a_umlaut() {
let collator = IcuCollator::new_for_locale("sv").expect("Swedish locale");
assert_eq!(collator.compare("z", "ä"), Ordering::Less);
let kz = collator.sort_key("z");
let ka = collator.sort_key("ä");
assert!(kz < ka, "Swedish: sort key of z should be < sort key of ä");
}
#[test]
fn default_collator_compares_ascii() {
let c = IcuCollator::default();
assert_eq!(c.compare("a", "b"), Ordering::Less);
assert_eq!(c.compare("b", "a"), Ordering::Greater);
assert_eq!(c.compare("a", "a"), Ordering::Equal);
}
#[test]
fn primary_strength_ignores_accents() {
if let Ok(c) = IcuCollator::with_strength("en", CollationStrength::Primary) {
let ord = c.compare("e", "é");
assert_eq!(
ord,
Ordering::Equal,
"Primary strength: 'e' and 'é' should be equal (got {ord:?})"
);
}
}
#[test]
fn sort_key_ordering() {
let c = IcuCollator::default();
let ka = c.sort_key("apple");
let kb = c.sort_key("banana");
let key_ord = IcuCollator::compare_sort_keys(&ka, &kb);
let direct_ord = c.compare("apple", "banana");
assert_eq!(
key_ord, direct_ord,
"compare_sort_keys must agree with compare"
);
}
#[test]
fn swedish_collation() {
if let Ok(c) = IcuCollator::with_strength("sv", CollationStrength::Tertiary) {
let ord = c.compare("z", "ä");
assert_eq!(ord, Ordering::Less, "Swedish: 'z' should sort before 'ä'");
}
}
#[test]
fn german_phonebook_collation() {
if let Ok(c) = IcuCollator::with_strength("de-u-co-phonebk", CollationStrength::Tertiary) {
let _ = c.compare("ost", "oerst");
}
}
#[test]
fn japanese_collation() {
if let Ok(c) = IcuCollator::with_strength("ja", CollationStrength::Tertiary) {
let _ = c.compare("あ", "い");
}
}
#[test]
fn case_insensitive_primary() {
if let Ok(c) = IcuCollator::with_strength("en", CollationStrength::Primary) {
let ord = c.compare("Apple", "apple");
assert_eq!(
ord,
Ordering::Equal,
"Primary strength should ignore case (got {ord:?})"
);
}
}
#[test]
fn with_strength_new_alias_equivalent() {
let c1 = IcuCollator::new("en").expect("new");
let c2 = IcuCollator::new_for_locale("en").expect("new_for_locale");
assert_eq!(c1.compare("hello", "world"), c2.compare("hello", "world"));
}
}