use alloc::collections::VecDeque;
use alloc::vec::Vec;
use crate::elements::CharacterAndClassAndTrieValue;
use crate::elements::CollationElement32;
use crate::elements::Tag;
use crate::elements::BACKWARD_COMBINING_MARKER;
use crate::elements::CE_BUFFER_SIZE;
use crate::elements::FALLBACK_CE32;
use crate::elements::NON_ROUND_TRIP_MARKER;
use crate::elements::{
char_from_u32, CollationElement, CollationElements, NonPrimary, FFFD_CE32,
HANGUL_SYLLABLE_MARKER, HIGH_ZEROS_MASK, JAMO_COUNT, LOW_ZEROS_MASK, NO_CE, NO_CE_PRIMARY,
NO_CE_QUATERNARY, NO_CE_SECONDARY, NO_CE_TERTIARY, OPTIMIZED_DIACRITICS_MAX_COUNT,
QUATERNARY_MASK,
};
use crate::options::CollatorOptionsBitField;
use crate::options::{
AlternateHandling, CollatorOptions, MaxVariable, ResolvedCollatorOptions, Strength,
};
use crate::preferences::{CollationCaseFirst, CollationNumericOrdering, CollationType};
use crate::provider::CollationData;
use crate::provider::CollationDiacritics;
use crate::provider::CollationDiacriticsV1;
use crate::provider::CollationJamo;
use crate::provider::CollationJamoV1;
use crate::provider::CollationMetadataV1;
use crate::provider::CollationReordering;
use crate::provider::CollationReorderingV1;
use crate::provider::CollationRootV1;
use crate::provider::CollationSpecialPrimariesV1;
use crate::provider::CollationSpecialPrimariesValidated;
use crate::provider::CollationTailoringV1;
use core::cmp::Ordering;
use core::convert::{Infallible, TryFrom};
use icu_normalizer::provider::DecompositionData;
use icu_normalizer::provider::DecompositionTables;
use icu_normalizer::provider::NormalizerNfdDataV1;
use icu_normalizer::provider::NormalizerNfdTablesV1;
use icu_normalizer::DecomposingNormalizerBorrowed;
use icu_normalizer::Decomposition;
use icu_provider::marker::ErasedMarker;
use icu_provider::prelude::*;
use smallvec::SmallVec;
use utf16_iter::Utf16CharsEx;
use utf8_iter::Utf8CharsEx;
use zerovec::ule::AsULE;
const LEVEL_SEPARATOR_BYTE: u8 = 1;
const MERGE_SEPARATOR: char = '\u{fffe}';
const MERGE_SEPARATOR_BYTE: u8 = 2;
const MERGE_SEPARATOR_PRIMARY: u32 = 0x02000000;
const PRIMARY_COMPRESSION_LOW_BYTE: u8 = 3;
const PRIMARY_COMPRESSION_HIGH_BYTE: u8 = 0xff;
const COMMON_BYTE: u8 = 5;
const COMMON_WEIGHT16: u16 = 0x0500;
const PRIMARY_LEVEL_FLAG: u8 = 0x01;
const SECONDARY_LEVEL_FLAG: u8 = 0x02;
const CASE_LEVEL_FLAG: u8 = 0x04;
const TERTIARY_LEVEL_FLAG: u8 = 0x08;
const QUATERNARY_LEVEL_FLAG: u8 = 0x10;
const LEVEL_MASKS: [u8; Strength::Identical as usize + 1] = [
PRIMARY_LEVEL_FLAG,
PRIMARY_LEVEL_FLAG | SECONDARY_LEVEL_FLAG,
PRIMARY_LEVEL_FLAG | SECONDARY_LEVEL_FLAG | TERTIARY_LEVEL_FLAG,
PRIMARY_LEVEL_FLAG | SECONDARY_LEVEL_FLAG | TERTIARY_LEVEL_FLAG | QUATERNARY_LEVEL_FLAG,
0,
0,
0,
PRIMARY_LEVEL_FLAG | SECONDARY_LEVEL_FLAG | TERTIARY_LEVEL_FLAG | QUATERNARY_LEVEL_FLAG,
];
const WEIGHT_LOW: usize = 0;
const WEIGHT_MIDDLE: usize = 1;
const WEIGHT_HIGH: usize = 2;
const WEIGHT_MAX_COUNT: usize = 3;
const SEC_COMMON: [u8; 4] = [COMMON_BYTE, COMMON_BYTE + 0x20, COMMON_BYTE + 0x40, 0x21];
const CASE_LOWER_FIRST_COMMON: [u8; 4] = [1, 7, 13, 7];
const CASE_UPPER_FIRST_COMMON: [u8; 4] = [3, 0 , 15, 13];
const TER_ONLY_COMMON: [u8; 4] = [COMMON_BYTE, COMMON_BYTE + 0x60, COMMON_BYTE + 0xc0, 0x61];
const TER_LOWER_FIRST_COMMON: [u8; 4] = [COMMON_BYTE, COMMON_BYTE + 0x20, COMMON_BYTE + 0x40, 0x21];
const TER_UPPER_FIRST_COMMON: [u8; 4] = [
COMMON_BYTE + 0x80,
COMMON_BYTE + 0x80 + 0x20,
COMMON_BYTE + 0x80 + 0x40,
0x21,
];
const QUAT_COMMON: [u8; 4] = [0x1c, 0x1c + 0x70, 0x1c + 0xe0, 0x71];
const QUAT_SHIFTED_LIMIT_BYTE: u8 = QUAT_COMMON[WEIGHT_LOW] - 1;
const SLOPE_MIN: i32 = 3;
const SLOPE_MAX: i32 = 0xff;
const SLOPE_MIDDLE: i32 = 0x81;
const SLOPE_TAIL_COUNT: i32 = SLOPE_MAX - SLOPE_MIN + 1;
const SLOPE_SINGLE: i32 = 80;
const SLOPE_LEAD_2: i32 = 42;
const SLOPE_LEAD_3: i32 = 3;
const SLOPE_REACH_POS_1: i32 = SLOPE_SINGLE;
const SLOPE_REACH_NEG_1: i32 = -SLOPE_SINGLE;
const SLOPE_REACH_POS_2: i32 = SLOPE_LEAD_2 * SLOPE_TAIL_COUNT + (SLOPE_LEAD_2 - 1);
const SLOPE_REACH_NEG_2: i32 = -SLOPE_REACH_POS_2 - 1;
const SLOPE_REACH_POS_3: i32 = SLOPE_LEAD_3 * SLOPE_TAIL_COUNT * SLOPE_TAIL_COUNT
+ (SLOPE_LEAD_3 - 1) * SLOPE_TAIL_COUNT
+ (SLOPE_TAIL_COUNT - 1);
const SLOPE_REACH_NEG_3: i32 = -SLOPE_REACH_POS_3 - 1;
const SLOPE_START_POS_2: i32 = SLOPE_MIDDLE + SLOPE_SINGLE + 1;
const SLOPE_START_POS_3: i32 = SLOPE_START_POS_2 + SLOPE_LEAD_2;
const SLOPE_START_NEG_2: i32 = SLOPE_MIDDLE + SLOPE_REACH_NEG_1;
const SLOPE_START_NEG_3: i32 = SLOPE_START_NEG_2 - SLOPE_LEAD_2;
struct AnyQuaternaryAccumulator(u32);
impl AnyQuaternaryAccumulator {
#[inline(always)]
pub fn new() -> Self {
AnyQuaternaryAccumulator(0)
}
#[inline(always)]
pub fn accumulate(&mut self, non_primary: NonPrimary) {
self.0 |= non_primary.bits()
}
#[inline(always)]
pub fn has_quaternary(&self) -> bool {
self.0 & u32::from(QUATERNARY_MASK) != 0
}
}
#[inline(always)]
fn in_inclusive_range16(i: u16, start: u16, end: u16) -> bool {
i.wrapping_sub(start) <= (end - start)
}
#[cfg(feature = "latin1")]
trait Latin1Chars {
fn latin1_chars(&self) -> impl DoubleEndedIterator<Item = char>;
}
#[cfg(feature = "latin1")]
impl Latin1Chars for [u8] {
fn latin1_chars(&self) -> impl DoubleEndedIterator<Item = char> {
self.iter().map(|b| char::from(*b))
}
}
#[cfg(feature = "latin1")]
fn split_prefix_latin1<'a, 'b>(left: &'a [u8], right: &'b [u8]) -> (&'a [u8], &'a [u8], &'b [u8]) {
let i = left
.iter()
.zip(right.iter())
.take_while(|(l, r)| l == r)
.count();
if let Some((head, left_tail)) = left.split_at_checked(i) {
if let Some(right_tail) = right.get(i..) {
return (head, left_tail, right_tail);
}
}
(&[], left, right)
}
#[cfg(feature = "latin1")]
fn split_prefix_latin1_utf16<'a, 'b>(
left: &'a [u8],
right: &'b [u16],
) -> (&'a [u8], &'a [u8], &'b [u16]) {
let i = left
.iter()
.zip(right.iter())
.take_while(|(l, r)| u16::from(**l) == **r)
.count();
if let Some((head, left_tail)) = left.split_at_checked(i) {
if let Some(right_tail) = right.get(i..) {
return (head, left_tail, right_tail);
}
}
(&[], left, right)
}
fn split_prefix_u16<'a, 'b>(
left: &'a [u16],
right: &'b [u16],
) -> (&'a [u16], &'a [u16], &'b [u16]) {
let mut i = left
.iter()
.zip(right.iter())
.take_while(|(l, r)| l == r)
.count();
if i != 0 {
if let Some(&last) = left.get(i.wrapping_sub(1)) {
if in_inclusive_range16(last, 0xD800, 0xDBFF) {
i -= 1;
}
if let Some((head, left_tail)) = left.split_at_checked(i) {
if let Some(right_tail) = right.get(i..) {
return (head, left_tail, right_tail);
}
}
}
}
(&[], left, right)
}
fn split_prefix_u8<'a, 'b>(left: &'a [u8], right: &'b [u8]) -> (&'a [u8], &'a [u8], &'b [u8]) {
let mut i = left
.iter()
.zip(right.iter())
.take_while(|(l, r)| l == r)
.count();
if i != 0 {
if let Some(right_first) = right.get(i) {
if (right_first & 0b1100_0000) == 0b1000_0000 {
i -= 1;
}
}
while i != 0 {
if let Some(left_first) = left.get(i) {
if (left_first & 0b1100_0000) == 0b1000_0000 {
i -= 1;
continue;
}
}
break;
}
if let Some((head, left_tail)) = left.split_at_checked(i) {
if let Some(right_tail) = right.get(i..) {
return (head, left_tail, right_tail);
}
}
}
(&[], left, right)
}
fn split_prefix<'a, 'b>(left: &'a str, right: &'b str) -> (&'a str, &'a str, &'b str) {
let left_bytes = left.as_bytes();
let right_bytes = right.as_bytes();
let mut i = left_bytes
.iter()
.zip(right_bytes.iter())
.take_while(|(l, r)| l == r)
.count();
if i != 0 {
loop {
if let Some(left_first) = left_bytes.get(i) {
if (left_first & 0b1100_0000) == 0b1000_0000 {
i -= 1;
continue;
}
}
break;
}
if let Some((head, left_tail)) = left.split_at_checked(i) {
if let Some(right_tail) = right.get(i..) {
return (head, left_tail, right_tail);
}
}
}
("", left, right)
}
#[derive(Debug)]
struct LocaleSpecificDataHolder {
tailoring: Option<DataPayload<CollationTailoringV1>>,
diacritics: DataPayload<CollationDiacriticsV1>,
reordering: Option<DataPayload<CollationReorderingV1>>,
merged_options: CollatorOptionsBitField,
lithuanian_dot_above: bool,
}
icu_locale_core::preferences::define_preferences!(
[Copy]
CollatorPreferences,
{
collation_type: CollationType,
case_first: CollationCaseFirst,
numeric_ordering: CollationNumericOrdering
}
);
impl LocaleSpecificDataHolder {
fn try_new_unstable_internal<D>(
provider: &D,
prefs: CollatorPreferences,
options: CollatorOptions,
) -> Result<Self, DataError>
where
D: DataProvider<CollationTailoringV1>
+ DataProvider<CollationDiacriticsV1>
+ DataProvider<CollationMetadataV1>
+ DataProvider<CollationReorderingV1>
+ ?Sized,
{
let marker_attributes = prefs
.collation_type
.as_ref()
.map(|c| DataMarkerAttributes::from_str_or_panic(c.as_str()))
.unwrap_or_default();
let data_locale = CollationTailoringV1::make_locale(prefs.locale_preferences);
let req = DataRequest {
id: DataIdentifierBorrowed::for_marker_attributes_and_locale(
marker_attributes,
&data_locale,
),
metadata: {
let mut metadata = DataRequestMetadata::default();
metadata.silent = true;
metadata
},
};
let fallback_req = DataRequest {
id: DataIdentifierBorrowed::for_marker_attributes_and_locale(
Default::default(),
&data_locale,
),
..Default::default()
};
let metadata_payload: DataPayload<crate::provider::CollationMetadataV1> = provider
.load(req)
.or_else(|_| provider.load(fallback_req))?
.payload;
let metadata = metadata_payload.get();
let tailoring: Option<DataPayload<crate::provider::CollationTailoringV1>> =
if metadata.tailored() {
Some(
provider
.load(req)
.or_else(|_| provider.load(fallback_req))?
.payload,
)
} else {
None
};
let reordering: Option<DataPayload<crate::provider::CollationReorderingV1>> =
if metadata.reordering() {
Some(
provider
.load(req)
.or_else(|_| provider.load(fallback_req))?
.payload,
)
} else {
None
};
if let Some(reordering) = &reordering {
if reordering.get().reorder_table.len() != 256 {
return Err(DataError::custom("invalid").with_marker(CollationReorderingV1::INFO));
}
}
let tailored_diacritics = metadata.tailored_diacritics();
let diacritics: DataPayload<CollationDiacriticsV1> = provider
.load(if tailored_diacritics {
req
} else {
Default::default()
})?
.payload;
if tailored_diacritics {
if diacritics.get().secondaries.len() > OPTIMIZED_DIACRITICS_MAX_COUNT {
return Err(DataError::custom("invalid").with_marker(CollationDiacriticsV1::INFO));
}
} else if diacritics.get().secondaries.len() != OPTIMIZED_DIACRITICS_MAX_COUNT {
return Err(DataError::custom("invalid").with_marker(CollationDiacriticsV1::INFO));
}
let mut altered_defaults = CollatorOptionsBitField::default();
if metadata.alternate_shifted() {
altered_defaults.set_alternate_handling(Some(AlternateHandling::Shifted));
}
if metadata.backward_second_level() {
altered_defaults.set_backward_second_level(Some(true));
}
altered_defaults.set_case_first(Some(metadata.case_first()));
altered_defaults.set_max_variable(Some(metadata.max_variable()));
let mut merged_options = CollatorOptionsBitField::from(options);
merged_options.set_case_first(prefs.case_first);
merged_options.set_numeric_from_enum(prefs.numeric_ordering);
merged_options.set_defaults(altered_defaults);
Ok(LocaleSpecificDataHolder {
tailoring,
diacritics,
merged_options,
reordering,
lithuanian_dot_above: metadata.lithuanian_dot_above(),
})
}
}
#[derive(Debug)]
pub struct Collator {
special_primaries: DataPayload<ErasedMarker<CollationSpecialPrimariesValidated<'static>>>,
root: DataPayload<CollationRootV1>,
tailoring: Option<DataPayload<CollationTailoringV1>>,
jamo: DataPayload<CollationJamoV1>,
diacritics: DataPayload<CollationDiacriticsV1>,
options: CollatorOptionsBitField,
reordering: Option<DataPayload<CollationReorderingV1>>,
decompositions: DataPayload<NormalizerNfdDataV1>,
tables: DataPayload<NormalizerNfdTablesV1>,
lithuanian_dot_above: bool,
}
impl Collator {
pub fn as_borrowed(&self) -> CollatorBorrowed<'_> {
CollatorBorrowed {
special_primaries: self.special_primaries.get(),
root: self.root.get(),
tailoring: self.tailoring.as_ref().map(|s| s.get()),
jamo: self.jamo.get(),
diacritics: self.diacritics.get(),
options: self.options,
reordering: self.reordering.as_ref().map(|s| s.get()),
decompositions: self.decompositions.get(),
tables: self.tables.get(),
lithuanian_dot_above: self.lithuanian_dot_above,
}
}
#[cfg(feature = "compiled_data")]
pub fn try_new(
prefs: CollatorPreferences,
options: CollatorOptions,
) -> Result<CollatorBorrowed<'static>, DataError> {
CollatorBorrowed::try_new(prefs, options)
}
icu_provider::gen_buffer_data_constructors!(
(prefs: CollatorPreferences, options: CollatorOptions) -> error: DataError,
functions: [
try_new: skip,
try_new_with_buffer_provider,
try_new_unstable,
Self
]
);
#[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::try_new)]
pub fn try_new_unstable<D>(
provider: &D,
prefs: CollatorPreferences,
options: CollatorOptions,
) -> Result<Self, DataError>
where
D: DataProvider<CollationSpecialPrimariesV1>
+ DataProvider<CollationRootV1>
+ DataProvider<CollationTailoringV1>
+ DataProvider<CollationDiacriticsV1>
+ DataProvider<CollationJamoV1>
+ DataProvider<CollationMetadataV1>
+ DataProvider<CollationReorderingV1>
+ DataProvider<NormalizerNfdDataV1>
+ DataProvider<NormalizerNfdTablesV1>
+ ?Sized,
{
Self::try_new_unstable_internal(
provider,
provider.load(Default::default())?.payload,
provider.load(Default::default())?.payload,
provider.load(Default::default())?.payload,
provider.load(Default::default())?.payload,
provider.load(Default::default())?.payload,
prefs,
options,
)
}
#[expect(clippy::too_many_arguments)]
fn try_new_unstable_internal<D>(
provider: &D,
root: DataPayload<CollationRootV1>,
decompositions: DataPayload<NormalizerNfdDataV1>,
tables: DataPayload<NormalizerNfdTablesV1>,
jamo: DataPayload<CollationJamoV1>,
special_primaries: DataPayload<CollationSpecialPrimariesV1>,
prefs: CollatorPreferences,
options: CollatorOptions,
) -> Result<Self, DataError>
where
D: DataProvider<CollationRootV1>
+ DataProvider<CollationTailoringV1>
+ DataProvider<CollationDiacriticsV1>
+ DataProvider<CollationMetadataV1>
+ DataProvider<CollationReorderingV1>
+ ?Sized,
{
let locale_dependent =
LocaleSpecificDataHolder::try_new_unstable_internal(provider, prefs, options)?;
if jamo.get().ce32s.len() != JAMO_COUNT {
return Err(DataError::custom("invalid").with_marker(CollationJamoV1::INFO));
}
if special_primaries.get().last_primaries.len() <= (MaxVariable::Currency as usize) {
return Err(DataError::custom("invalid").with_marker(CollationSpecialPrimariesV1::INFO));
}
let special_primaries = special_primaries.map_project(|csp, _| {
let compressible_bytes = (csp.last_primaries.len()
== MaxVariable::Currency as usize + 16)
.then(|| {
csp.last_primaries
.as_maybe_borrowed()?
.as_ule_slice()
.get((MaxVariable::Currency as usize)..)?
.try_into()
.ok()
})
.flatten()
.unwrap_or(
CollationSpecialPrimariesValidated::HARDCODED_COMPRESSIBLE_BYTES_FALLBACK,
);
CollationSpecialPrimariesValidated {
last_primaries: csp.last_primaries.truncated(MaxVariable::Currency as usize),
numeric_primary: csp.numeric_primary,
compressible_bytes,
}
});
Ok(Collator {
special_primaries,
root,
tailoring: locale_dependent.tailoring,
jamo,
diacritics: locale_dependent.diacritics,
options: locale_dependent.merged_options,
reordering: locale_dependent.reordering,
decompositions,
tables,
lithuanian_dot_above: locale_dependent.lithuanian_dot_above,
})
}
}
macro_rules! compare {
($(#[$meta:meta])*,
$compare:ident,
$left_slice:ty,
$right_slice:ty,
$split_prefix:ident,
$left_to_iter:ident,
$right_to_iter:ident,
) => {
$(#[$meta])*
pub fn $compare(&self, left: &$left_slice, right: &$right_slice) -> Ordering {
let (head, left_tail, right_tail) = $split_prefix(left, right);
if left_tail.is_empty() && right_tail.is_empty() {
return Ordering::Equal;
}
let ret = self.compare_impl(left_tail.$left_to_iter(), right_tail.$right_to_iter(), head.$left_to_iter().rev());
if self.options.strength() == Strength::Identical && ret == Ordering::Equal {
return Decomposition::new(left_tail.$left_to_iter(), self.decompositions, self.tables).map(|c| if c != MERGE_SEPARATOR { c as i32 } else { -1i32 }).cmp(
Decomposition::new(right_tail.$right_to_iter(), self.decompositions, self.tables).map(|c| if c != MERGE_SEPARATOR { c as i32 } else { -1i32 }),
);
}
ret
}
}
}
#[derive(Debug)]
pub struct CollatorBorrowed<'a> {
special_primaries: &'a CollationSpecialPrimariesValidated<'a>,
root: &'a CollationData<'a>,
tailoring: Option<&'a CollationData<'a>>,
jamo: &'a CollationJamo<'a>,
diacritics: &'a CollationDiacritics<'a>,
options: CollatorOptionsBitField,
reordering: Option<&'a CollationReordering<'a>>,
decompositions: &'a DecompositionData<'a>,
tables: &'a DecompositionTables<'a>,
lithuanian_dot_above: bool,
}
impl CollatorBorrowed<'static> {
#[cfg(feature = "compiled_data")]
pub fn try_new(
prefs: CollatorPreferences,
options: CollatorOptions,
) -> Result<Self, DataError> {
let provider = &crate::provider::Baked;
let decompositions = icu_normalizer::provider::Baked::SINGLETON_NORMALIZER_NFD_DATA_V1;
let tables = icu_normalizer::provider::Baked::SINGLETON_NORMALIZER_NFD_TABLES_V1;
let root = crate::provider::Baked::SINGLETON_COLLATION_ROOT_V1;
let jamo = crate::provider::Baked::SINGLETON_COLLATION_JAMO_V1;
let locale_dependent =
LocaleSpecificDataHolder::try_new_unstable_internal(provider, prefs, options)?;
const _: () = assert!(
crate::provider::Baked::SINGLETON_COLLATION_JAMO_V1
.ce32s
.as_slice()
.len()
== JAMO_COUNT
);
const _: () = assert!(
crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1
.last_primaries
.as_slice()
.len()
> (MaxVariable::Currency as usize)
);
let special_primaries = const {
&CollationSpecialPrimariesValidated {
last_primaries: zerovec::ZeroSlice::from_ule_slice(
crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1
.last_primaries
.as_slice()
.as_ule_slice()
.split_at(MaxVariable::Currency as usize)
.0,
)
.as_zerovec(),
numeric_primary: crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1
.numeric_primary,
compressible_bytes: {
const C: &[<u16 as AsULE>::ULE] =
crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1
.last_primaries
.as_slice()
.as_ule_slice();
if C.len() == MaxVariable::Currency as usize + 16 {
let i = MaxVariable::Currency as usize;
#[allow(clippy::indexing_slicing)] &[
C[i],
C[i + 1],
C[i + 2],
C[i + 3],
C[i + 4],
C[i + 5],
C[i + 6],
C[i + 7],
C[i + 8],
C[i + 9],
C[i + 10],
C[i + 11],
C[i + 12],
C[i + 13],
C[i + 14],
C[i + 15],
]
} else {
CollationSpecialPrimariesValidated::HARDCODED_COMPRESSIBLE_BYTES_FALLBACK
}
},
}
};
#[expect(clippy::unwrap_used)]
Ok(CollatorBorrowed {
special_primaries,
root,
tailoring: locale_dependent.tailoring.map(|s| s.get_static().unwrap()),
jamo,
diacritics: locale_dependent.diacritics.get_static().unwrap(),
options: locale_dependent.merged_options,
reordering: locale_dependent.reordering.map(|s| s.get_static().unwrap()),
decompositions,
tables,
lithuanian_dot_above: locale_dependent.lithuanian_dot_above,
})
}
pub const fn static_to_owned(self) -> Collator {
Collator {
special_primaries: DataPayload::from_static_ref(self.special_primaries),
root: DataPayload::from_static_ref(self.root),
tailoring: if let Some(s) = self.tailoring {
Some(DataPayload::from_static_ref(s))
} else {
None
},
jamo: DataPayload::from_static_ref(self.jamo),
diacritics: DataPayload::from_static_ref(self.diacritics),
options: self.options,
reordering: if let Some(s) = self.reordering {
Some(DataPayload::from_static_ref(s))
} else {
None
},
decompositions: DataPayload::from_static_ref(self.decompositions),
tables: DataPayload::from_static_ref(self.tables),
lithuanian_dot_above: self.lithuanian_dot_above,
}
}
}
macro_rules! collation_elements {
($self:expr, $chars:expr, $tailoring:expr, $numeric_primary:expr) => {{
let jamo = <&[<u32 as AsULE>::ULE; JAMO_COUNT]>::try_from($self.jamo.ce32s.as_ule_slice());
let jamo = jamo.unwrap();
CollationElements::new(
$chars,
$self.root,
$tailoring,
jamo,
&$self.diacritics.secondaries,
$self.decompositions,
$self.tables,
$numeric_primary,
$self.lithuanian_dot_above,
)
}};
}
impl CollatorBorrowed<'_> {
pub fn resolved_options(&self) -> ResolvedCollatorOptions {
self.options.into()
}
compare!(
,
compare,
str,
str,
split_prefix,
chars,
chars,
);
compare!(
,
compare_utf8,
[u8],
[u8],
split_prefix_u8,
chars,
chars,
);
compare!(
,
compare_utf16,
[u16],
[u16],
split_prefix_u16,
chars,
chars,
);
compare!(
#[cfg(feature = "latin1")]
,
compare_latin1,
[u8],
[u8],
split_prefix_latin1,
latin1_chars,
latin1_chars,
);
compare!(
#[cfg(feature = "latin1")]
,
compare_latin1_utf16,
[u8],
[u16],
split_prefix_latin1_utf16,
latin1_chars,
chars,
);
#[inline(always)]
fn tailoring_or_root(&self) -> &CollationData<'_> {
if let Some(tailoring) = &self.tailoring {
tailoring
} else {
self.root
}
}
#[inline(always)]
fn numeric_primary(&self) -> Option<u8> {
if self.options.numeric() {
Some(self.special_primaries.numeric_primary)
} else {
None
}
}
#[inline(always)]
fn variable_top(&self) -> u32 {
if self.options.alternate_handling() == AlternateHandling::NonIgnorable {
0
} else {
self.special_primaries
.last_primary_for_group(self.options.max_variable())
+ 1
}
}
fn compare_impl<
L: Iterator<Item = char>,
R: Iterator<Item = char>,
H: Iterator<Item = char>,
>(
&self,
left_chars: L,
right_chars: R,
mut head_chars: H,
) -> Ordering {
let mut left_ces: SmallVec<[CollationElement; CE_BUFFER_SIZE]> = SmallVec::new();
let mut right_ces: SmallVec<[CollationElement; CE_BUFFER_SIZE]> = SmallVec::new();
let mut any_variable = false;
let variable_top = self.variable_top();
let tailoring = self.tailoring_or_root();
let numeric_primary = self.numeric_primary();
let mut left = collation_elements!(self, left_chars, tailoring, numeric_primary);
let mut right = collation_elements!(self, right_chars, tailoring, numeric_primary);
#[expect(clippy::never_loop)]
'prefix: loop {
if let Some(mut head_last_c) = head_chars.next() {
let norm_trie = &self.decompositions.trie;
let mut head_last = CharacterAndClassAndTrieValue::new_with_trie_val(
head_last_c,
norm_trie.get(head_last_c),
);
let mut head_last_ce32 = CollationElement32::default();
let mut head_last_ok = false;
if let Some(left_different) = left.iter_next_before_init() {
left.prepend_upcoming_before_init(left_different.clone());
if let Some(right_different) = right.iter_next_before_init() {
right.prepend_upcoming_before_init(right_different.clone());
#[expect(clippy::never_loop)]
loop {
let decomposition = head_last.trie_val;
if (decomposition
& !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER))
== 0
{
} else if ((decomposition & HIGH_ZEROS_MASK) != 0)
&& ((decomposition & LOW_ZEROS_MASK) != 0)
{
head_last_c = char_from_u32(decomposition & 0x7FFF);
} else if decomposition == HANGUL_SYLLABLE_MARKER {
head_last_ce32 = FFFD_CE32;
} else {
break;
}
head_last_ok = true;
let mut left_ce32 = CollationElement32::default();
let mut right_ce32 = CollationElement32::default();
let left_c;
let right_c;
let decomposition = left_different.trie_val;
if (decomposition
& !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER))
== 0
{
left_c = left_different.character();
} else if ((decomposition & HIGH_ZEROS_MASK) != 0)
&& ((decomposition & LOW_ZEROS_MASK) != 0)
{
left_c = char_from_u32(decomposition & 0x7FFF);
} else if decomposition == HANGUL_SYLLABLE_MARKER {
left_c = '\u{0}';
left_ce32 = FFFD_CE32;
} else {
break;
}
let decomposition = right_different.trie_val;
if (decomposition
& !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER))
== 0
{
right_c = right_different.character();
} else if ((decomposition & HIGH_ZEROS_MASK) != 0)
&& ((decomposition & LOW_ZEROS_MASK) != 0)
{
right_c = char_from_u32(decomposition & 0x7FFF);
} else if decomposition == HANGUL_SYLLABLE_MARKER {
right_c = '\u{0}';
right_ce32 = FFFD_CE32;
} else {
break;
}
if head_last_ce32 == CollationElement32::default() {
head_last_ce32 = tailoring.ce32_for_char(head_last_c);
if head_last_ce32 == FALLBACK_CE32 {
head_last_ce32 = self.root.ce32_for_char(head_last_c);
}
if head_last_ce32.tag_checked() == Some(Tag::Contraction)
&& head_last_ce32.at_least_one_suffix_contains_starter()
{
break;
}
}
if left_ce32 == CollationElement32::default() {
left_ce32 = tailoring.ce32_for_char(left_c);
if left_ce32 == FALLBACK_CE32 {
left_ce32 = self.root.ce32_for_char(left_c);
}
if left_ce32.tag_checked() == Some(Tag::Prefix) {
break;
}
}
if right_ce32 == CollationElement32::default() {
right_ce32 = tailoring.ce32_for_char(right_c);
if right_ce32 == FALLBACK_CE32 {
right_ce32 = self.root.ce32_for_char(right_c);
}
if right_ce32.tag_checked() == Some(Tag::Prefix) {
break;
}
}
if self.options.numeric()
&& head_last_ce32.tag_checked() == Some(Tag::Digit)
&& (left_ce32.tag_checked() == Some(Tag::Digit)
|| right_ce32.tag_checked() == Some(Tag::Digit))
{
break;
}
break 'prefix;
}
}
}
let mut tail_first_c;
let mut tail_first_ce32;
let mut tail_first_ok;
loop {
left.prepend_upcoming_before_init(head_last.clone());
right.prepend_upcoming_before_init(head_last.clone());
tail_first_c = head_last_c;
tail_first_ce32 = head_last_ce32;
tail_first_ok = head_last_ok;
head_last_c = if let Some(head_last_c) = head_chars.next() {
head_last_c
} else {
break 'prefix;
};
let decomposition = norm_trie.get(head_last_c);
head_last = CharacterAndClassAndTrieValue::new_with_trie_val(
head_last_c,
decomposition,
);
head_last_ce32 = CollationElement32::default();
head_last_ok = false;
if (decomposition & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER)) == 0 {
} else if ((decomposition & HIGH_ZEROS_MASK) != 0)
&& ((decomposition & LOW_ZEROS_MASK) != 0)
{
head_last_c = char_from_u32(decomposition & 0x7FFF);
} else if decomposition == HANGUL_SYLLABLE_MARKER {
head_last_ce32 = FFFD_CE32;
} else {
continue;
}
head_last_ok = true;
if !tail_first_ok {
continue;
}
if head_last_ce32 == CollationElement32::default() {
head_last_ce32 = tailoring.ce32_for_char(head_last_c);
if head_last_ce32 == FALLBACK_CE32 {
head_last_ce32 = self.root.ce32_for_char(head_last_c);
}
if head_last_ce32.tag_checked() == Some(Tag::Contraction)
&& head_last_ce32.at_least_one_suffix_contains_starter()
{
continue;
}
}
if tail_first_ce32 == CollationElement32::default() {
tail_first_ce32 = tailoring.ce32_for_char(tail_first_c);
if tail_first_ce32 == FALLBACK_CE32 {
tail_first_ce32 = self.root.ce32_for_char(tail_first_c);
}
} if tail_first_ce32.tag_checked() == Some(Tag::Prefix) {
continue;
}
if self.options.numeric()
&& head_last_ce32.tag_checked() == Some(Tag::Digit)
&& tail_first_ce32.tag_checked() == Some(Tag::Digit)
{
continue;
}
break 'prefix;
}
} else {
break 'prefix;
}
}
left.init();
right.init();
loop {
let mut left_primary;
'left_primary_loop: loop {
let ce = left.next();
left_primary = ce.primary();
if !(left_primary < variable_top && left_primary > MERGE_SEPARATOR_PRIMARY) {
left_ces.push(ce);
} else {
any_variable = true;
left_ces.push(ce.clone_with_non_primary_zeroed());
loop {
let ce = left.next();
left_primary = ce.primary();
if left_primary != 0
&& !(left_primary < variable_top
&& left_primary > MERGE_SEPARATOR_PRIMARY)
{
left_ces.push(ce);
break 'left_primary_loop;
}
left_ces.push(ce.clone_with_non_primary_zeroed());
}
}
if left_primary != 0 {
break;
}
}
let mut right_primary;
'right_primary_loop: loop {
let ce = right.next();
right_primary = ce.primary();
if !(right_primary < variable_top && right_primary > MERGE_SEPARATOR_PRIMARY) {
right_ces.push(ce);
} else {
any_variable = true;
right_ces.push(ce.clone_with_non_primary_zeroed());
loop {
let ce = right.next();
right_primary = ce.primary();
if right_primary != 0
&& !(right_primary < variable_top
&& right_primary > MERGE_SEPARATOR_PRIMARY)
{
right_ces.push(ce);
break 'right_primary_loop;
}
right_ces.push(ce.clone_with_non_primary_zeroed());
}
}
if right_primary != 0 {
break;
}
}
if left_primary != right_primary {
if let Some(reordering) = &self.reordering {
left_primary = reordering.reorder(left_primary);
right_primary = reordering.reorder(right_primary);
}
if left_primary < right_primary {
return Ordering::Less;
}
return Ordering::Greater;
}
if left_primary == NO_CE_PRIMARY {
break;
}
}
debug_assert_eq!(left_ces.last(), Some(&NO_CE));
debug_assert_eq!(right_ces.last(), Some(&NO_CE));
if self.options.strength() >= Strength::Secondary {
if !self.options.backward_second_level() {
let mut left_iter = left_ces.iter();
let mut right_iter = right_ces.iter();
let mut left_secondary;
let mut right_secondary;
loop {
loop {
left_secondary = left_iter.next().unwrap_or_default().secondary();
if left_secondary != 0 {
break;
}
}
loop {
right_secondary = right_iter.next().unwrap_or_default().secondary();
if right_secondary != 0 {
break;
}
}
if left_secondary != right_secondary {
if left_secondary < right_secondary {
return Ordering::Less;
}
return Ordering::Greater;
}
if left_secondary == NO_CE_SECONDARY {
break;
}
}
} else {
let mut left_remaining = &left_ces[..];
let mut right_remaining = &right_ces[..];
loop {
if left_remaining.is_empty() {
debug_assert!(right_remaining.is_empty());
break;
}
let (left_prefix, right_prefix) = {
let mut left_iter = left_remaining.iter();
loop {
let left_primary = left_iter.next().unwrap_or_default().primary();
if left_primary != 0 && left_primary <= MERGE_SEPARATOR_PRIMARY {
break;
}
debug_assert_ne!(left_primary, NO_CE_PRIMARY);
}
let left_new_remaining = left_iter.as_slice();
#[expect(clippy::indexing_slicing)]
let left_prefix =
&left_remaining[..left_remaining.len() - 1 - left_new_remaining.len()];
left_remaining = left_new_remaining;
let mut right_iter = right_remaining.iter();
loop {
let right_primary = right_iter.next().unwrap_or_default().primary();
if right_primary != 0 && right_primary <= MERGE_SEPARATOR_PRIMARY {
break;
}
debug_assert_ne!(right_primary, NO_CE_PRIMARY);
}
let right_new_remaining = right_iter.as_slice();
#[expect(clippy::indexing_slicing)]
let right_prefix = &right_remaining
[..right_remaining.len() - 1 - right_new_remaining.len()];
right_remaining = right_new_remaining;
(left_prefix, right_prefix)
};
let mut left_iter = left_prefix.iter();
let mut right_iter = right_prefix.iter();
let mut left_secondary;
let mut right_secondary;
loop {
loop {
left_secondary = left_iter.next_back().unwrap_or_default().secondary();
if left_secondary != 0 {
break;
}
}
loop {
right_secondary =
right_iter.next_back().unwrap_or_default().secondary();
if right_secondary != 0 {
break;
}
}
if left_secondary != right_secondary {
if left_secondary < right_secondary {
return Ordering::Less;
}
return Ordering::Greater;
}
if left_secondary == NO_CE_SECONDARY {
break;
}
}
}
}
}
if self.options.case_level() {
if self.options.strength() == Strength::Primary {
let mut left_non_primary;
let mut right_non_primary;
let mut left_case;
let mut right_case;
let mut left_iter = left_ces.iter();
let mut right_iter = right_ces.iter();
loop {
loop {
let ce = left_iter.next().unwrap_or_default();
left_non_primary = ce.non_primary();
if !ce.either_half_zero() {
break;
}
}
left_case = left_non_primary.case();
loop {
let ce = right_iter.next().unwrap_or_default();
right_non_primary = ce.non_primary();
if !ce.either_half_zero() {
break;
}
}
right_case = right_non_primary.case();
if left_case != right_case {
if !self.options.upper_first() {
if left_case < right_case {
return Ordering::Less;
}
return Ordering::Greater;
}
if left_case < right_case {
return Ordering::Greater;
}
return Ordering::Less;
}
if left_non_primary.secondary() == NO_CE_SECONDARY {
break;
}
}
} else {
let mut left_non_primary;
let mut right_non_primary;
let mut left_case;
let mut right_case;
let mut left_iter = left_ces.iter();
let mut right_iter = right_ces.iter();
loop {
loop {
left_non_primary = left_iter.next().unwrap_or_default().non_primary();
if left_non_primary.secondary() != 0 {
break;
}
}
left_case = left_non_primary.case();
loop {
right_non_primary = right_iter.next().unwrap_or_default().non_primary();
if right_non_primary.secondary() != 0 {
break;
}
}
right_case = right_non_primary.case();
if left_case != right_case {
if !self.options.upper_first() {
if left_case < right_case {
return Ordering::Less;
}
return Ordering::Greater;
}
if left_case < right_case {
return Ordering::Greater;
}
return Ordering::Less;
}
if left_non_primary.secondary() == NO_CE_SECONDARY {
break;
}
}
}
}
if let Some(tertiary_mask) = self.options.tertiary_mask() {
let mut any_quaternaries = AnyQuaternaryAccumulator::new();
let mut left_iter = left_ces.iter();
let mut right_iter = right_ces.iter();
loop {
let mut left_non_primary;
let mut left_tertiary;
loop {
left_non_primary = left_iter.next().unwrap_or_default().non_primary();
any_quaternaries.accumulate(left_non_primary);
debug_assert!(
left_non_primary.tertiary() != 0 || left_non_primary.case_quaternary() == 0
);
left_tertiary = left_non_primary.tertiary_case_quarternary(tertiary_mask);
if left_tertiary != 0 {
break;
}
}
let mut right_non_primary;
let mut right_tertiary;
loop {
right_non_primary = right_iter.next().unwrap_or_default().non_primary();
any_quaternaries.accumulate(right_non_primary);
debug_assert!(
right_non_primary.tertiary() != 0
|| right_non_primary.case_quaternary() == 0
);
right_tertiary = right_non_primary.tertiary_case_quarternary(tertiary_mask);
if right_tertiary != 0 {
break;
}
}
if left_tertiary != right_tertiary {
if self.options.upper_first() {
if left_tertiary > NO_CE_TERTIARY {
if left_non_primary.secondary() != 0 {
left_tertiary ^= 0xC000;
} else {
left_tertiary += 0x4000;
}
}
if right_tertiary > NO_CE_TERTIARY {
if right_non_primary.secondary() != 0 {
right_tertiary ^= 0xC000;
} else {
right_tertiary += 0x4000;
}
}
}
if left_tertiary < right_tertiary {
return Ordering::Less;
}
return Ordering::Greater;
}
if left_tertiary == NO_CE_TERTIARY {
break;
}
}
if !any_variable && !any_quaternaries.has_quaternary() {
return Ordering::Equal;
}
} else {
return Ordering::Equal;
}
if self.options.strength() <= Strength::Tertiary {
return Ordering::Equal;
}
let mut left_iter = left_ces.iter();
let mut right_iter = right_ces.iter();
loop {
let mut left_quaternary;
loop {
let ce = left_iter.next().unwrap_or_default();
if ce.tertiary_ignorable() {
left_quaternary = ce.primary();
} else {
left_quaternary = ce.quaternary();
}
if left_quaternary != 0 {
break;
}
}
let mut right_quaternary;
loop {
let ce = right_iter.next().unwrap_or_default();
if ce.tertiary_ignorable() {
right_quaternary = ce.primary();
} else {
right_quaternary = ce.quaternary();
}
if right_quaternary != 0 {
break;
}
}
if left_quaternary != right_quaternary {
if let Some(reordering) = &self.reordering {
left_quaternary = reordering.reorder(left_quaternary);
right_quaternary = reordering.reorder(right_quaternary);
}
if left_quaternary < right_quaternary {
return Ordering::Less;
}
return Ordering::Greater;
}
if left_quaternary == NO_CE_PRIMARY {
break;
}
}
Ordering::Equal
}
fn sort_key_levels(&self) -> u8 {
#[expect(clippy::indexing_slicing)]
let mut levels = LEVEL_MASKS[self.options.strength() as usize];
if self.options.case_level() {
levels |= CASE_LEVEL_FLAG;
}
levels
}
pub fn write_sort_key_to<S>(&self, s: &str, sink: &mut S) -> Result<S::Output, S::Error>
where
S: CollationKeySink + ?Sized,
S::State: Default,
{
self.write_sort_key_impl(s.chars(), sink)
}
pub fn write_sort_key_utf8_to<S>(&self, s: &[u8], sink: &mut S) -> Result<S::Output, S::Error>
where
S: CollationKeySink + ?Sized,
S::State: Default,
{
self.write_sort_key_impl(s.chars(), sink)
}
pub fn write_sort_key_utf16_to<S>(&self, s: &[u16], sink: &mut S) -> Result<S::Output, S::Error>
where
S: CollationKeySink + ?Sized,
S::State: Default,
{
self.write_sort_key_impl(s.chars(), sink)
}
fn write_sort_key_impl<I, S>(&self, iter: I, sink: &mut S) -> Result<S::Output, S::Error>
where
I: Iterator<Item = char> + Clone,
S: CollationKeySink + ?Sized,
S::State: Default,
{
let identical = if self.options.strength() == Strength::Identical {
Some(iter.clone())
} else {
None
};
let mut state = S::State::default();
self.write_sort_key_up_to_quaternary(iter, sink, &mut state)?;
if let Some(iter) = identical {
let nfd =
DecomposingNormalizerBorrowed::new_with_data(self.decompositions, self.tables);
sink.write_byte(&mut state, LEVEL_SEPARATOR_BYTE)?;
let iter = nfd.normalize_iter(iter);
write_identical_level(iter, sink, &mut state)?;
}
sink.finish(state)
}
fn write_sort_key_up_to_quaternary<I, S>(
&self,
iter: I,
sink: &mut S,
state: &mut S::State,
) -> Result<(), S::Error>
where
I: Iterator<Item = char>,
S: CollationKeySink + ?Sized,
{
let levels = self.sort_key_levels();
let mut iter =
collation_elements!(self, iter, self.tailoring_or_root(), self.numeric_primary());
iter.init();
let variable_top = self.variable_top();
let tertiary_mask = self.options.tertiary_mask().unwrap_or_default();
let mut cases = SortKeyLevel::default();
let mut secondaries = SortKeyLevel::default();
let mut tertiaries = SortKeyLevel::default();
let mut quaternaries = SortKeyLevel::default();
let mut prev_reordered_primary = 0;
let mut common_cases = 0usize;
let mut common_secondaries = 0usize;
let mut common_tertiaries = 0usize;
let mut common_quaternaries = 0usize;
let mut prev_secondary = 0;
let mut sec_segment_start = 0;
loop {
let mut ce = iter.next();
let mut p = ce.primary();
if p < variable_top && p > MERGE_SEPARATOR_PRIMARY {
if common_quaternaries != 0 {
common_quaternaries -= 1;
while common_quaternaries >= QUAT_COMMON[WEIGHT_MAX_COUNT] as _ {
quaternaries.append_byte(QUAT_COMMON[WEIGHT_MIDDLE]);
common_quaternaries -= QUAT_COMMON[WEIGHT_MAX_COUNT] as usize;
}
quaternaries.append_byte(QUAT_COMMON[WEIGHT_LOW] + common_quaternaries as u8);
common_quaternaries = 0;
}
loop {
if levels & QUATERNARY_LEVEL_FLAG != 0 {
if let Some(reordering) = &self.reordering {
p = reordering.reorder(p);
}
if (p >> 24) as u8 >= QUAT_SHIFTED_LIMIT_BYTE {
quaternaries.append_byte(QUAT_SHIFTED_LIMIT_BYTE);
}
quaternaries.append_weight_32(p);
}
loop {
ce = iter.next();
p = ce.primary();
if p != 0 {
break;
}
}
if !(p < variable_top && p > MERGE_SEPARATOR_PRIMARY) {
break;
}
}
}
if p > NO_CE_PRIMARY && levels & PRIMARY_LEVEL_FLAG != 0 {
let is_compressible = self.special_primaries.is_compressible((p >> 24) as _);
if let Some(reordering) = &self.reordering {
p = reordering.reorder(p);
}
let p1 = (p >> 24) as u8;
if !is_compressible || p1 != (prev_reordered_primary >> 24) as u8 {
if prev_reordered_primary != 0 {
if p < prev_reordered_primary {
if p1 > MERGE_SEPARATOR_BYTE {
sink.write(state, &[PRIMARY_COMPRESSION_LOW_BYTE])?;
}
} else {
sink.write(state, &[PRIMARY_COMPRESSION_HIGH_BYTE])?;
}
}
sink.write_byte(state, p1)?;
prev_reordered_primary = if is_compressible { p } else { 0 };
}
let p2 = (p >> 16) as u8;
if p2 != 0 {
let (b0, b1, b2) = (p2, (p >> 8) as _, p as _);
sink.write_byte(state, b0)?;
if b1 != 0 {
sink.write_byte(state, b1)?;
if b2 != 0 {
sink.write_byte(state, b2)?;
}
}
}
}
let non_primary = ce.non_primary();
if non_primary.ignorable() {
continue; }
macro_rules! handle_common {
($key:ident, $w:ident, $common:ident, $weights:ident, $lim:expr) => {
if $common != 0 {
$common -= 1;
while $common >= $weights[WEIGHT_MAX_COUNT] as _ {
$key.append_byte($weights[WEIGHT_MIDDLE]);
$common -= $weights[WEIGHT_MAX_COUNT] as usize;
}
let b = if $w < $lim {
$weights[WEIGHT_LOW] + ($common as u8)
} else {
$weights[WEIGHT_HIGH] - ($common as u8)
};
$key.append_byte(b);
$common = 0;
}
};
($key:ident, $w:ident, $common:ident, $weights:ident) => {
handle_common!($key, $w, $common, $weights, COMMON_WEIGHT16);
};
}
if levels & SECONDARY_LEVEL_FLAG != 0 {
let s = non_primary.secondary();
if s == 0 {
} else if s == COMMON_WEIGHT16
&& (!self.options.backward_second_level() || p != MERGE_SEPARATOR_PRIMARY)
{
common_secondaries += 1;
} else if !self.options.backward_second_level() {
handle_common!(secondaries, s, common_secondaries, SEC_COMMON);
secondaries.append_weight_16(s);
} else {
if common_secondaries != 0 {
common_secondaries -= 1;
let remainder = common_secondaries % SEC_COMMON[WEIGHT_MAX_COUNT] as usize;
let b = if prev_secondary < COMMON_WEIGHT16 {
SEC_COMMON[WEIGHT_LOW] + remainder as u8
} else {
SEC_COMMON[WEIGHT_HIGH] - remainder as u8
};
secondaries.append_byte(b);
common_secondaries -= remainder;
while common_secondaries > 0 {
secondaries.append_byte(SEC_COMMON[WEIGHT_MIDDLE]);
common_secondaries -= SEC_COMMON[WEIGHT_MAX_COUNT] as usize;
}
}
if 0 < p && p <= MERGE_SEPARATOR_PRIMARY {
let secs = &mut secondaries.buf;
let last = secs.len() - 1;
if sec_segment_start < last {
let mut q = sec_segment_start;
let mut r = last;
#[expect(clippy::indexing_slicing)]
while q < r {
let b = secs[q];
secs[q] = secs[r];
q += 1;
secs[r] = b;
r -= 1;
}
}
let b = if p == NO_CE_PRIMARY {
LEVEL_SEPARATOR_BYTE
} else {
MERGE_SEPARATOR_BYTE
};
secondaries.append_byte(b);
prev_secondary = 0;
sec_segment_start = secondaries.len();
} else {
secondaries.append_reverse_weight_16(s);
prev_secondary = s;
}
}
}
if levels & CASE_LEVEL_FLAG != 0 {
if self.options.strength() == Strength::Primary && p == 0
|| non_primary.bits() <= 0xffff
{
} else {
let mut c = ((non_primary.bits() >> 8) & 0xff) as u8;
debug_assert_ne!(c & 0xc0, 0xc0);
if c & 0xc0 == 0 && c > LEVEL_SEPARATOR_BYTE {
common_cases += 1;
} else {
if !self.options.upper_first() {
if common_cases != 0 && (c > LEVEL_SEPARATOR_BYTE || !cases.is_empty())
{
common_cases -= 1;
while common_cases >= CASE_LOWER_FIRST_COMMON[WEIGHT_MAX_COUNT] as _
{
cases.append_byte(CASE_LOWER_FIRST_COMMON[WEIGHT_MIDDLE] << 4);
common_cases -=
CASE_LOWER_FIRST_COMMON[WEIGHT_MAX_COUNT] as usize;
}
let b = if c <= LEVEL_SEPARATOR_BYTE {
CASE_LOWER_FIRST_COMMON[WEIGHT_LOW] + common_cases as u8
} else {
CASE_LOWER_FIRST_COMMON[WEIGHT_HIGH] - common_cases as u8
};
cases.append_byte(b << 4);
common_cases = 0;
}
if c > LEVEL_SEPARATOR_BYTE {
c = (CASE_LOWER_FIRST_COMMON[WEIGHT_HIGH] + (c >> 6)) << 4;
}
} else {
if common_cases != 0 {
common_cases -= 1;
while common_cases >= CASE_UPPER_FIRST_COMMON[WEIGHT_MAX_COUNT] as _
{
cases.append_byte(CASE_UPPER_FIRST_COMMON[WEIGHT_LOW] << 4);
common_cases -=
CASE_UPPER_FIRST_COMMON[WEIGHT_MAX_COUNT] as usize;
}
cases.append_byte(
(CASE_UPPER_FIRST_COMMON[WEIGHT_LOW] + common_cases as u8) << 4,
);
common_cases = 0;
}
if c > LEVEL_SEPARATOR_BYTE {
c = (CASE_UPPER_FIRST_COMMON[WEIGHT_LOW] - (c >> 6)) << 4;
}
}
cases.append_byte(c);
}
}
}
if levels & TERTIARY_LEVEL_FLAG != 0 {
let mut t = non_primary.tertiary_case_quarternary(tertiary_mask);
debug_assert_ne!(non_primary.bits() & 0xc000, 0xc000);
if t == COMMON_WEIGHT16 {
common_tertiaries += 1;
} else if tertiary_mask & 0x8000 == 0 {
handle_common!(tertiaries, t, common_tertiaries, TER_ONLY_COMMON);
if t > COMMON_WEIGHT16 {
t += 0xc000;
}
tertiaries.append_weight_16(t);
} else if !self.options.upper_first() {
handle_common!(tertiaries, t, common_tertiaries, TER_LOWER_FIRST_COMMON);
if t > COMMON_WEIGHT16 {
t += 0x4000;
}
tertiaries.append_weight_16(t);
} else {
if t <= NO_CE_TERTIARY {
} else if non_primary.bits() > 0xffff {
t ^= 0xc000;
if t < (TER_UPPER_FIRST_COMMON[WEIGHT_HIGH] as u16) << 8 {
t -= 0x4000;
}
} else {
debug_assert!((0x8600..=0xbfff).contains(&t));
t += 0x4000;
}
handle_common!(
tertiaries,
t,
common_tertiaries,
TER_UPPER_FIRST_COMMON,
(TER_UPPER_FIRST_COMMON[WEIGHT_LOW] as u16) << 8
);
tertiaries.append_weight_16(t);
}
}
if levels & QUATERNARY_LEVEL_FLAG != 0 {
let q = (non_primary.bits() & 0xffff) as u16;
if q & 0xc0 == 0 && q > NO_CE_QUATERNARY {
common_quaternaries += 1;
} else if q == NO_CE_QUATERNARY
&& self.options.alternate_handling() == AlternateHandling::NonIgnorable
&& quaternaries.is_empty()
{
quaternaries.append_byte(LEVEL_SEPARATOR_BYTE);
} else {
let q = if q == NO_CE_QUATERNARY {
LEVEL_SEPARATOR_BYTE
} else {
(0xfc + ((q >> 6) & 3)) as u8
};
handle_common!(
quaternaries,
q,
common_quaternaries,
QUAT_COMMON,
QUAT_COMMON[WEIGHT_LOW]
);
quaternaries.append_byte(q);
}
}
if (non_primary.bits() >> 24) as u8 == LEVEL_SEPARATOR_BYTE {
break; }
}
macro_rules! write_level {
($key:ident, $flag:ident) => {
if levels & $flag != 0 {
sink.write(state, &[LEVEL_SEPARATOR_BYTE])?;
sink.write(state, &$key.buf)?;
}
};
}
write_level!(secondaries, SECONDARY_LEVEL_FLAG);
if levels & CASE_LEVEL_FLAG != 0 {
sink.write(state, &[LEVEL_SEPARATOR_BYTE])?;
let mut b = 0;
if let Some((last, head)) = cases.buf.split_last() {
debug_assert_eq!(*last, 1); for c in head {
debug_assert_eq!(*c & 0xf, 0);
debug_assert_ne!(*c, 0);
if b == 0 {
b = *c;
} else {
sink.write_byte(state, b | (*c >> 4))?;
b = 0;
}
}
} else {
debug_assert!(false);
}
if b != 0 {
sink.write_byte(state, b)?;
}
}
write_level!(tertiaries, TERTIARY_LEVEL_FLAG);
write_level!(quaternaries, QUATERNARY_LEVEL_FLAG);
Ok(())
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct TooSmall {
pub length: usize,
}
impl TooSmall {
pub fn new(length: usize) -> Self {
Self { length }
}
}
pub trait CollationKeySink {
type Error;
type State;
type Output;
fn write(&mut self, state: &mut Self::State, buf: &[u8]) -> Result<(), Self::Error>;
fn write_byte(&mut self, state: &mut Self::State, b: u8) -> Result<(), Self::Error> {
self.write(state, &[b])
}
fn finish(&mut self, state: Self::State) -> Result<Self::Output, Self::Error>;
}
impl CollationKeySink for Vec<u8> {
type Error = Infallible;
type State = ();
type Output = ();
fn write(&mut self, _: &mut Self::State, buf: &[u8]) -> Result<(), Self::Error> {
self.extend_from_slice(buf);
Ok(())
}
fn finish(&mut self, _: Self::State) -> Result<Self::Output, Self::Error> {
Ok(())
}
}
impl CollationKeySink for VecDeque<u8> {
type Error = Infallible;
type State = ();
type Output = ();
fn write(&mut self, _: &mut Self::State, buf: &[u8]) -> Result<(), Self::Error> {
self.extend(buf.iter());
Ok(())
}
fn finish(&mut self, _: Self::State) -> Result<Self::Output, Self::Error> {
Ok(())
}
}
impl<const N: usize> CollationKeySink for SmallVec<[u8; N]> {
type Error = Infallible;
type State = ();
type Output = ();
fn write(&mut self, _: &mut Self::State, buf: &[u8]) -> Result<(), Self::Error> {
self.extend_from_slice(buf);
Ok(())
}
fn finish(&mut self, _: Self::State) -> Result<Self::Output, Self::Error> {
Ok(())
}
}
impl CollationKeySink for [u8] {
type Error = TooSmall;
type State = usize;
type Output = usize;
fn write(&mut self, offset: &mut Self::State, buf: &[u8]) -> Result<(), Self::Error> {
if *offset + buf.len() <= self.len() {
#[expect(clippy::indexing_slicing)]
self[*offset..*offset + buf.len()].copy_from_slice(buf);
}
*offset += buf.len();
Ok(())
}
fn finish(&mut self, offset: Self::State) -> Result<Self::Output, Self::Error> {
if offset <= self.len() {
Ok(offset)
} else {
Err(TooSmall::new(offset))
}
}
}
#[derive(Default)]
struct SortKeyLevel {
buf: SmallVec<[u8; 40]>,
}
impl SortKeyLevel {
fn len(&self) -> usize {
self.buf.len()
}
fn is_empty(&self) -> bool {
self.buf.is_empty()
}
fn append_byte(&mut self, x: u8) {
self.buf.push(x);
}
fn append_weight_16(&mut self, w: u16) {
debug_assert_ne!(w, 0);
let b0 = (w >> 8) as u8;
let b1 = w as u8;
self.append_byte(b0);
if b1 != 0 {
self.append_byte(b1);
}
}
fn append_reverse_weight_16(&mut self, w: u16) {
debug_assert_ne!(w, 0);
let b0 = (w >> 8) as u8;
let b1 = w as u8;
if b1 != 0 {
self.append_byte(b1);
}
self.append_byte(b0);
}
fn append_weight_32(&mut self, w: u32) {
debug_assert_ne!(w, 0);
let b0 = (w >> 24) as u8;
let b1 = (w >> 16) as u8;
let b2 = (w >> 8) as u8;
let b3 = w as u8;
self.append_byte(b0);
if b1 != 0 {
self.append_byte(b1);
if b2 != 0 {
self.append_byte(b2);
if b3 != 0 {
self.append_byte(b3);
}
}
}
}
}
macro_rules! negdivmod {
($n:ident, $d:ident, $m:ident) => {
$m = $n % $d;
$n /= $d;
if $m < 0 {
$n -= 1;
$m += $d;
}
};
}
fn write_diff<S>(mut diff: i32, sink: &mut S, state: &mut S::State) -> Result<(), S::Error>
where
S: CollationKeySink + ?Sized,
{
let mut out = |b| sink.write_byte(state, b);
if diff >= SLOPE_REACH_NEG_1 {
if diff <= SLOPE_REACH_POS_1 {
out((SLOPE_MIDDLE + diff) as _)?;
} else if diff <= SLOPE_REACH_POS_2 {
out((SLOPE_START_POS_2 + (diff / SLOPE_TAIL_COUNT)) as _)?;
out((SLOPE_MIN + diff % SLOPE_TAIL_COUNT) as _)?;
} else if diff <= SLOPE_REACH_POS_3 {
let p2 = SLOPE_MIN + diff % SLOPE_TAIL_COUNT;
diff /= SLOPE_TAIL_COUNT;
let p1 = SLOPE_MIN + diff % SLOPE_TAIL_COUNT;
let p0 = SLOPE_START_POS_3 + (diff / SLOPE_TAIL_COUNT);
out(p0 as _)?;
out(p1 as _)?;
out(p2 as _)?;
} else {
let p3 = SLOPE_MIN + diff % SLOPE_TAIL_COUNT;
diff /= SLOPE_TAIL_COUNT;
let p2 = SLOPE_MIN + diff % SLOPE_TAIL_COUNT;
diff /= SLOPE_TAIL_COUNT;
let p1 = SLOPE_MIN + diff % SLOPE_TAIL_COUNT;
out(SLOPE_MAX as _)?;
out(p1 as _)?;
out(p2 as _)?;
out(p3 as _)?;
}
} else {
let mut m;
if diff >= SLOPE_REACH_NEG_2 {
negdivmod!(diff, SLOPE_TAIL_COUNT, m);
out((SLOPE_START_NEG_2 + diff) as _)?;
out((SLOPE_MIN + m) as _)?;
} else if diff >= SLOPE_REACH_NEG_3 {
negdivmod!(diff, SLOPE_TAIL_COUNT, m);
let p2 = SLOPE_MIN + m;
negdivmod!(diff, SLOPE_TAIL_COUNT, m);
let p1 = SLOPE_MIN + m;
let p0 = SLOPE_START_NEG_3 + diff;
out(p0 as _)?;
out(p1 as _)?;
out(p2 as _)?;
} else {
negdivmod!(diff, SLOPE_TAIL_COUNT, m);
let p3 = SLOPE_MIN + m;
negdivmod!(diff, SLOPE_TAIL_COUNT, m);
let p2 = SLOPE_MIN + m;
negdivmod!(diff, SLOPE_TAIL_COUNT, m);
let p1 = SLOPE_MIN + m;
let _ = diff;
out(SLOPE_MIN as _)?;
out(p1 as _)?;
out(p2 as _)?;
out(p3 as _)?;
}
}
Ok(())
}
fn write_identical_level<I, S>(iter: I, sink: &mut S, state: &mut S::State) -> Result<(), S::Error>
where
I: Iterator<Item = char>,
S: CollationKeySink + ?Sized,
{
let mut prev = 0i32;
for c in iter {
if !(0x4e00..=0xa000).contains(&prev) {
prev = (prev & !0x7f) - SLOPE_REACH_NEG_1;
} else {
prev = 0x9fff - SLOPE_REACH_POS_2;
}
if c == MERGE_SEPARATOR {
sink.write_byte(state, MERGE_SEPARATOR_BYTE)?;
prev = 0;
} else {
let c = c as i32;
write_diff(c - prev, sink, state)?;
prev = c;
}
}
Ok(())
}
#[cfg(test)]
mod test {
use super::*;
use icu_locale::locale;
type Key = Vec<u8>;
fn collator_en(strength: Strength) -> CollatorBorrowed<'static> {
let locale = locale!("en").into();
let mut options = CollatorOptions::default();
options.strength = Some(strength);
Collator::try_new(locale, options).unwrap()
}
fn collator_en_case_level(strength: Strength) -> CollatorBorrowed<'static> {
let locale = locale!("en").into();
let mut options = CollatorOptions::default();
options.strength = Some(strength);
options.case_level = Some(crate::options::CaseLevel::On);
Collator::try_new(locale, options).unwrap()
}
fn keys(strength: Strength) -> (Key, Key, Key) {
let collator = collator_en(strength);
let mut k0 = Vec::new();
let Ok(()) = collator.write_sort_key_to("aabc", &mut k0);
let mut k1 = Vec::new();
let Ok(()) = collator.write_sort_key_to("aAbc", &mut k1);
let mut k2 = Vec::new();
let Ok(()) = collator.write_sort_key_to("áAbc", &mut k2);
(k0, k1, k2)
}
#[test]
fn sort_key_primary() {
let (k0, k1, k2) = keys(Strength::Primary);
assert_eq!(k0, k1);
assert_eq!(k1, k2);
}
#[test]
fn sort_key_secondary() {
let (k0, k1, k2) = keys(Strength::Secondary);
assert_eq!(k0, k1);
assert!(k1 < k2);
}
#[test]
fn sort_key_tertiary() {
let (k0, k1, k2) = keys(Strength::Tertiary);
assert!(k0 < k1);
assert!(k1 < k2);
}
fn collator_ja(strength: Strength) -> CollatorBorrowed<'static> {
let locale = locale!("ja").into();
let mut options = CollatorOptions::default();
options.strength = Some(strength);
Collator::try_new(locale, options).unwrap()
}
fn keys_ja_strs(strength: Strength, s0: &str, s1: &str) -> (Key, Key) {
let collator = collator_ja(strength);
let mut k0 = Vec::new();
let Ok(()) = collator.write_sort_key_to(s0, &mut k0);
let mut k1 = Vec::new();
let Ok(()) = collator.write_sort_key_to(s1, &mut k1);
(k0, k1)
}
fn keys_ja(strength: Strength) -> (Key, Key) {
keys_ja_strs(strength, "あ", "ア")
}
#[test]
fn sort_keys_ja_to_quaternary() {
let (k0, k1) = keys_ja(Strength::Primary);
assert_eq!(k0, k1);
let (k0, k1) = keys_ja(Strength::Secondary);
assert_eq!(k0, k1);
let (k0, k1) = keys_ja(Strength::Tertiary);
assert_eq!(k0, k1);
let (k0, k1) = keys_ja(Strength::Quaternary);
assert!(k0 < k1);
}
#[test]
fn sort_keys_ja_identical() {
let (k0, k1) = keys_ja_strs(Strength::Quaternary, "ア", "ア");
assert_eq!(k0, k1);
let (k0, k1) = keys_ja_strs(Strength::Identical, "ア", "ア");
assert!(k0 < k1);
}
#[test]
fn sort_keys_utf16() {
let collator = collator_en(Strength::Identical);
const STR8: &[u8] = b"hello world!";
let mut k8 = Vec::new();
let Ok(()) = collator.write_sort_key_utf8_to(STR8, &mut k8);
const STR16: &[u16] = &[
0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21,
];
let mut k16 = Vec::new();
let Ok(()) = collator.write_sort_key_utf16_to(STR16, &mut k16);
assert_eq!(k8, k16);
}
#[test]
fn sort_keys_invalid() {
let collator = collator_en(Strength::Identical);
let mut k = Vec::new();
let Ok(()) = collator.write_sort_key_utf8_to(b"\xf0\x90", &mut k);
let mut k = Vec::new();
let Ok(()) = collator.write_sort_key_utf16_to(&[0xdd1e], &mut k);
}
#[test]
fn sort_key_to_vecdeque() {
let collator = collator_en(Strength::Identical);
let mut k0 = Vec::new();
let Ok(()) = collator.write_sort_key_to("áAbc", &mut k0);
let mut k1 = VecDeque::new();
let Ok(()) = collator.write_sort_key_to("áAbc", &mut k1);
assert!(k0.iter().eq(k1.iter()));
}
#[test]
fn sort_key_to_slice() {
let collator = collator_en(Strength::Identical);
let mut k0 = Vec::new();
let Ok(()) = collator.write_sort_key_to("áAbc", &mut k0);
let mut k1 = [0u8; 100];
let len = collator.write_sort_key_to("áAbc", &mut k1[..]).unwrap();
assert_eq!(len, k0.len());
assert!(k0.iter().eq(k1[..len].iter()));
}
#[test]
fn sort_key_to_slice_no_space() {
let collator = collator_en(Strength::Identical);
let mut k = [0u8; 0];
let res = collator.write_sort_key_to("áAbc", &mut k[..]);
assert!(matches!(res, Err(TooSmall { .. })));
}
#[test]
fn sort_key_to_slice_too_long() {
let collator = collator_en(Strength::Identical);
let mut k = [0u8; 5];
let res = collator.write_sort_key_to("áAbc", &mut k[..]);
assert!(matches!(res, Err(TooSmall { .. })));
}
#[test]
fn sort_key_to_slice_identical_too_long() {
let collator = collator_en(Strength::Identical);
let mut k = [0u8; 22];
let res = collator.write_sort_key_to("áAbc", &mut k[..]);
assert!(matches!(res, Err(TooSmall { .. })));
}
#[test]
fn sort_key_just_right() {
let collator = collator_en(Strength::Identical);
let mut k = [0u8; 0];
let res = collator.write_sort_key_to("áAbc", &mut k[..]);
let len = res.unwrap_err().length;
let mut k = vec![0u8; len - 1];
let res = collator.write_sort_key_to("áAbc", &mut k[..]);
let len = res.unwrap_err().length;
let mut k = vec![0u8; len];
let res = collator.write_sort_key_to("áAbc", &mut k[..]);
assert_eq!(res, Ok(len));
}
#[test]
fn sort_key_utf16_slice_too_small() {
let collator = collator_en(Strength::Identical);
const STR16: &[u16] = &[0x68, 0x65, 0x6c, 0x6c, 0x6f];
let mut k = [0u8; 4];
let res = collator.write_sort_key_utf16_to(STR16, &mut k[..]);
assert!(matches!(res, Err(TooSmall { .. })));
}
#[test]
fn sort_key_very_long() {
let collator = collator_en(Strength::Secondary);
let mut k = Vec::new();
let Ok(()) = collator.write_sort_key_to(&"a".repeat(300), &mut k);
}
#[test]
fn sort_key_case_level() {
let collator = collator_en_case_level(Strength::Tertiary);
let mut k = Vec::new();
let Ok(()) = collator.write_sort_key_to("aBc", &mut k);
}
#[test]
fn sort_key_case_level_empty() {
let collator = collator_en_case_level(Strength::Tertiary);
let mut k = Vec::new();
let Ok(()) = collator.write_sort_key_to("", &mut k);
}
fn check_sort_key_less(a: &[u16], b: &[u16]) {
let collator = collator_en(Strength::Identical);
let mut ak = Vec::new();
let Ok(()) = collator.write_sort_key_utf16_to(a, &mut ak);
let mut bk = Vec::new();
let Ok(()) = collator.write_sort_key_utf16_to(b, &mut bk);
assert!(ak < bk, "failed: {a:04x?} - {b:04x?}");
}
#[test]
fn sort_key_fffe_bug_6811() {
check_sort_key_less(
&[0xfffe, 0x0001, 0x0002, 0x0003],
&[0x0001, 0xfffe, 0x0002, 0x0003],
);
check_sort_key_less(
&[0x0001, 0xfffe, 0x0002, 0x0003],
&[0x0001, 0x0002, 0xfffe, 0x0003],
);
check_sort_key_less(
&[0x0001, 0x0002, 0xfffe, 0x0003],
&[0x0001, 0x0002, 0x0003, 0xfffe],
);
check_sort_key_less(&[0xfffe, 0x0000, 0x0000], &[0x0000, 0xfffe, 0x0000]);
check_sort_key_less(&[0x0000, 0xfffe, 0x0000], &[0x0000, 0x0000, 0xfffe]);
}
}