1use alloc::collections::VecDeque;
13use alloc::vec::Vec;
14
15use crate::elements::CharacterAndClassAndTrieValue;
16use crate::elements::CollationElement32;
17use crate::elements::Tag;
18use crate::elements::BACKWARD_COMBINING_MARKER;
19use crate::elements::CE_BUFFER_SIZE;
20use crate::elements::FALLBACK_CE32;
21use crate::elements::NON_ROUND_TRIP_MARKER;
22use crate::elements::{
23 char_from_u32, CollationElement, CollationElements, NonPrimary, FFFD_CE32,
24 HANGUL_SYLLABLE_MARKER, HIGH_ZEROS_MASK, JAMO_COUNT, LOW_ZEROS_MASK, NO_CE, NO_CE_PRIMARY,
25 NO_CE_QUATERNARY, NO_CE_SECONDARY, NO_CE_TERTIARY, OPTIMIZED_DIACRITICS_MAX_COUNT,
26 QUATERNARY_MASK,
27};
28use crate::options::CollatorOptionsBitField;
29use crate::options::{
30 AlternateHandling, CollatorOptions, MaxVariable, ResolvedCollatorOptions, Strength,
31};
32use crate::preferences::{CollationCaseFirst, CollationNumericOrdering, CollationType};
33use crate::provider::CollationData;
34use crate::provider::CollationDiacritics;
35use crate::provider::CollationDiacriticsV1;
36use crate::provider::CollationJamo;
37use crate::provider::CollationJamoV1;
38use crate::provider::CollationMetadataV1;
39use crate::provider::CollationReordering;
40use crate::provider::CollationReorderingV1;
41use crate::provider::CollationRootV1;
42use crate::provider::CollationSpecialPrimariesV1;
43use crate::provider::CollationSpecialPrimariesValidated;
44use crate::provider::CollationTailoringV1;
45use core::cmp::Ordering;
46use core::convert::{Infallible, TryFrom};
47use icu_normalizer::provider::DecompositionData;
48use icu_normalizer::provider::DecompositionTables;
49use icu_normalizer::provider::NormalizerNfdDataV1;
50use icu_normalizer::provider::NormalizerNfdTablesV1;
51use icu_normalizer::DecomposingNormalizerBorrowed;
52use icu_normalizer::Decomposition;
53use icu_provider::marker::ErasedMarker;
54use icu_provider::prelude::*;
55use smallvec::SmallVec;
56use utf16_iter::Utf16CharsEx;
57use utf8_iter::Utf8CharsEx;
58use zerovec::ule::AsULE;
59
60const LEVEL_SEPARATOR_BYTE: u8 = 1;
62
63const MERGE_SEPARATOR: char = '\u{fffe}';
68const MERGE_SEPARATOR_BYTE: u8 = 2;
69const MERGE_SEPARATOR_PRIMARY: u32 = 0x02000000;
70
71const PRIMARY_COMPRESSION_LOW_BYTE: u8 = 3;
76
77const PRIMARY_COMPRESSION_HIGH_BYTE: u8 = 0xff;
82
83const COMMON_BYTE: u8 = 5;
85const COMMON_WEIGHT16: u16 = 0x0500;
86
87const PRIMARY_LEVEL_FLAG: u8 = 0x01;
89const SECONDARY_LEVEL_FLAG: u8 = 0x02;
90const CASE_LEVEL_FLAG: u8 = 0x04;
91const TERTIARY_LEVEL_FLAG: u8 = 0x08;
92const QUATERNARY_LEVEL_FLAG: u8 = 0x10;
93
94const LEVEL_MASKS: [u8; Strength::Identical as usize + 1] = [
95 PRIMARY_LEVEL_FLAG,
96 PRIMARY_LEVEL_FLAG | SECONDARY_LEVEL_FLAG,
97 PRIMARY_LEVEL_FLAG | SECONDARY_LEVEL_FLAG | TERTIARY_LEVEL_FLAG,
98 PRIMARY_LEVEL_FLAG | SECONDARY_LEVEL_FLAG | TERTIARY_LEVEL_FLAG | QUATERNARY_LEVEL_FLAG,
99 0,
100 0,
101 0,
102 PRIMARY_LEVEL_FLAG | SECONDARY_LEVEL_FLAG | TERTIARY_LEVEL_FLAG | QUATERNARY_LEVEL_FLAG,
103];
104
105const WEIGHT_LOW: usize = 0;
107const WEIGHT_MIDDLE: usize = 1;
108const WEIGHT_HIGH: usize = 2;
109const WEIGHT_MAX_COUNT: usize = 3;
110
111const SEC_COMMON: [u8; 4] = [COMMON_BYTE, COMMON_BYTE + 0x20, COMMON_BYTE + 0x40, 0x21];
113
114const CASE_LOWER_FIRST_COMMON: [u8; 4] = [1, 7, 13, 7];
116
117const CASE_UPPER_FIRST_COMMON: [u8; 4] = [3, 0 , 15, 13];
119
120const TER_ONLY_COMMON: [u8; 4] = [COMMON_BYTE, COMMON_BYTE + 0x60, COMMON_BYTE + 0xc0, 0x61];
122
123const TER_LOWER_FIRST_COMMON: [u8; 4] = [COMMON_BYTE, COMMON_BYTE + 0x20, COMMON_BYTE + 0x40, 0x21];
125
126const TER_UPPER_FIRST_COMMON: [u8; 4] = [
128 COMMON_BYTE + 0x80,
129 COMMON_BYTE + 0x80 + 0x20,
130 COMMON_BYTE + 0x80 + 0x40,
131 0x21,
132];
133
134const QUAT_COMMON: [u8; 4] = [0x1c, 0x1c + 0x70, 0x1c + 0xe0, 0x71];
135const QUAT_SHIFTED_LIMIT_BYTE: u8 = QUAT_COMMON[WEIGHT_LOW] - 1; const SLOPE_MIN: i32 = 3;
139const SLOPE_MAX: i32 = 0xff;
140const SLOPE_MIDDLE: i32 = 0x81;
141const SLOPE_TAIL_COUNT: i32 = SLOPE_MAX - SLOPE_MIN + 1;
142const SLOPE_SINGLE: i32 = 80;
143const SLOPE_LEAD_2: i32 = 42;
144const SLOPE_LEAD_3: i32 = 3;
145
146const SLOPE_REACH_POS_1: i32 = SLOPE_SINGLE;
148const SLOPE_REACH_NEG_1: i32 = -SLOPE_SINGLE;
149
150const SLOPE_REACH_POS_2: i32 = SLOPE_LEAD_2 * SLOPE_TAIL_COUNT + (SLOPE_LEAD_2 - 1);
152const SLOPE_REACH_NEG_2: i32 = -SLOPE_REACH_POS_2 - 1;
153
154const SLOPE_REACH_POS_3: i32 = SLOPE_LEAD_3 * SLOPE_TAIL_COUNT * SLOPE_TAIL_COUNT
156 + (SLOPE_LEAD_3 - 1) * SLOPE_TAIL_COUNT
157 + (SLOPE_TAIL_COUNT - 1);
158const SLOPE_REACH_NEG_3: i32 = -SLOPE_REACH_POS_3 - 1;
159
160const SLOPE_START_POS_2: i32 = SLOPE_MIDDLE + SLOPE_SINGLE + 1;
162const SLOPE_START_POS_3: i32 = SLOPE_START_POS_2 + SLOPE_LEAD_2;
163const SLOPE_START_NEG_2: i32 = SLOPE_MIDDLE + SLOPE_REACH_NEG_1;
164const SLOPE_START_NEG_3: i32 = SLOPE_START_NEG_2 - SLOPE_LEAD_2;
165
166struct AnyQuaternaryAccumulator(u32);
167
168impl AnyQuaternaryAccumulator {
169 #[inline(always)]
170 pub fn new() -> Self {
171 AnyQuaternaryAccumulator(0)
172 }
173 #[inline(always)]
174 pub fn accumulate(&mut self, non_primary: NonPrimary) {
175 self.0 |= non_primary.bits()
176 }
177 #[inline(always)]
178 pub fn has_quaternary(&self) -> bool {
179 self.0 & u32::from(QUATERNARY_MASK) != 0
180 }
181}
182
183#[inline(always)]
186fn in_inclusive_range16(i: u16, start: u16, end: u16) -> bool {
187 i.wrapping_sub(start) <= (end - start)
188}
189
190#[cfg(feature = "latin1")]
194trait Latin1Chars {
195 fn latin1_chars(&self) -> impl DoubleEndedIterator<Item = char>;
196}
197
198#[cfg(feature = "latin1")]
199impl Latin1Chars for [u8] {
200 fn latin1_chars(&self) -> impl DoubleEndedIterator<Item = char> {
201 self.iter().map(|b| char::from(*b))
202 }
203}
204
205#[cfg(feature = "latin1")]
213fn split_prefix_latin1<'a, 'b>(left: &'a [u8], right: &'b [u8]) -> (&'a [u8], &'a [u8], &'b [u8]) {
214 let i = left
215 .iter()
216 .zip(right.iter())
217 .take_while(|(l, r)| l == r)
218 .count();
219 if let Some((head, left_tail)) = left.split_at_checked(i) {
220 if let Some(right_tail) = right.get(i..) {
221 return (head, left_tail, right_tail);
222 }
223 }
224 (&[], left, right)
225}
226
227#[cfg(feature = "latin1")]
235fn split_prefix_latin1_utf16<'a, 'b>(
236 left: &'a [u8],
237 right: &'b [u16],
238) -> (&'a [u8], &'a [u8], &'b [u16]) {
239 let i = left
240 .iter()
241 .zip(right.iter())
242 .take_while(|(l, r)| u16::from(**l) == **r)
243 .count();
244 if let Some((head, left_tail)) = left.split_at_checked(i) {
245 if let Some(right_tail) = right.get(i..) {
246 return (head, left_tail, right_tail);
247 }
248 }
249 (&[], left, right)
250}
251
252fn split_prefix_u16<'a, 'b>(
260 left: &'a [u16],
261 right: &'b [u16],
262) -> (&'a [u16], &'a [u16], &'b [u16]) {
263 let mut i = left
264 .iter()
265 .zip(right.iter())
266 .take_while(|(l, r)| l == r)
267 .count();
268 if i != 0 {
269 if let Some(&last) = left.get(i.wrapping_sub(1)) {
270 if in_inclusive_range16(last, 0xD800, 0xDBFF) {
271 i -= 1;
272 }
273 if let Some((head, left_tail)) = left.split_at_checked(i) {
274 if let Some(right_tail) = right.get(i..) {
275 return (head, left_tail, right_tail);
276 }
277 }
278 }
279 }
280 (&[], left, right)
281}
282
283fn split_prefix_u8<'a, 'b>(left: &'a [u8], right: &'b [u8]) -> (&'a [u8], &'a [u8], &'b [u8]) {
291 let mut i = left
292 .iter()
293 .zip(right.iter())
294 .take_while(|(l, r)| l == r)
295 .count();
296 if i != 0 {
297 if let Some(right_first) = right.get(i) {
305 if (right_first & 0b1100_0000) == 0b1000_0000 {
306 i -= 1;
307 }
308 }
309 while i != 0 {
310 if let Some(left_first) = left.get(i) {
311 if (left_first & 0b1100_0000) == 0b1000_0000 {
312 i -= 1;
313 continue;
314 }
315 }
316 break;
317 }
318 if let Some((head, left_tail)) = left.split_at_checked(i) {
319 if let Some(right_tail) = right.get(i..) {
320 return (head, left_tail, right_tail);
321 }
322 }
323 }
324 (&[], left, right)
325}
326
327fn split_prefix<'a, 'b>(left: &'a str, right: &'b str) -> (&'a str, &'a str, &'b str) {
333 let left_bytes = left.as_bytes();
334 let right_bytes = right.as_bytes();
335 let mut i = left_bytes
336 .iter()
337 .zip(right_bytes.iter())
338 .take_while(|(l, r)| l == r)
339 .count();
340 if i != 0 {
341 loop {
358 if let Some(left_first) = left_bytes.get(i) {
359 if (left_first & 0b1100_0000) == 0b1000_0000 {
360 i -= 1;
361 continue;
362 }
363 }
364 break;
365 }
366 if let Some((head, left_tail)) = left.split_at_checked(i) {
370 if let Some(right_tail) = right.get(i..) {
371 return (head, left_tail, right_tail);
372 }
373 }
374 }
375 ("", left, right)
376}
377
378#[derive(Debug)]
381struct LocaleSpecificDataHolder {
382 tailoring: Option<DataPayload<CollationTailoringV1>>,
383 diacritics: DataPayload<CollationDiacriticsV1>,
384 reordering: Option<DataPayload<CollationReorderingV1>>,
385 merged_options: CollatorOptionsBitField,
386 lithuanian_dot_above: bool,
387}
388
389icu_locale_core::preferences::define_preferences!(
390 [Copy]
409 CollatorPreferences,
410 {
411 collation_type: CollationType,
413 case_first: CollationCaseFirst,
416 numeric_ordering: CollationNumericOrdering
420 }
421);
422
423impl LocaleSpecificDataHolder {
424 fn try_new_unstable_internal<D>(
426 provider: &D,
427 prefs: CollatorPreferences,
428 options: CollatorOptions,
429 ) -> Result<Self, DataError>
430 where
431 D: DataProvider<CollationTailoringV1>
432 + DataProvider<CollationDiacriticsV1>
433 + DataProvider<CollationMetadataV1>
434 + DataProvider<CollationReorderingV1>
435 + ?Sized,
436 {
437 let marker_attributes = prefs
438 .collation_type
439 .as_ref()
440 .map(|c| DataMarkerAttributes::from_str_or_panic(c.as_str()))
442 .unwrap_or_default();
443
444 let data_locale = CollationTailoringV1::make_locale(prefs.locale_preferences);
445 let req = DataRequest {
446 id: DataIdentifierBorrowed::for_marker_attributes_and_locale(
447 marker_attributes,
448 &data_locale,
449 ),
450 metadata: {
451 let mut metadata = DataRequestMetadata::default();
452 metadata.silent = true;
453 metadata
454 },
455 };
456
457 let fallback_req = DataRequest {
458 id: DataIdentifierBorrowed::for_marker_attributes_and_locale(
459 Default::default(),
460 &data_locale,
461 ),
462 ..Default::default()
463 };
464
465 let metadata_payload: DataPayload<crate::provider::CollationMetadataV1> = provider
466 .load(req)
467 .or_else(|_| provider.load(fallback_req))?
468 .payload;
469
470 let metadata = metadata_payload.get();
471
472 let tailoring: Option<DataPayload<crate::provider::CollationTailoringV1>> =
473 if metadata.tailored() {
474 Some(
475 provider
476 .load(req)
477 .or_else(|_| provider.load(fallback_req))?
478 .payload,
479 )
480 } else {
481 None
482 };
483
484 let reordering: Option<DataPayload<crate::provider::CollationReorderingV1>> =
485 if metadata.reordering() {
486 Some(
487 provider
488 .load(req)
489 .or_else(|_| provider.load(fallback_req))?
490 .payload,
491 )
492 } else {
493 None
494 };
495
496 if let Some(reordering) = &reordering {
497 if reordering.get().reorder_table.len() != 256 {
498 return Err(DataError::custom("invalid").with_marker(CollationReorderingV1::INFO));
499 }
500 }
501
502 let tailored_diacritics = metadata.tailored_diacritics();
503 let diacritics: DataPayload<CollationDiacriticsV1> = provider
504 .load(if tailored_diacritics {
505 req
506 } else {
507 Default::default()
508 })?
509 .payload;
510
511 if tailored_diacritics {
512 if diacritics.get().secondaries.len() > OPTIMIZED_DIACRITICS_MAX_COUNT {
518 return Err(DataError::custom("invalid").with_marker(CollationDiacriticsV1::INFO));
519 }
520 } else if diacritics.get().secondaries.len() != OPTIMIZED_DIACRITICS_MAX_COUNT {
521 return Err(DataError::custom("invalid").with_marker(CollationDiacriticsV1::INFO));
522 }
523
524 let mut altered_defaults = CollatorOptionsBitField::default();
525
526 if metadata.alternate_shifted() {
527 altered_defaults.set_alternate_handling(Some(AlternateHandling::Shifted));
528 }
529 if metadata.backward_second_level() {
530 altered_defaults.set_backward_second_level(Some(true));
531 }
532
533 altered_defaults.set_case_first(Some(metadata.case_first()));
534 altered_defaults.set_max_variable(Some(metadata.max_variable()));
535
536 let mut merged_options = CollatorOptionsBitField::from(options);
537 merged_options.set_case_first(prefs.case_first);
538 merged_options.set_numeric_from_enum(prefs.numeric_ordering);
539 merged_options.set_defaults(altered_defaults);
540
541 Ok(LocaleSpecificDataHolder {
542 tailoring,
543 diacritics,
544 merged_options,
545 reordering,
546 lithuanian_dot_above: metadata.lithuanian_dot_above(),
547 })
548 }
549}
550
551#[derive(Debug)]
553pub struct Collator {
554 special_primaries: DataPayload<ErasedMarker<CollationSpecialPrimariesValidated<'static>>>,
555 root: DataPayload<CollationRootV1>,
556 tailoring: Option<DataPayload<CollationTailoringV1>>,
557 jamo: DataPayload<CollationJamoV1>,
558 diacritics: DataPayload<CollationDiacriticsV1>,
559 options: CollatorOptionsBitField,
560 reordering: Option<DataPayload<CollationReorderingV1>>,
561 decompositions: DataPayload<NormalizerNfdDataV1>,
562 tables: DataPayload<NormalizerNfdTablesV1>,
563 lithuanian_dot_above: bool,
564}
565
566impl Collator {
567 pub fn as_borrowed(&self) -> CollatorBorrowed<'_> {
569 CollatorBorrowed {
570 special_primaries: self.special_primaries.get(),
571 root: self.root.get(),
572 tailoring: self.tailoring.as_ref().map(|s| s.get()),
573 jamo: self.jamo.get(),
574 diacritics: self.diacritics.get(),
575 options: self.options,
576 reordering: self.reordering.as_ref().map(|s| s.get()),
577 decompositions: self.decompositions.get(),
578 tables: self.tables.get(),
579 lithuanian_dot_above: self.lithuanian_dot_above,
580 }
581 }
582
583 #[cfg(feature = "compiled_data")]
585 pub fn try_new(
586 prefs: CollatorPreferences,
587 options: CollatorOptions,
588 ) -> Result<CollatorBorrowed<'static>, DataError> {
589 CollatorBorrowed::try_new(prefs, options)
590 }
591
592 icu_provider::gen_buffer_data_constructors!(
593 (prefs: CollatorPreferences, options: CollatorOptions) -> error: DataError,
594 functions: [
595 try_new: skip,
596 try_new_with_buffer_provider,
597 try_new_unstable,
598 Self
599 ]
600 );
601
602 #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::try_new)]
603 pub fn try_new_unstable<D>(
604 provider: &D,
605 prefs: CollatorPreferences,
606 options: CollatorOptions,
607 ) -> Result<Self, DataError>
608 where
609 D: DataProvider<CollationSpecialPrimariesV1>
610 + DataProvider<CollationRootV1>
611 + DataProvider<CollationTailoringV1>
612 + DataProvider<CollationDiacriticsV1>
613 + DataProvider<CollationJamoV1>
614 + DataProvider<CollationMetadataV1>
615 + DataProvider<CollationReorderingV1>
616 + DataProvider<NormalizerNfdDataV1>
617 + DataProvider<NormalizerNfdTablesV1>
618 + ?Sized,
619 {
620 Self::try_new_unstable_internal(
621 provider,
622 provider.load(Default::default())?.payload,
623 provider.load(Default::default())?.payload,
624 provider.load(Default::default())?.payload,
625 provider.load(Default::default())?.payload,
626 provider.load(Default::default())?.payload,
627 prefs,
628 options,
629 )
630 }
631
632 #[expect(clippy::too_many_arguments)]
633 fn try_new_unstable_internal<D>(
634 provider: &D,
635 root: DataPayload<CollationRootV1>,
636 decompositions: DataPayload<NormalizerNfdDataV1>,
637 tables: DataPayload<NormalizerNfdTablesV1>,
638 jamo: DataPayload<CollationJamoV1>,
639 special_primaries: DataPayload<CollationSpecialPrimariesV1>,
640 prefs: CollatorPreferences,
641 options: CollatorOptions,
642 ) -> Result<Self, DataError>
643 where
644 D: DataProvider<CollationRootV1>
645 + DataProvider<CollationTailoringV1>
646 + DataProvider<CollationDiacriticsV1>
647 + DataProvider<CollationMetadataV1>
648 + DataProvider<CollationReorderingV1>
649 + ?Sized,
650 {
651 let locale_dependent =
652 LocaleSpecificDataHolder::try_new_unstable_internal(provider, prefs, options)?;
653
654 if jamo.get().ce32s.len() != JAMO_COUNT {
656 return Err(DataError::custom("invalid").with_marker(CollationJamoV1::INFO));
657 }
658
659 if special_primaries.get().last_primaries.len() <= (MaxVariable::Currency as usize) {
662 return Err(DataError::custom("invalid").with_marker(CollationSpecialPrimariesV1::INFO));
663 }
664 let special_primaries = special_primaries.map_project(|csp, _| {
665 let compressible_bytes = (csp.last_primaries.len()
666 == MaxVariable::Currency as usize + 16)
667 .then(|| {
668 csp.last_primaries
669 .as_maybe_borrowed()?
670 .as_ule_slice()
671 .get((MaxVariable::Currency as usize)..)?
672 .try_into()
673 .ok()
674 })
675 .flatten()
676 .unwrap_or(
677 CollationSpecialPrimariesValidated::HARDCODED_COMPRESSIBLE_BYTES_FALLBACK,
678 );
679
680 CollationSpecialPrimariesValidated {
681 last_primaries: csp.last_primaries.truncated(MaxVariable::Currency as usize),
682 numeric_primary: csp.numeric_primary,
683 compressible_bytes,
684 }
685 });
686
687 Ok(Collator {
688 special_primaries,
689 root,
690 tailoring: locale_dependent.tailoring,
691 jamo,
692 diacritics: locale_dependent.diacritics,
693 options: locale_dependent.merged_options,
694 reordering: locale_dependent.reordering,
695 decompositions,
696 tables,
697 lithuanian_dot_above: locale_dependent.lithuanian_dot_above,
698 })
699 }
700}
701
702macro_rules! compare {
703 ($(#[$meta:meta])*,
704 $compare:ident,
705 $left_slice:ty,
706 $right_slice:ty,
707 $split_prefix:ident,
708 $left_to_iter:ident,
709 $right_to_iter:ident,
710 ) => {
711 $(#[$meta])*
712 pub fn $compare(&self, left: &$left_slice, right: &$right_slice) -> Ordering {
713 let (head, left_tail, right_tail) = $split_prefix(left, right);
714 if left_tail.is_empty() && right_tail.is_empty() {
715 return Ordering::Equal;
716 }
717 let ret = self.compare_impl(left_tail.$left_to_iter(), right_tail.$right_to_iter(), head.$left_to_iter().rev());
718 if self.options.strength() == Strength::Identical && ret == Ordering::Equal {
719 return Decomposition::new(left_tail.$left_to_iter(), self.decompositions, self.tables).map(|c| if c != MERGE_SEPARATOR { c as i32 } else { -1i32 }).cmp(
720 Decomposition::new(right_tail.$right_to_iter(), self.decompositions, self.tables).map(|c| if c != MERGE_SEPARATOR { c as i32 } else { -1i32 }),
721 );
722 }
723 ret
724 }
725 }
726}
727
728#[derive(Debug)]
731pub struct CollatorBorrowed<'a> {
732 special_primaries: &'a CollationSpecialPrimariesValidated<'a>,
733 root: &'a CollationData<'a>,
734 tailoring: Option<&'a CollationData<'a>>,
735 jamo: &'a CollationJamo<'a>,
736 diacritics: &'a CollationDiacritics<'a>,
737 options: CollatorOptionsBitField,
738 reordering: Option<&'a CollationReordering<'a>>,
739 decompositions: &'a DecompositionData<'a>,
740 tables: &'a DecompositionTables<'a>,
741 lithuanian_dot_above: bool,
742}
743
744impl CollatorBorrowed<'static> {
745 #[cfg(feature = "compiled_data")]
747 pub fn try_new(
748 prefs: CollatorPreferences,
749 options: CollatorOptions,
750 ) -> Result<Self, DataError> {
751 let provider = &crate::provider::Baked;
755 let decompositions = icu_normalizer::provider::Baked::SINGLETON_NORMALIZER_NFD_DATA_V1;
756 let tables = icu_normalizer::provider::Baked::SINGLETON_NORMALIZER_NFD_TABLES_V1;
757 let root = crate::provider::Baked::SINGLETON_COLLATION_ROOT_V1;
758 let jamo = crate::provider::Baked::SINGLETON_COLLATION_JAMO_V1;
759
760 let locale_dependent =
761 LocaleSpecificDataHolder::try_new_unstable_internal(provider, prefs, options)?;
762
763 const _: () = assert!(
765 crate::provider::Baked::SINGLETON_COLLATION_JAMO_V1
766 .ce32s
767 .as_slice()
768 .len()
769 == JAMO_COUNT
770 );
771
772 const _: () = assert!(
775 crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1
776 .last_primaries
777 .as_slice()
778 .len()
779 > (MaxVariable::Currency as usize)
780 );
781
782 let special_primaries = const {
783 &CollationSpecialPrimariesValidated {
784 last_primaries: zerovec::ZeroSlice::from_ule_slice(
785 crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1
786 .last_primaries
787 .as_slice()
788 .as_ule_slice()
789 .split_at(MaxVariable::Currency as usize)
790 .0,
791 )
792 .as_zerovec(),
793 numeric_primary: crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1
794 .numeric_primary,
795 compressible_bytes: {
796 const C: &[<u16 as AsULE>::ULE] =
797 crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1
798 .last_primaries
799 .as_slice()
800 .as_ule_slice();
801 if C.len() == MaxVariable::Currency as usize + 16 {
802 let i = MaxVariable::Currency as usize;
803 #[allow(clippy::indexing_slicing)] &[
805 C[i],
806 C[i + 1],
807 C[i + 2],
808 C[i + 3],
809 C[i + 4],
810 C[i + 5],
811 C[i + 6],
812 C[i + 7],
813 C[i + 8],
814 C[i + 9],
815 C[i + 10],
816 C[i + 11],
817 C[i + 12],
818 C[i + 13],
819 C[i + 14],
820 C[i + 15],
821 ]
822 } else {
823 CollationSpecialPrimariesValidated::HARDCODED_COMPRESSIBLE_BYTES_FALLBACK
824 }
825 },
826 }
827 };
828
829 #[expect(clippy::unwrap_used)]
832 Ok(CollatorBorrowed {
833 special_primaries,
834 root,
835 tailoring: locale_dependent.tailoring.map(|s| s.get_static().unwrap()),
837 jamo,
838 diacritics: locale_dependent.diacritics.get_static().unwrap(),
840 options: locale_dependent.merged_options,
841 reordering: locale_dependent.reordering.map(|s| s.get_static().unwrap()),
843 decompositions,
844 tables,
845 lithuanian_dot_above: locale_dependent.lithuanian_dot_above,
846 })
847 }
848
849 pub const fn static_to_owned(self) -> Collator {
854 Collator {
855 special_primaries: DataPayload::from_static_ref(self.special_primaries),
856 root: DataPayload::from_static_ref(self.root),
857 tailoring: if let Some(s) = self.tailoring {
858 Some(DataPayload::from_static_ref(s))
860 } else {
861 None
862 },
863 jamo: DataPayload::from_static_ref(self.jamo),
864 diacritics: DataPayload::from_static_ref(self.diacritics),
865 options: self.options,
866 reordering: if let Some(s) = self.reordering {
867 Some(DataPayload::from_static_ref(s))
869 } else {
870 None
871 },
872 decompositions: DataPayload::from_static_ref(self.decompositions),
873 tables: DataPayload::from_static_ref(self.tables),
874 lithuanian_dot_above: self.lithuanian_dot_above,
875 }
876 }
877}
878
879macro_rules! collation_elements {
880 ($self:expr, $chars:expr, $tailoring:expr, $numeric_primary:expr) => {{
881 let jamo = <&[<u32 as AsULE>::ULE; JAMO_COUNT]>::try_from($self.jamo.ce32s.as_ule_slice());
882
883 let jamo = jamo.unwrap();
884
885 CollationElements::new(
886 $chars,
887 $self.root,
888 $tailoring,
889 jamo,
890 &$self.diacritics.secondaries,
891 $self.decompositions,
892 $self.tables,
893 $numeric_primary,
894 $self.lithuanian_dot_above,
895 )
896 }};
897}
898
899impl CollatorBorrowed<'_> {
900 pub fn resolved_options(&self) -> ResolvedCollatorOptions {
903 self.options.into()
904 }
905
906 compare!(
907 ,
909 compare,
910 str,
911 str,
912 split_prefix,
913 chars,
914 chars,
915 );
916
917 compare!(
918 ,
922 compare_utf8,
923 [u8],
924 [u8],
925 split_prefix_u8,
926 chars,
927 chars,
928 );
929
930 compare!(
931 ,
934 compare_utf16,
935 [u16],
936 [u16],
937 split_prefix_u16,
938 chars,
939 chars,
940 );
941
942 compare!(
943 #[cfg(feature = "latin1")]
947 ,
948 compare_latin1,
949 [u8],
950 [u8],
951 split_prefix_latin1,
952 latin1_chars,
953 latin1_chars,
954 );
955
956 compare!(
957 #[cfg(feature = "latin1")]
966 ,
967 compare_latin1_utf16,
968 [u8],
969 [u16],
970 split_prefix_latin1_utf16,
971 latin1_chars,
972 chars,
973 );
974
975 #[inline(always)]
976 fn tailoring_or_root(&self) -> &CollationData<'_> {
977 if let Some(tailoring) = &self.tailoring {
978 tailoring
979 } else {
980 self.root
994 }
995 }
996
997 #[inline(always)]
998 fn numeric_primary(&self) -> Option<u8> {
999 if self.options.numeric() {
1000 Some(self.special_primaries.numeric_primary)
1001 } else {
1002 None
1003 }
1004 }
1005
1006 #[inline(always)]
1007 fn variable_top(&self) -> u32 {
1008 if self.options.alternate_handling() == AlternateHandling::NonIgnorable {
1009 0
1010 } else {
1011 self.special_primaries
1013 .last_primary_for_group(self.options.max_variable())
1014 + 1
1015 }
1016 }
1017
1018 fn compare_impl<
1024 L: Iterator<Item = char>,
1025 R: Iterator<Item = char>,
1026 H: Iterator<Item = char>,
1027 >(
1028 &self,
1029 left_chars: L,
1030 right_chars: R,
1031 mut head_chars: H,
1032 ) -> Ordering {
1033 let mut left_ces: SmallVec<[CollationElement; CE_BUFFER_SIZE]> = SmallVec::new();
1048 let mut right_ces: SmallVec<[CollationElement; CE_BUFFER_SIZE]> = SmallVec::new();
1049
1050 let mut any_variable = false;
1053 let variable_top = self.variable_top();
1054
1055 let tailoring = self.tailoring_or_root();
1056 let numeric_primary = self.numeric_primary();
1057 let mut left = collation_elements!(self, left_chars, tailoring, numeric_primary);
1058 let mut right = collation_elements!(self, right_chars, tailoring, numeric_primary);
1059
1060 #[expect(clippy::never_loop)]
1081 'prefix: loop {
1082 if let Some(mut head_last_c) = head_chars.next() {
1083 let norm_trie = &self.decompositions.trie;
1084 let mut head_last = CharacterAndClassAndTrieValue::new_with_trie_val(
1085 head_last_c,
1086 norm_trie.get(head_last_c),
1087 );
1088 let mut head_last_ce32 = CollationElement32::default();
1089 let mut head_last_ok = false;
1090 if let Some(left_different) = left.iter_next_before_init() {
1091 left.prepend_upcoming_before_init(left_different.clone());
1092 if let Some(right_different) = right.iter_next_before_init() {
1093 right.prepend_upcoming_before_init(right_different.clone());
1095
1096 #[expect(clippy::never_loop)]
1154 loop {
1155 let decomposition = head_last.trie_val;
1158 if (decomposition
1159 & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER))
1160 == 0
1161 {
1162 } else if ((decomposition & HIGH_ZEROS_MASK) != 0)
1166 && ((decomposition & LOW_ZEROS_MASK) != 0)
1167 {
1168 head_last_c = char_from_u32(decomposition & 0x7FFF);
1171 } else if decomposition == HANGUL_SYLLABLE_MARKER {
1172 head_last_ce32 = FFFD_CE32;
1173 } else {
1174 break;
1175 }
1176 head_last_ok = true;
1177
1178 let mut left_ce32 = CollationElement32::default();
1179 let mut right_ce32 = CollationElement32::default();
1180 let left_c;
1181 let right_c;
1182
1183 let decomposition = left_different.trie_val;
1184 if (decomposition
1185 & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER))
1186 == 0
1187 {
1188 left_c = left_different.character();
1189 } else if ((decomposition & HIGH_ZEROS_MASK) != 0)
1190 && ((decomposition & LOW_ZEROS_MASK) != 0)
1191 {
1192 left_c = char_from_u32(decomposition & 0x7FFF);
1195 } else if decomposition == HANGUL_SYLLABLE_MARKER {
1196 left_c = '\u{0}';
1197 left_ce32 = FFFD_CE32;
1198 } else {
1199 break;
1200 }
1201
1202 let decomposition = right_different.trie_val;
1203 if (decomposition
1204 & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER))
1205 == 0
1206 {
1207 right_c = right_different.character();
1208 } else if ((decomposition & HIGH_ZEROS_MASK) != 0)
1209 && ((decomposition & LOW_ZEROS_MASK) != 0)
1210 {
1211 right_c = char_from_u32(decomposition & 0x7FFF);
1214 } else if decomposition == HANGUL_SYLLABLE_MARKER {
1215 right_c = '\u{0}';
1216 right_ce32 = FFFD_CE32;
1217 } else {
1218 break;
1219 }
1220
1221 if head_last_ce32 == CollationElement32::default() {
1225 head_last_ce32 = tailoring.ce32_for_char(head_last_c);
1226 if head_last_ce32 == FALLBACK_CE32 {
1227 head_last_ce32 = self.root.ce32_for_char(head_last_c);
1228 }
1229 if head_last_ce32.tag_checked() == Some(Tag::Contraction)
1230 && head_last_ce32.at_least_one_suffix_contains_starter()
1231 {
1232 break;
1233 }
1234 }
1235 if left_ce32 == CollationElement32::default() {
1238 left_ce32 = tailoring.ce32_for_char(left_c);
1239 if left_ce32 == FALLBACK_CE32 {
1240 left_ce32 = self.root.ce32_for_char(left_c);
1241 }
1242 if left_ce32.tag_checked() == Some(Tag::Prefix) {
1243 break;
1244 }
1245 }
1246 if right_ce32 == CollationElement32::default() {
1247 right_ce32 = tailoring.ce32_for_char(right_c);
1248 if right_ce32 == FALLBACK_CE32 {
1249 right_ce32 = self.root.ce32_for_char(right_c);
1250 }
1251 if right_ce32.tag_checked() == Some(Tag::Prefix) {
1252 break;
1253 }
1254 }
1255 if self.options.numeric()
1256 && head_last_ce32.tag_checked() == Some(Tag::Digit)
1257 && (left_ce32.tag_checked() == Some(Tag::Digit)
1258 || right_ce32.tag_checked() == Some(Tag::Digit))
1259 {
1260 break;
1261 }
1262 break 'prefix;
1264 }
1265 }
1266 }
1267 let mut tail_first_c;
1268 let mut tail_first_ce32;
1269 let mut tail_first_ok;
1270 loop {
1271 left.prepend_upcoming_before_init(head_last.clone());
1273 right.prepend_upcoming_before_init(head_last.clone());
1274
1275 tail_first_c = head_last_c;
1276 tail_first_ce32 = head_last_ce32;
1277 tail_first_ok = head_last_ok;
1278
1279 head_last_c = if let Some(head_last_c) = head_chars.next() {
1280 head_last_c
1281 } else {
1282 break 'prefix;
1285 };
1286 let decomposition = norm_trie.get(head_last_c);
1287 head_last = CharacterAndClassAndTrieValue::new_with_trie_val(
1288 head_last_c,
1289 decomposition,
1290 );
1291 head_last_ce32 = CollationElement32::default();
1292 head_last_ok = false;
1293
1294 if (decomposition & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER)) == 0 {
1295 } else if ((decomposition & HIGH_ZEROS_MASK) != 0)
1299 && ((decomposition & LOW_ZEROS_MASK) != 0)
1300 {
1301 head_last_c = char_from_u32(decomposition & 0x7FFF);
1304 } else if decomposition == HANGUL_SYLLABLE_MARKER {
1305 head_last_ce32 = FFFD_CE32;
1306 } else {
1307 continue;
1308 }
1309 head_last_ok = true;
1310 if !tail_first_ok {
1311 continue;
1312 }
1313 if head_last_ce32 == CollationElement32::default() {
1316 head_last_ce32 = tailoring.ce32_for_char(head_last_c);
1317 if head_last_ce32 == FALLBACK_CE32 {
1318 head_last_ce32 = self.root.ce32_for_char(head_last_c);
1319 }
1320 if head_last_ce32.tag_checked() == Some(Tag::Contraction)
1321 && head_last_ce32.at_least_one_suffix_contains_starter()
1322 {
1323 continue;
1324 }
1325 }
1326 if tail_first_ce32 == CollationElement32::default() {
1330 tail_first_ce32 = tailoring.ce32_for_char(tail_first_c);
1331 if tail_first_ce32 == FALLBACK_CE32 {
1332 tail_first_ce32 = self.root.ce32_for_char(tail_first_c);
1333 }
1334 } if tail_first_ce32.tag_checked() == Some(Tag::Prefix) {
1336 continue;
1337 }
1338 if self.options.numeric()
1339 && head_last_ce32.tag_checked() == Some(Tag::Digit)
1340 && tail_first_ce32.tag_checked() == Some(Tag::Digit)
1341 {
1342 continue;
1343 }
1344 break 'prefix;
1346 }
1347 } else {
1348 break 'prefix;
1350 }
1351 }
1353
1354 left.init();
1357 right.init();
1358
1359 loop {
1360 let mut left_primary;
1361 'left_primary_loop: loop {
1362 let ce = left.next();
1363 left_primary = ce.primary();
1364 if !(left_primary < variable_top && left_primary > MERGE_SEPARATOR_PRIMARY) {
1367 left_ces.push(ce);
1368 } else {
1369 any_variable = true;
1372 left_ces.push(ce.clone_with_non_primary_zeroed());
1377 loop {
1378 let ce = left.next();
1380 left_primary = ce.primary();
1381 if left_primary != 0
1382 && !(left_primary < variable_top
1383 && left_primary > MERGE_SEPARATOR_PRIMARY)
1384 {
1385 left_ces.push(ce);
1387 break 'left_primary_loop;
1388 }
1389 left_ces.push(ce.clone_with_non_primary_zeroed());
1392 }
1393 }
1394 if left_primary != 0 {
1395 break;
1396 }
1397 }
1398 let mut right_primary;
1399 'right_primary_loop: loop {
1400 let ce = right.next();
1401 right_primary = ce.primary();
1402 if !(right_primary < variable_top && right_primary > MERGE_SEPARATOR_PRIMARY) {
1405 right_ces.push(ce);
1406 } else {
1407 any_variable = true;
1410 right_ces.push(ce.clone_with_non_primary_zeroed());
1415 loop {
1416 let ce = right.next();
1418 right_primary = ce.primary();
1419 if right_primary != 0
1420 && !(right_primary < variable_top
1421 && right_primary > MERGE_SEPARATOR_PRIMARY)
1422 {
1423 right_ces.push(ce);
1425 break 'right_primary_loop;
1426 }
1427 right_ces.push(ce.clone_with_non_primary_zeroed());
1430 }
1431 }
1432 if right_primary != 0 {
1433 break;
1434 }
1435 }
1436 if left_primary != right_primary {
1437 if let Some(reordering) = &self.reordering {
1438 left_primary = reordering.reorder(left_primary);
1439 right_primary = reordering.reorder(right_primary);
1440 }
1441 if left_primary < right_primary {
1442 return Ordering::Less;
1443 }
1444 return Ordering::Greater;
1445 }
1446 if left_primary == NO_CE_PRIMARY {
1447 break;
1448 }
1449 }
1450
1451 debug_assert_eq!(left_ces.last(), Some(&NO_CE));
1455 debug_assert_eq!(right_ces.last(), Some(&NO_CE));
1456
1457 if self.options.strength() >= Strength::Secondary {
1469 if !self.options.backward_second_level() {
1470 let mut left_iter = left_ces.iter();
1471 let mut right_iter = right_ces.iter();
1472 let mut left_secondary;
1473 let mut right_secondary;
1474 loop {
1475 loop {
1476 left_secondary = left_iter.next().unwrap_or_default().secondary();
1477 if left_secondary != 0 {
1478 break;
1479 }
1480 }
1481 loop {
1482 right_secondary = right_iter.next().unwrap_or_default().secondary();
1483 if right_secondary != 0 {
1484 break;
1485 }
1486 }
1487 if left_secondary != right_secondary {
1488 if left_secondary < right_secondary {
1489 return Ordering::Less;
1490 }
1491 return Ordering::Greater;
1492 }
1493 if left_secondary == NO_CE_SECONDARY {
1494 break;
1495 }
1496 }
1497 } else {
1498 let mut left_remaining = &left_ces[..];
1499 let mut right_remaining = &right_ces[..];
1500 loop {
1501 if left_remaining.is_empty() {
1502 debug_assert!(right_remaining.is_empty());
1503 break;
1504 }
1505 let (left_prefix, right_prefix) = {
1506 let mut left_iter = left_remaining.iter();
1507 loop {
1508 let left_primary = left_iter.next().unwrap_or_default().primary();
1509 if left_primary != 0 && left_primary <= MERGE_SEPARATOR_PRIMARY {
1510 break;
1511 }
1512 debug_assert_ne!(left_primary, NO_CE_PRIMARY);
1513 }
1514 let left_new_remaining = left_iter.as_slice();
1515 #[expect(clippy::indexing_slicing)]
1517 let left_prefix =
1518 &left_remaining[..left_remaining.len() - 1 - left_new_remaining.len()];
1519 left_remaining = left_new_remaining;
1520
1521 let mut right_iter = right_remaining.iter();
1522 loop {
1523 let right_primary = right_iter.next().unwrap_or_default().primary();
1524 if right_primary != 0 && right_primary <= MERGE_SEPARATOR_PRIMARY {
1525 break;
1526 }
1527 debug_assert_ne!(right_primary, NO_CE_PRIMARY);
1528 }
1529 let right_new_remaining = right_iter.as_slice();
1530 #[expect(clippy::indexing_slicing)]
1532 let right_prefix = &right_remaining
1533 [..right_remaining.len() - 1 - right_new_remaining.len()];
1534 right_remaining = right_new_remaining;
1535
1536 (left_prefix, right_prefix)
1537 };
1538 let mut left_iter = left_prefix.iter();
1539 let mut right_iter = right_prefix.iter();
1540
1541 let mut left_secondary;
1542 let mut right_secondary;
1543 loop {
1544 loop {
1545 left_secondary = left_iter.next_back().unwrap_or_default().secondary();
1546 if left_secondary != 0 {
1547 break;
1548 }
1549 }
1550 loop {
1551 right_secondary =
1552 right_iter.next_back().unwrap_or_default().secondary();
1553 if right_secondary != 0 {
1554 break;
1555 }
1556 }
1557 if left_secondary != right_secondary {
1558 if left_secondary < right_secondary {
1559 return Ordering::Less;
1560 }
1561 return Ordering::Greater;
1562 }
1563 if left_secondary == NO_CE_SECONDARY {
1564 break;
1565 }
1566 }
1567 }
1568 }
1569 }
1570
1571 if self.options.case_level() {
1572 if self.options.strength() == Strength::Primary {
1573 let mut left_non_primary;
1579 let mut right_non_primary;
1580 let mut left_case;
1581 let mut right_case;
1582 let mut left_iter = left_ces.iter();
1583 let mut right_iter = right_ces.iter();
1584 loop {
1585 loop {
1586 let ce = left_iter.next().unwrap_or_default();
1587 left_non_primary = ce.non_primary();
1588 if !ce.either_half_zero() {
1589 break;
1590 }
1591 }
1592 left_case = left_non_primary.case();
1593 loop {
1594 let ce = right_iter.next().unwrap_or_default();
1595 right_non_primary = ce.non_primary();
1596 if !ce.either_half_zero() {
1597 break;
1598 }
1599 }
1600 right_case = right_non_primary.case();
1601 if left_case != right_case {
1605 if !self.options.upper_first() {
1606 if left_case < right_case {
1607 return Ordering::Less;
1608 }
1609 return Ordering::Greater;
1610 }
1611 if left_case < right_case {
1612 return Ordering::Greater;
1613 }
1614 return Ordering::Less;
1615 }
1616 if left_non_primary.secondary() == NO_CE_SECONDARY {
1617 break;
1618 }
1619 }
1620 } else {
1621 let mut left_non_primary;
1636 let mut right_non_primary;
1637 let mut left_case;
1638 let mut right_case;
1639 let mut left_iter = left_ces.iter();
1640 let mut right_iter = right_ces.iter();
1641 loop {
1642 loop {
1643 left_non_primary = left_iter.next().unwrap_or_default().non_primary();
1644 if left_non_primary.secondary() != 0 {
1645 break;
1646 }
1647 }
1648 left_case = left_non_primary.case();
1649 loop {
1650 right_non_primary = right_iter.next().unwrap_or_default().non_primary();
1651 if right_non_primary.secondary() != 0 {
1652 break;
1653 }
1654 }
1655 right_case = right_non_primary.case();
1656 if left_case != right_case {
1660 if !self.options.upper_first() {
1661 if left_case < right_case {
1662 return Ordering::Less;
1663 }
1664 return Ordering::Greater;
1665 }
1666 if left_case < right_case {
1667 return Ordering::Greater;
1668 }
1669 return Ordering::Less;
1670 }
1671 if left_non_primary.secondary() == NO_CE_SECONDARY {
1672 break;
1673 }
1674 }
1675 }
1676 }
1677
1678 if let Some(tertiary_mask) = self.options.tertiary_mask() {
1679 let mut any_quaternaries = AnyQuaternaryAccumulator::new();
1680 let mut left_iter = left_ces.iter();
1681 let mut right_iter = right_ces.iter();
1682 loop {
1683 let mut left_non_primary;
1684 let mut left_tertiary;
1685 loop {
1686 left_non_primary = left_iter.next().unwrap_or_default().non_primary();
1687 any_quaternaries.accumulate(left_non_primary);
1688 debug_assert!(
1689 left_non_primary.tertiary() != 0 || left_non_primary.case_quaternary() == 0
1690 );
1691 left_tertiary = left_non_primary.tertiary_case_quarternary(tertiary_mask);
1692 if left_tertiary != 0 {
1693 break;
1694 }
1695 }
1696
1697 let mut right_non_primary;
1698 let mut right_tertiary;
1699 loop {
1700 right_non_primary = right_iter.next().unwrap_or_default().non_primary();
1701 any_quaternaries.accumulate(right_non_primary);
1702 debug_assert!(
1703 right_non_primary.tertiary() != 0
1704 || right_non_primary.case_quaternary() == 0
1705 );
1706 right_tertiary = right_non_primary.tertiary_case_quarternary(tertiary_mask);
1707 if right_tertiary != 0 {
1708 break;
1709 }
1710 }
1711
1712 if left_tertiary != right_tertiary {
1713 if self.options.upper_first() {
1714 if left_tertiary > NO_CE_TERTIARY {
1721 if left_non_primary.secondary() != 0 {
1722 left_tertiary ^= 0xC000;
1723 } else {
1724 left_tertiary += 0x4000;
1725 }
1726 }
1727 if right_tertiary > NO_CE_TERTIARY {
1728 if right_non_primary.secondary() != 0 {
1729 right_tertiary ^= 0xC000;
1730 } else {
1731 right_tertiary += 0x4000;
1732 }
1733 }
1734 }
1735 if left_tertiary < right_tertiary {
1736 return Ordering::Less;
1737 }
1738 return Ordering::Greater;
1739 }
1740
1741 if left_tertiary == NO_CE_TERTIARY {
1742 break;
1743 }
1744 }
1745 if !any_variable && !any_quaternaries.has_quaternary() {
1746 return Ordering::Equal;
1747 }
1748 } else {
1749 return Ordering::Equal;
1750 }
1751
1752 if self.options.strength() <= Strength::Tertiary {
1753 return Ordering::Equal;
1754 }
1755
1756 let mut left_iter = left_ces.iter();
1757 let mut right_iter = right_ces.iter();
1758 loop {
1759 let mut left_quaternary;
1760 loop {
1761 let ce = left_iter.next().unwrap_or_default();
1762 if ce.tertiary_ignorable() {
1763 left_quaternary = ce.primary();
1764 } else {
1765 left_quaternary = ce.quaternary();
1766 }
1767 if left_quaternary != 0 {
1768 break;
1769 }
1770 }
1771 let mut right_quaternary;
1772 loop {
1773 let ce = right_iter.next().unwrap_or_default();
1774 if ce.tertiary_ignorable() {
1775 right_quaternary = ce.primary();
1776 } else {
1777 right_quaternary = ce.quaternary();
1778 }
1779 if right_quaternary != 0 {
1780 break;
1781 }
1782 }
1783 if left_quaternary != right_quaternary {
1784 if let Some(reordering) = &self.reordering {
1785 left_quaternary = reordering.reorder(left_quaternary);
1786 right_quaternary = reordering.reorder(right_quaternary);
1787 }
1788 if left_quaternary < right_quaternary {
1789 return Ordering::Less;
1790 }
1791 return Ordering::Greater;
1792 }
1793 if left_quaternary == NO_CE_PRIMARY {
1794 break;
1795 }
1796 }
1797
1798 Ordering::Equal
1799 }
1800
1801 fn sort_key_levels(&self) -> u8 {
1802 #[expect(clippy::indexing_slicing)]
1803 let mut levels = LEVEL_MASKS[self.options.strength() as usize];
1804 if self.options.case_level() {
1805 levels |= CASE_LEVEL_FLAG;
1806 }
1807 levels
1808 }
1809
1810 pub fn write_sort_key_to<S>(&self, s: &str, sink: &mut S) -> Result<S::Output, S::Error>
1859 where
1860 S: CollationKeySink + ?Sized,
1861 S::State: Default,
1862 {
1863 self.write_sort_key_impl(s.chars(), sink)
1864 }
1865
1866 pub fn write_sort_key_utf8_to<S>(&self, s: &[u8], sink: &mut S) -> Result<S::Output, S::Error>
1870 where
1871 S: CollationKeySink + ?Sized,
1872 S::State: Default,
1873 {
1874 self.write_sort_key_impl(s.chars(), sink)
1875 }
1876
1877 pub fn write_sort_key_utf16_to<S>(&self, s: &[u16], sink: &mut S) -> Result<S::Output, S::Error>
1881 where
1882 S: CollationKeySink + ?Sized,
1883 S::State: Default,
1884 {
1885 self.write_sort_key_impl(s.chars(), sink)
1886 }
1887
1888 fn write_sort_key_impl<I, S>(&self, iter: I, sink: &mut S) -> Result<S::Output, S::Error>
1889 where
1890 I: Iterator<Item = char> + Clone,
1891 S: CollationKeySink + ?Sized,
1892 S::State: Default,
1893 {
1894 let identical = if self.options.strength() == Strength::Identical {
1895 Some(iter.clone())
1896 } else {
1897 None
1898 };
1899
1900 let mut state = S::State::default();
1901 self.write_sort_key_up_to_quaternary(iter, sink, &mut state)?;
1902
1903 if let Some(iter) = identical {
1904 let nfd =
1905 DecomposingNormalizerBorrowed::new_with_data(self.decompositions, self.tables);
1906 sink.write_byte(&mut state, LEVEL_SEPARATOR_BYTE)?;
1907
1908 let iter = nfd.normalize_iter(iter);
1909 write_identical_level(iter, sink, &mut state)?;
1910 }
1911
1912 sink.finish(state)
1913 }
1914
1915 fn write_sort_key_up_to_quaternary<I, S>(
1920 &self,
1921 iter: I,
1922 sink: &mut S,
1923 state: &mut S::State,
1924 ) -> Result<(), S::Error>
1925 where
1926 I: Iterator<Item = char>,
1927 S: CollationKeySink + ?Sized,
1928 {
1929 let levels = self.sort_key_levels();
1931
1932 let mut iter =
1933 collation_elements!(self, iter, self.tailoring_or_root(), self.numeric_primary());
1934 iter.init();
1935 let variable_top = self.variable_top();
1936
1937 let tertiary_mask = self.options.tertiary_mask().unwrap_or_default();
1938
1939 let mut cases = SortKeyLevel::default();
1940 let mut secondaries = SortKeyLevel::default();
1941 let mut tertiaries = SortKeyLevel::default();
1942 let mut quaternaries = SortKeyLevel::default();
1943
1944 let mut prev_reordered_primary = 0;
1945 let mut common_cases = 0usize;
1946 let mut common_secondaries = 0usize;
1947 let mut common_tertiaries = 0usize;
1948 let mut common_quaternaries = 0usize;
1949 let mut prev_secondary = 0;
1950 let mut sec_segment_start = 0;
1951
1952 loop {
1953 let mut ce = iter.next();
1954 let mut p = ce.primary();
1955 if p < variable_top && p > MERGE_SEPARATOR_PRIMARY {
1956 if common_quaternaries != 0 {
1959 common_quaternaries -= 1;
1960 while common_quaternaries >= QUAT_COMMON[WEIGHT_MAX_COUNT] as _ {
1961 quaternaries.append_byte(QUAT_COMMON[WEIGHT_MIDDLE]);
1962 common_quaternaries -= QUAT_COMMON[WEIGHT_MAX_COUNT] as usize;
1963 }
1964 quaternaries.append_byte(QUAT_COMMON[WEIGHT_LOW] + common_quaternaries as u8);
1966 common_quaternaries = 0;
1967 }
1968
1969 loop {
1970 if levels & QUATERNARY_LEVEL_FLAG != 0 {
1971 if let Some(reordering) = &self.reordering {
1972 p = reordering.reorder(p);
1973 }
1974 if (p >> 24) as u8 >= QUAT_SHIFTED_LIMIT_BYTE {
1975 quaternaries.append_byte(QUAT_SHIFTED_LIMIT_BYTE);
1978 }
1979 quaternaries.append_weight_32(p);
1980 }
1981 loop {
1982 ce = iter.next();
1983 p = ce.primary();
1984 if p != 0 {
1985 break;
1986 }
1987 }
1988 if !(p < variable_top && p > MERGE_SEPARATOR_PRIMARY) {
1989 break;
1990 }
1991 }
1992 }
1993
1994 if p > NO_CE_PRIMARY && levels & PRIMARY_LEVEL_FLAG != 0 {
1998 let is_compressible = self.special_primaries.is_compressible((p >> 24) as _);
2000 if let Some(reordering) = &self.reordering {
2001 p = reordering.reorder(p);
2002 }
2003 let p1 = (p >> 24) as u8;
2004 if !is_compressible || p1 != (prev_reordered_primary >> 24) as u8 {
2005 if prev_reordered_primary != 0 {
2006 if p < prev_reordered_primary {
2007 if p1 > MERGE_SEPARATOR_BYTE {
2010 sink.write(state, &[PRIMARY_COMPRESSION_LOW_BYTE])?;
2011 }
2012 } else {
2013 sink.write(state, &[PRIMARY_COMPRESSION_HIGH_BYTE])?;
2014 }
2015 }
2016 sink.write_byte(state, p1)?;
2017 prev_reordered_primary = if is_compressible { p } else { 0 };
2018 }
2019
2020 let p2 = (p >> 16) as u8;
2021 if p2 != 0 {
2022 let (b0, b1, b2) = (p2, (p >> 8) as _, p as _);
2023 sink.write_byte(state, b0)?;
2024 if b1 != 0 {
2025 sink.write_byte(state, b1)?;
2026 if b2 != 0 {
2027 sink.write_byte(state, b2)?;
2028 }
2029 }
2030 }
2031 }
2032
2033 let non_primary = ce.non_primary();
2034 if non_primary.ignorable() {
2035 continue; }
2037
2038 macro_rules! handle_common {
2039 ($key:ident, $w:ident, $common:ident, $weights:ident, $lim:expr) => {
2040 if $common != 0 {
2041 $common -= 1;
2042 while $common >= $weights[WEIGHT_MAX_COUNT] as _ {
2043 $key.append_byte($weights[WEIGHT_MIDDLE]);
2044 $common -= $weights[WEIGHT_MAX_COUNT] as usize;
2045 }
2046 let b = if $w < $lim {
2047 $weights[WEIGHT_LOW] + ($common as u8)
2048 } else {
2049 $weights[WEIGHT_HIGH] - ($common as u8)
2050 };
2051 $key.append_byte(b);
2052 $common = 0;
2053 }
2054 };
2055 ($key:ident, $w:ident, $common:ident, $weights:ident) => {
2056 handle_common!($key, $w, $common, $weights, COMMON_WEIGHT16);
2057 };
2058 }
2059
2060 if levels & SECONDARY_LEVEL_FLAG != 0 {
2061 let s = non_primary.secondary();
2062 if s == 0 {
2063 } else if s == COMMON_WEIGHT16
2065 && (!self.options.backward_second_level() || p != MERGE_SEPARATOR_PRIMARY)
2066 {
2067 common_secondaries += 1;
2070 } else if !self.options.backward_second_level() {
2071 handle_common!(secondaries, s, common_secondaries, SEC_COMMON);
2072 secondaries.append_weight_16(s);
2073 } else {
2074 if common_secondaries != 0 {
2075 common_secondaries -= 1;
2076 let remainder = common_secondaries % SEC_COMMON[WEIGHT_MAX_COUNT] as usize;
2078 let b = if prev_secondary < COMMON_WEIGHT16 {
2079 SEC_COMMON[WEIGHT_LOW] + remainder as u8
2080 } else {
2081 SEC_COMMON[WEIGHT_HIGH] - remainder as u8
2082 };
2083 secondaries.append_byte(b);
2084 common_secondaries -= remainder;
2085 while common_secondaries > 0 {
2087 secondaries.append_byte(SEC_COMMON[WEIGHT_MIDDLE]);
2089 common_secondaries -= SEC_COMMON[WEIGHT_MAX_COUNT] as usize;
2090 }
2091 }
2093 if 0 < p && p <= MERGE_SEPARATOR_PRIMARY {
2094 let secs = &mut secondaries.buf;
2097 let last = secs.len() - 1;
2098 if sec_segment_start < last {
2099 let mut q = sec_segment_start;
2100 let mut r = last;
2101
2102 #[expect(clippy::indexing_slicing)]
2104 while q < r {
2105 let b = secs[q];
2106 secs[q] = secs[r];
2107 q += 1;
2108 secs[r] = b;
2109 r -= 1;
2110 }
2111 }
2112 let b = if p == NO_CE_PRIMARY {
2113 LEVEL_SEPARATOR_BYTE
2114 } else {
2115 MERGE_SEPARATOR_BYTE
2116 };
2117 secondaries.append_byte(b);
2118 prev_secondary = 0;
2119 sec_segment_start = secondaries.len();
2120 } else {
2121 secondaries.append_reverse_weight_16(s);
2122 prev_secondary = s;
2123 }
2124 }
2125 }
2126
2127 if levels & CASE_LEVEL_FLAG != 0 {
2128 if self.options.strength() == Strength::Primary && p == 0
2129 || non_primary.bits() <= 0xffff
2130 {
2131 } else {
2135 let mut c = ((non_primary.bits() >> 8) & 0xff) as u8;
2137 debug_assert_ne!(c & 0xc0, 0xc0);
2138 if c & 0xc0 == 0 && c > LEVEL_SEPARATOR_BYTE {
2139 common_cases += 1;
2140 } else {
2141 if !self.options.upper_first() {
2142 if common_cases != 0 && (c > LEVEL_SEPARATOR_BYTE || !cases.is_empty())
2147 {
2148 common_cases -= 1;
2149 while common_cases >= CASE_LOWER_FIRST_COMMON[WEIGHT_MAX_COUNT] as _
2150 {
2151 cases.append_byte(CASE_LOWER_FIRST_COMMON[WEIGHT_MIDDLE] << 4);
2152 common_cases -=
2153 CASE_LOWER_FIRST_COMMON[WEIGHT_MAX_COUNT] as usize;
2154 }
2155 let b = if c <= LEVEL_SEPARATOR_BYTE {
2156 CASE_LOWER_FIRST_COMMON[WEIGHT_LOW] + common_cases as u8
2157 } else {
2158 CASE_LOWER_FIRST_COMMON[WEIGHT_HIGH] - common_cases as u8
2159 };
2160 cases.append_byte(b << 4);
2161 common_cases = 0;
2162 }
2163 if c > LEVEL_SEPARATOR_BYTE {
2164 c = (CASE_LOWER_FIRST_COMMON[WEIGHT_HIGH] + (c >> 6)) << 4;
2166 }
2167 } else {
2168 if common_cases != 0 {
2173 common_cases -= 1;
2174 while common_cases >= CASE_UPPER_FIRST_COMMON[WEIGHT_MAX_COUNT] as _
2175 {
2176 cases.append_byte(CASE_UPPER_FIRST_COMMON[WEIGHT_LOW] << 4);
2177 common_cases -=
2178 CASE_UPPER_FIRST_COMMON[WEIGHT_MAX_COUNT] as usize;
2179 }
2180 cases.append_byte(
2181 (CASE_UPPER_FIRST_COMMON[WEIGHT_LOW] + common_cases as u8) << 4,
2182 );
2183 common_cases = 0;
2184 }
2185 if c > LEVEL_SEPARATOR_BYTE {
2186 c = (CASE_UPPER_FIRST_COMMON[WEIGHT_LOW] - (c >> 6)) << 4;
2188 }
2189 }
2190 cases.append_byte(c);
2193 }
2194 }
2195 }
2196
2197 if levels & TERTIARY_LEVEL_FLAG != 0 {
2198 let mut t = non_primary.tertiary_case_quarternary(tertiary_mask);
2199 debug_assert_ne!(non_primary.bits() & 0xc000, 0xc000);
2200 if t == COMMON_WEIGHT16 {
2201 common_tertiaries += 1;
2202 } else if tertiary_mask & 0x8000 == 0 {
2203 handle_common!(tertiaries, t, common_tertiaries, TER_ONLY_COMMON);
2206 if t > COMMON_WEIGHT16 {
2207 t += 0xc000;
2208 }
2209 tertiaries.append_weight_16(t);
2210 } else if !self.options.upper_first() {
2211 handle_common!(tertiaries, t, common_tertiaries, TER_LOWER_FIRST_COMMON);
2214 if t > COMMON_WEIGHT16 {
2215 t += 0x4000;
2216 }
2217 tertiaries.append_weight_16(t);
2218 } else {
2219 if t <= NO_CE_TERTIARY {
2232 } else if non_primary.bits() > 0xffff {
2234 t ^= 0xc000;
2236 if t < (TER_UPPER_FIRST_COMMON[WEIGHT_HIGH] as u16) << 8 {
2237 t -= 0x4000;
2238 }
2239 } else {
2240 debug_assert!((0x8600..=0xbfff).contains(&t));
2242 t += 0x4000;
2243 }
2244 handle_common!(
2245 tertiaries,
2246 t,
2247 common_tertiaries,
2248 TER_UPPER_FIRST_COMMON,
2249 (TER_UPPER_FIRST_COMMON[WEIGHT_LOW] as u16) << 8
2250 );
2251 tertiaries.append_weight_16(t);
2252 }
2253 }
2254
2255 if levels & QUATERNARY_LEVEL_FLAG != 0 {
2256 let q = (non_primary.bits() & 0xffff) as u16;
2257 if q & 0xc0 == 0 && q > NO_CE_QUATERNARY {
2258 common_quaternaries += 1;
2259 } else if q == NO_CE_QUATERNARY
2260 && self.options.alternate_handling() == AlternateHandling::NonIgnorable
2261 && quaternaries.is_empty()
2262 {
2263 quaternaries.append_byte(LEVEL_SEPARATOR_BYTE);
2271 } else {
2272 let q = if q == NO_CE_QUATERNARY {
2273 LEVEL_SEPARATOR_BYTE
2274 } else {
2275 (0xfc + ((q >> 6) & 3)) as u8
2276 };
2277 handle_common!(
2278 quaternaries,
2279 q,
2280 common_quaternaries,
2281 QUAT_COMMON,
2282 QUAT_COMMON[WEIGHT_LOW]
2283 );
2284 quaternaries.append_byte(q);
2285 }
2286 }
2287
2288 if (non_primary.bits() >> 24) as u8 == LEVEL_SEPARATOR_BYTE {
2289 break; }
2291 }
2292
2293 macro_rules! write_level {
2294 ($key:ident, $flag:ident) => {
2295 if levels & $flag != 0 {
2296 sink.write(state, &[LEVEL_SEPARATOR_BYTE])?;
2297 sink.write(state, &$key.buf)?;
2298 }
2299 };
2300 }
2301
2302 write_level!(secondaries, SECONDARY_LEVEL_FLAG);
2303
2304 if levels & CASE_LEVEL_FLAG != 0 {
2305 sink.write(state, &[LEVEL_SEPARATOR_BYTE])?;
2306
2307 let mut b = 0;
2309 if let Some((last, head)) = cases.buf.split_last() {
2310 debug_assert_eq!(*last, 1); for c in head {
2312 debug_assert_eq!(*c & 0xf, 0);
2313 debug_assert_ne!(*c, 0);
2314 if b == 0 {
2315 b = *c;
2316 } else {
2317 sink.write_byte(state, b | (*c >> 4))?;
2318 b = 0;
2319 }
2320 }
2321 } else {
2322 debug_assert!(false);
2323 }
2324 if b != 0 {
2325 sink.write_byte(state, b)?;
2326 }
2327 }
2328
2329 write_level!(tertiaries, TERTIARY_LEVEL_FLAG);
2330 write_level!(quaternaries, QUATERNARY_LEVEL_FLAG);
2331
2332 Ok(())
2333 }
2334}
2335
2336#[derive(Debug, PartialEq, Eq)]
2338pub struct TooSmall {
2339 pub length: usize,
2341}
2342
2343impl TooSmall {
2344 pub fn new(length: usize) -> Self {
2345 Self { length }
2346 }
2347}
2348
2349pub trait CollationKeySink {
2363 type Error;
2365
2366 type State;
2368
2369 type Output;
2371
2372 fn write(&mut self, state: &mut Self::State, buf: &[u8]) -> Result<(), Self::Error>;
2374
2375 fn write_byte(&mut self, state: &mut Self::State, b: u8) -> Result<(), Self::Error> {
2377 self.write(state, &[b])
2378 }
2379
2380 fn finish(&mut self, state: Self::State) -> Result<Self::Output, Self::Error>;
2383}
2384
2385impl CollationKeySink for Vec<u8> {
2386 type Error = Infallible;
2387 type State = ();
2388 type Output = ();
2389
2390 fn write(&mut self, _: &mut Self::State, buf: &[u8]) -> Result<(), Self::Error> {
2391 self.extend_from_slice(buf);
2392 Ok(())
2393 }
2394
2395 fn finish(&mut self, _: Self::State) -> Result<Self::Output, Self::Error> {
2396 Ok(())
2397 }
2398}
2399
2400impl CollationKeySink for VecDeque<u8> {
2401 type Error = Infallible;
2402 type State = ();
2403 type Output = ();
2404
2405 fn write(&mut self, _: &mut Self::State, buf: &[u8]) -> Result<(), Self::Error> {
2406 self.extend(buf.iter());
2407 Ok(())
2408 }
2409
2410 fn finish(&mut self, _: Self::State) -> Result<Self::Output, Self::Error> {
2411 Ok(())
2412 }
2413}
2414
2415impl<const N: usize> CollationKeySink for SmallVec<[u8; N]> {
2416 type Error = Infallible;
2417 type State = ();
2418 type Output = ();
2419
2420 fn write(&mut self, _: &mut Self::State, buf: &[u8]) -> Result<(), Self::Error> {
2421 self.extend_from_slice(buf);
2422 Ok(())
2423 }
2424
2425 fn finish(&mut self, _: Self::State) -> Result<Self::Output, Self::Error> {
2426 Ok(())
2427 }
2428}
2429
2430impl CollationKeySink for [u8] {
2431 type Error = TooSmall;
2432 type State = usize;
2433 type Output = usize;
2434
2435 fn write(&mut self, offset: &mut Self::State, buf: &[u8]) -> Result<(), Self::Error> {
2436 if *offset + buf.len() <= self.len() {
2437 #[expect(clippy::indexing_slicing)]
2439 self[*offset..*offset + buf.len()].copy_from_slice(buf);
2440 }
2441 *offset += buf.len();
2442 Ok(())
2443 }
2444
2445 fn finish(&mut self, offset: Self::State) -> Result<Self::Output, Self::Error> {
2446 if offset <= self.len() {
2447 Ok(offset)
2448 } else {
2449 Err(TooSmall::new(offset))
2450 }
2451 }
2452}
2453
2454#[derive(Default)]
2455struct SortKeyLevel {
2456 buf: SmallVec<[u8; 40]>,
2457}
2458
2459impl SortKeyLevel {
2460 fn len(&self) -> usize {
2461 self.buf.len()
2462 }
2463
2464 fn is_empty(&self) -> bool {
2465 self.buf.is_empty()
2466 }
2467
2468 fn append_byte(&mut self, x: u8) {
2469 self.buf.push(x);
2470 }
2471
2472 fn append_weight_16(&mut self, w: u16) {
2473 debug_assert_ne!(w, 0);
2474 let b0 = (w >> 8) as u8;
2475 let b1 = w as u8;
2476 self.append_byte(b0);
2477 if b1 != 0 {
2478 self.append_byte(b1);
2479 }
2480 }
2481
2482 fn append_reverse_weight_16(&mut self, w: u16) {
2483 debug_assert_ne!(w, 0);
2484 let b0 = (w >> 8) as u8;
2485 let b1 = w as u8;
2486 if b1 != 0 {
2487 self.append_byte(b1);
2488 }
2489 self.append_byte(b0);
2490 }
2491
2492 fn append_weight_32(&mut self, w: u32) {
2493 debug_assert_ne!(w, 0);
2494 let b0 = (w >> 24) as u8;
2495 let b1 = (w >> 16) as u8;
2496 let b2 = (w >> 8) as u8;
2497 let b3 = w as u8;
2498 self.append_byte(b0);
2499 if b1 != 0 {
2500 self.append_byte(b1);
2501 if b2 != 0 {
2502 self.append_byte(b2);
2503 if b3 != 0 {
2504 self.append_byte(b3);
2505 }
2506 }
2507 }
2508 }
2509}
2510
2511macro_rules! negdivmod {
2517 ($n:ident, $d:ident, $m:ident) => {
2518 $m = $n % $d;
2519 $n /= $d;
2520 if $m < 0 {
2521 $n -= 1;
2522 $m += $d;
2523 }
2524 };
2525}
2526
2527fn write_diff<S>(mut diff: i32, sink: &mut S, state: &mut S::State) -> Result<(), S::Error>
2528where
2529 S: CollationKeySink + ?Sized,
2530{
2531 let mut out = |b| sink.write_byte(state, b);
2532
2533 if diff >= SLOPE_REACH_NEG_1 {
2534 if diff <= SLOPE_REACH_POS_1 {
2535 out((SLOPE_MIDDLE + diff) as _)?;
2536 } else if diff <= SLOPE_REACH_POS_2 {
2537 out((SLOPE_START_POS_2 + (diff / SLOPE_TAIL_COUNT)) as _)?;
2538 out((SLOPE_MIN + diff % SLOPE_TAIL_COUNT) as _)?;
2539 } else if diff <= SLOPE_REACH_POS_3 {
2540 let p2 = SLOPE_MIN + diff % SLOPE_TAIL_COUNT;
2541 diff /= SLOPE_TAIL_COUNT;
2542 let p1 = SLOPE_MIN + diff % SLOPE_TAIL_COUNT;
2543 let p0 = SLOPE_START_POS_3 + (diff / SLOPE_TAIL_COUNT);
2544 out(p0 as _)?;
2545 out(p1 as _)?;
2546 out(p2 as _)?;
2547 } else {
2548 let p3 = SLOPE_MIN + diff % SLOPE_TAIL_COUNT;
2549 diff /= SLOPE_TAIL_COUNT;
2550 let p2 = SLOPE_MIN + diff % SLOPE_TAIL_COUNT;
2551 diff /= SLOPE_TAIL_COUNT;
2552 let p1 = SLOPE_MIN + diff % SLOPE_TAIL_COUNT;
2553 out(SLOPE_MAX as _)?;
2554 out(p1 as _)?;
2555 out(p2 as _)?;
2556 out(p3 as _)?;
2557 }
2558 } else {
2559 let mut m;
2560
2561 if diff >= SLOPE_REACH_NEG_2 {
2562 negdivmod!(diff, SLOPE_TAIL_COUNT, m);
2563 out((SLOPE_START_NEG_2 + diff) as _)?;
2564 out((SLOPE_MIN + m) as _)?;
2565 } else if diff >= SLOPE_REACH_NEG_3 {
2566 negdivmod!(diff, SLOPE_TAIL_COUNT, m);
2567 let p2 = SLOPE_MIN + m;
2568 negdivmod!(diff, SLOPE_TAIL_COUNT, m);
2569 let p1 = SLOPE_MIN + m;
2570 let p0 = SLOPE_START_NEG_3 + diff;
2571 out(p0 as _)?;
2572 out(p1 as _)?;
2573 out(p2 as _)?;
2574 } else {
2575 negdivmod!(diff, SLOPE_TAIL_COUNT, m);
2576 let p3 = SLOPE_MIN + m;
2577 negdivmod!(diff, SLOPE_TAIL_COUNT, m);
2578 let p2 = SLOPE_MIN + m;
2579 negdivmod!(diff, SLOPE_TAIL_COUNT, m);
2580 let p1 = SLOPE_MIN + m;
2581 let _ = diff;
2582 out(SLOPE_MIN as _)?;
2583 out(p1 as _)?;
2584 out(p2 as _)?;
2585 out(p3 as _)?;
2586 }
2587 }
2588
2589 Ok(())
2590}
2591
2592fn write_identical_level<I, S>(iter: I, sink: &mut S, state: &mut S::State) -> Result<(), S::Error>
2593where
2594 I: Iterator<Item = char>,
2595 S: CollationKeySink + ?Sized,
2596{
2597 let mut prev = 0i32;
2598
2599 for c in iter {
2600 if !(0x4e00..=0xa000).contains(&prev) {
2601 prev = (prev & !0x7f) - SLOPE_REACH_NEG_1;
2602 } else {
2603 prev = 0x9fff - SLOPE_REACH_POS_2;
2605 }
2606
2607 if c == MERGE_SEPARATOR {
2608 sink.write_byte(state, MERGE_SEPARATOR_BYTE)?;
2609 prev = 0;
2610 } else {
2611 let c = c as i32;
2612 write_diff(c - prev, sink, state)?;
2613 prev = c;
2614 }
2615 }
2616 Ok(())
2617}
2618
2619#[cfg(test)]
2620mod test {
2621 use super::*;
2622 use icu_locale::locale;
2623
2624 type Key = Vec<u8>;
2625
2626 fn collator_en(strength: Strength) -> CollatorBorrowed<'static> {
2627 let locale = locale!("en").into();
2628 let mut options = CollatorOptions::default();
2629 options.strength = Some(strength);
2630 Collator::try_new(locale, options).unwrap()
2631 }
2632
2633 fn collator_en_case_level(strength: Strength) -> CollatorBorrowed<'static> {
2634 let locale = locale!("en").into();
2635 let mut options = CollatorOptions::default();
2636 options.strength = Some(strength);
2637 options.case_level = Some(crate::options::CaseLevel::On);
2638 Collator::try_new(locale, options).unwrap()
2639 }
2640
2641 fn keys(strength: Strength) -> (Key, Key, Key) {
2642 let collator = collator_en(strength);
2643
2644 let mut k0 = Vec::new();
2645 let Ok(()) = collator.write_sort_key_to("aabc", &mut k0);
2646 let mut k1 = Vec::new();
2647 let Ok(()) = collator.write_sort_key_to("aAbc", &mut k1);
2648 let mut k2 = Vec::new();
2649 let Ok(()) = collator.write_sort_key_to("áAbc", &mut k2);
2650
2651 (k0, k1, k2)
2652 }
2653
2654 #[test]
2655 fn sort_key_primary() {
2656 let (k0, k1, k2) = keys(Strength::Primary);
2657 assert_eq!(k0, k1);
2658 assert_eq!(k1, k2);
2659 }
2660
2661 #[test]
2662 fn sort_key_secondary() {
2663 let (k0, k1, k2) = keys(Strength::Secondary);
2664 assert_eq!(k0, k1);
2665 assert!(k1 < k2);
2666 }
2667
2668 #[test]
2669 fn sort_key_tertiary() {
2670 let (k0, k1, k2) = keys(Strength::Tertiary);
2671 assert!(k0 < k1);
2672 assert!(k1 < k2);
2673 }
2674
2675 fn collator_ja(strength: Strength) -> CollatorBorrowed<'static> {
2676 let locale = locale!("ja").into();
2677 let mut options = CollatorOptions::default();
2678 options.strength = Some(strength);
2679 Collator::try_new(locale, options).unwrap()
2680 }
2681
2682 fn keys_ja_strs(strength: Strength, s0: &str, s1: &str) -> (Key, Key) {
2683 let collator = collator_ja(strength);
2684
2685 let mut k0 = Vec::new();
2686 let Ok(()) = collator.write_sort_key_to(s0, &mut k0);
2687 let mut k1 = Vec::new();
2688 let Ok(()) = collator.write_sort_key_to(s1, &mut k1);
2689
2690 (k0, k1)
2691 }
2692
2693 fn keys_ja(strength: Strength) -> (Key, Key) {
2694 keys_ja_strs(strength, "あ", "ア")
2695 }
2696
2697 #[test]
2698 fn sort_keys_ja_to_quaternary() {
2699 let (k0, k1) = keys_ja(Strength::Primary);
2700 assert_eq!(k0, k1);
2701 let (k0, k1) = keys_ja(Strength::Secondary);
2702 assert_eq!(k0, k1);
2703 let (k0, k1) = keys_ja(Strength::Tertiary);
2704 assert_eq!(k0, k1);
2705 let (k0, k1) = keys_ja(Strength::Quaternary);
2706 assert!(k0 < k1);
2707 }
2708
2709 #[test]
2710 fn sort_keys_ja_identical() {
2711 let (k0, k1) = keys_ja_strs(Strength::Quaternary, "ア", "ア");
2712 assert_eq!(k0, k1);
2713 let (k0, k1) = keys_ja_strs(Strength::Identical, "ア", "ア");
2714 assert!(k0 < k1);
2715 }
2716
2717 #[test]
2718 fn sort_keys_utf16() {
2719 let collator = collator_en(Strength::Identical);
2720
2721 const STR8: &[u8] = b"hello world!";
2722 let mut k8 = Vec::new();
2723 let Ok(()) = collator.write_sort_key_utf8_to(STR8, &mut k8);
2724
2725 const STR16: &[u16] = &[
2726 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21,
2727 ];
2728 let mut k16 = Vec::new();
2729 let Ok(()) = collator.write_sort_key_utf16_to(STR16, &mut k16);
2730 assert_eq!(k8, k16);
2731 }
2732
2733 #[test]
2734 fn sort_keys_invalid() {
2735 let collator = collator_en(Strength::Identical);
2736
2737 let mut k = Vec::new();
2739 let Ok(()) = collator.write_sort_key_utf8_to(b"\xf0\x90", &mut k);
2740 let mut k = Vec::new();
2741 let Ok(()) = collator.write_sort_key_utf16_to(&[0xdd1e], &mut k);
2742 }
2743
2744 #[test]
2745 fn sort_key_to_vecdeque() {
2746 let collator = collator_en(Strength::Identical);
2747
2748 let mut k0 = Vec::new();
2749 let Ok(()) = collator.write_sort_key_to("áAbc", &mut k0);
2750 let mut k1 = VecDeque::new();
2751 let Ok(()) = collator.write_sort_key_to("áAbc", &mut k1);
2752 assert!(k0.iter().eq(k1.iter()));
2753 }
2754
2755 #[test]
2756 fn sort_key_to_slice() {
2757 let collator = collator_en(Strength::Identical);
2758
2759 let mut k0 = Vec::new();
2760 let Ok(()) = collator.write_sort_key_to("áAbc", &mut k0);
2761 let mut k1 = [0u8; 100];
2762 let len = collator.write_sort_key_to("áAbc", &mut k1[..]).unwrap();
2763 assert_eq!(len, k0.len());
2764 assert!(k0.iter().eq(k1[..len].iter()));
2765 }
2766
2767 #[test]
2768 fn sort_key_to_slice_no_space() {
2769 let collator = collator_en(Strength::Identical);
2770 let mut k = [0u8; 0];
2771 let res = collator.write_sort_key_to("áAbc", &mut k[..]);
2772 assert!(matches!(res, Err(TooSmall { .. })));
2773 }
2774
2775 #[test]
2776 fn sort_key_to_slice_too_long() {
2777 let collator = collator_en(Strength::Identical);
2779 let mut k = [0u8; 5];
2780 let res = collator.write_sort_key_to("áAbc", &mut k[..]);
2781 assert!(matches!(res, Err(TooSmall { .. })));
2782 }
2783
2784 #[test]
2785 fn sort_key_to_slice_identical_too_long() {
2786 let collator = collator_en(Strength::Identical);
2788 let mut k = [0u8; 22];
2789 let res = collator.write_sort_key_to("áAbc", &mut k[..]);
2790 assert!(matches!(res, Err(TooSmall { .. })));
2791 }
2792
2793 #[test]
2794 fn sort_key_just_right() {
2795 let collator = collator_en(Strength::Identical);
2797 let mut k = [0u8; 0];
2798 let res = collator.write_sort_key_to("áAbc", &mut k[..]);
2799 let len = res.unwrap_err().length;
2800
2801 let mut k = vec![0u8; len - 1];
2803 let res = collator.write_sort_key_to("áAbc", &mut k[..]);
2804 let len = res.unwrap_err().length;
2805
2806 let mut k = vec![0u8; len];
2808 let res = collator.write_sort_key_to("áAbc", &mut k[..]);
2809 assert_eq!(res, Ok(len));
2810 }
2811
2812 #[test]
2813 fn sort_key_utf16_slice_too_small() {
2814 let collator = collator_en(Strength::Identical);
2815 const STR16: &[u16] = &[0x68, 0x65, 0x6c, 0x6c, 0x6f];
2816 let mut k = [0u8; 4];
2817 let res = collator.write_sort_key_utf16_to(STR16, &mut k[..]);
2818 assert!(matches!(res, Err(TooSmall { .. })));
2819 }
2820
2821 #[test]
2822 fn sort_key_very_long() {
2823 let collator = collator_en(Strength::Secondary);
2824 let mut k = Vec::new();
2825 let Ok(()) = collator.write_sort_key_to(&"a".repeat(300), &mut k);
2826 }
2827
2828 #[test]
2829 fn sort_key_case_level() {
2830 let collator = collator_en_case_level(Strength::Tertiary);
2831 let mut k = Vec::new();
2832 let Ok(()) = collator.write_sort_key_to("aBc", &mut k);
2833 }
2834
2835 #[test]
2836 fn sort_key_case_level_empty() {
2837 let collator = collator_en_case_level(Strength::Tertiary);
2838 let mut k = Vec::new();
2839 let Ok(()) = collator.write_sort_key_to("", &mut k);
2840 }
2841
2842 fn check_sort_key_less(a: &[u16], b: &[u16]) {
2843 let collator = collator_en(Strength::Identical);
2844 let mut ak = Vec::new();
2845 let Ok(()) = collator.write_sort_key_utf16_to(a, &mut ak);
2846 let mut bk = Vec::new();
2847 let Ok(()) = collator.write_sort_key_utf16_to(b, &mut bk);
2848 assert!(ak < bk, "failed: {a:04x?} - {b:04x?}");
2849 }
2850
2851 #[test]
2852 fn sort_key_fffe_bug_6811() {
2853 check_sort_key_less(
2854 &[0xfffe, 0x0001, 0x0002, 0x0003],
2855 &[0x0001, 0xfffe, 0x0002, 0x0003],
2856 );
2857 check_sort_key_less(
2858 &[0x0001, 0xfffe, 0x0002, 0x0003],
2859 &[0x0001, 0x0002, 0xfffe, 0x0003],
2860 );
2861 check_sort_key_less(
2862 &[0x0001, 0x0002, 0xfffe, 0x0003],
2863 &[0x0001, 0x0002, 0x0003, 0xfffe],
2864 );
2865 check_sort_key_less(&[0xfffe, 0x0000, 0x0000], &[0x0000, 0xfffe, 0x0000]);
2866 check_sort_key_less(&[0x0000, 0xfffe, 0x0000], &[0x0000, 0x0000, 0xfffe]);
2867 }
2868}