use super::generated::collation as gen;
use super::normalize::{canonical_combining_class as ccc, nfd};
use alloc::vec::Vec;
use core::cmp::Ordering;
/// Extracts the primary (level-1) weight: bits 32..48 of a packed element.
#[inline]
fn primary(ce: u64) -> u16 {
    // The narrowing cast keeps exactly the low 16 bits of the shifted value.
    let shifted = ce >> 32;
    shifted as u16
}
/// Extracts the secondary (level-2) weight: bits 16..32 of a packed element.
#[inline]
fn secondary(ce: u64) -> u16 {
    // The narrowing cast keeps exactly the low 16 bits of the shifted value.
    let shifted = ce >> 16;
    shifted as u16
}
/// Extracts the tertiary (level-3) weight: the low 16 bits of a packed element.
#[inline]
fn tertiary(ce: u64) -> u16 {
    // Truncation to `u16` discards everything above bit 15.
    ce as u16
}
/// Reports whether bit 48 (the "variable" flag) is set in a packed element.
#[inline]
fn is_variable(ce: u64) -> bool {
    const VARIABLE_FLAG: u64 = 1 << 48;
    (ce & VARIABLE_FLAG) != 0
}
/// Packs three weights into one element: primary at bits 32.., secondary at
/// bits 16.., tertiary at bits 0...
///
/// The inputs are not masked: a weight wider than 16 bits overlaps the
/// neighbouring field (or the flag bits above the primary).
#[inline]
fn pack(p: u32, s: u32, t: u32) -> u64 {
    let high = u64::from(p) << 32;
    let mid = u64::from(s) << 16;
    let low = u64::from(t);
    high | mid | low
}
/// Selects how elements flagged as variable are treated when a sort key is
/// built.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AlternateHandling {
    /// Variable elements contribute their weights like any other element.
    NonIgnorable,
    /// Variable elements with a non-zero primary are zeroed at the first
    /// three levels and their primary is moved to a fourth level.
    Shifted,
}
/// Code point ranges whose implicit weights are computed relative to a fixed
/// origin instead of from the generic `cp >> 15` / `cp & 0x7FFF` split.
///
/// Each row is `(first, last, lead_primary, origin)`: for `first <= cp <= last`
/// the implicit primaries are `lead_primary` and `(cp - origin) | 0x8000`.
/// The ranges are disjoint, so row order does not affect lookups; rows are
/// listed by ascending start.
///
/// NOTE(review): the ranges look like the Tangut, Tangut Components, Khitan
/// Small Script, Tangut Supplement, Tangut Components Supplement and Nushu
/// blocks — confirm the lead primaries against the UCA data in use.
const IMPLICIT_RANGES: &[(u32, u32, u32, u32)] = &[
    (0x17000, 0x187FF, 0xFB00, 0x17000),
    (0x18800, 0x18AFF, 0xFB01, 0x18800),
    (0x18B00, 0x18CFF, 0xFB03, 0x18B00),
    (0x18D00, 0x18D7F, 0xFB00, 0x17000),
    (0x18D80, 0x18DFF, 0xFB01, 0x18800),
    (0x1B170, 0x1B2FF, 0xFB02, 0x1B170),
];
/// Appends the two implicit collation elements for `cp` to `out`.
///
/// The first element carries the lead primary with secondary `0x0020` and
/// tertiary `0x0002`; the second carries the trailing primary with zero
/// secondary and tertiary weights.
fn push_implicit(cp: u32, out: &mut Vec<u64>) {
    let (lead, trail) = implicit_primaries(cp);
    let pair = [pack(lead, 0x0020, 0x0002), pack(trail, 0x0000, 0x0000)];
    out.extend_from_slice(&pair);
}
/// Computes the pair of implicit primary weights for a code point that has no
/// explicit table entry.
///
/// Code points inside one of the `IMPLICIT_RANGES` rows use that row's lead
/// primary and an offset from the row's origin. Everything else uses a base
/// chosen from `gen::unified_ideograph` and the code point range
/// (`0xFB40`, `0xFB80` or `0xFBC0`) plus `cp >> 15`, with the low 15 bits as
/// the trailing weight. The trailing weight always has bit 15 set.
fn implicit_primaries(cp: u32) -> (u32, u32) {
    let special = IMPLICIT_RANGES
        .iter()
        .find(|&&(first, last, _, _)| first <= cp && cp <= last);
    if let Some(&(_, _, base, origin)) = special {
        return (base, (cp - origin) | 0x8000);
    }

    let base = if !gen::unified_ideograph(cp) {
        // Not a unified ideograph: generic "unassigned/other" base.
        0xFBC0
    } else if matches!(cp, 0x4E00..=0x9FFF | 0xF900..=0xFAFF) {
        0xFB40
    } else {
        0xFB80
    };
    let lead = base + (cp >> 15);
    let trail = (cp & 0x7FFF) | 0x8000;
    (lead, trail)
}
/// Looks up the collation elements of the contraction that starts with
/// `first` and continues with exactly `suffix`.
///
/// Returns `None` when `first` has no contraction entries at all or when no
/// entry's suffix equals `suffix`.
fn lookup_contraction(first: u32, suffix: &[char]) -> Option<&'static [u64]> {
    let candidates = gen::contractions(first)?;
    for (tail, elements) in candidates {
        if *tail != suffix {
            continue;
        }
        return Some(elements);
    }
    None
}
/// Maps a character sequence to its array of packed collation elements.
///
/// For each starting position the function takes the single-character entry
/// from `gen::ce_singles`, prefers the first contiguous contraction from
/// `gen::contractions` whose suffix matches the following characters, and
/// then tries to extend that match with later combining characters
/// (non-zero canonical combining class), removing each absorbed character
/// from `cv`. Characters with no table entry get implicit weights via
/// `push_implicit`.
///
/// `cv` is taken by value because it is edited in place while scanning.
/// NOTE(review): callers in this file mostly pass NFD output; the combining
/// class scan presumably relies on that ordering — confirm for other callers.
fn collation_elements(mut cv: Vec<char>) -> Vec<u64> {
let mut cea = Vec::new();
let mut i = 0;
while i < cv.len() {
let s0 = cv[i] as u32;
// `end` is the index just past the contiguous part of the current match.
let mut end = i + 1;
// Default to the single-character entry; `None` means "no table entry".
let mut matched: Option<&'static [u64]> = gen::ce_singles(s0);
// Characters matched after `s0` so far (contiguous and absorbed ones).
let mut suffix: Vec<char> = Vec::new();
if let Some(entries) = gen::contractions(s0) {
// The first entry whose suffix matches contiguously wins.
// NOTE(review): this picks the longest match only if the table lists
// longer suffixes first — TODO confirm against the generator.
for (suf, ces) in entries {
let stop = i + 1 + suf.len();
if stop <= cv.len() && cv[i + 1..stop] == **suf {
matched = Some(ces);
suffix = suf.to_vec();
end = stop;
break;
}
}
}
// Discontiguous extension: repeat until no further character can be absorbed.
loop {
// Highest combining class among the characters skipped in this pass.
let mut last_ccc = 0u8;
let mut j = end;
let mut hit = None;
while j < cv.len() {
let cc = ccc(cv[j]);
// A character with combining class 0 ends the scan.
if cc == 0 {
break; }
if last_ccc < cc {
// Candidate: the current suffix extended by this character.
let mut trial = suffix.clone();
trial.push(cv[j]);
if let Some(ces) = lookup_contraction(s0, &trial) {
hit = Some((j, ces, trial));
break;
}
last_ccc = cc;
} else {
// A class that does not exceed the previous one also ends the scan.
// NOTE(review): this stops rather than skipping the character, so later
// characters with a higher class are never tried — confirm this is intended.
break; }
j += 1;
}
match hit {
Some((j, ces, trial)) => {
matched = Some(ces);
suffix = trial;
// The absorbed character is deleted; `end` is unchanged, so the
// characters that were skipped over are processed later on their own.
cv.remove(j);
}
None => break,
}
}
match matched {
Some(ces) => cea.extend_from_slice(ces),
None => push_implicit(s0, &mut cea),
}
i = end;
}
cea
}
/// Appends the collation elements that encode one run of ASCII digits as a
/// number, so that numerically smaller runs sort first.
///
/// The encoding is: a marker element carrying the primary weight of `'0'`,
/// then an element whose primary is the count of significant digits plus one,
/// then one element per significant digit with primary `digit + 1`. Leading
/// zeros are dropped; a run made only of zeros keeps its last `'0'`. All
/// emitted elements have zero secondary and tertiary weights.
///
/// `digits` is expected to hold only `'0'..='9'`. An empty run emits nothing.
fn emit_number(digits: &[char], cea: &mut Vec<u64>) {
    // Guard first: `digits.len() - 1` below would underflow on an empty run.
    if digits.is_empty() {
        return;
    }
    let marker = u32::from(primary(collation_elements(alloc::vec!['0'])[0]));
    let first_sig = digits
        .iter()
        .position(|&c| c != '0')
        .unwrap_or(digits.len() - 1);
    let sig = &digits[first_sig..];
    // The primary field is 16 bits wide. Saturate the length weight so an
    // enormous run cannot spill into the flag bits above the primary.
    let length_weight = sig.len().min(0xFFFE) as u32 + 1;
    cea.push(pack(marker, 0, 0));
    cea.push(pack(length_weight, 0, 0));
    // `+ 1` keeps the digit weight non-zero so that '0' is not ignorable.
    cea.extend(sig.iter().map(|&d| pack((d as u32 - '0' as u32) + 1, 0, 0)));
}
/// Builds collation elements with numeric ordering for digit runs.
///
/// The input is split into maximal runs of ASCII digits and maximal runs of
/// everything else. Digit runs go through `emit_number`; the other runs go
/// through `collation_elements`. Results are concatenated in input order.
fn collation_elements_numeric(cv: Vec<char>) -> Vec<u64> {
    let mut cea = Vec::new();
    let mut rest: &[char] = &cv;
    while let Some(head) = rest.first() {
        let in_number = head.is_ascii_digit();
        // Length of the maximal run that shares `head`'s digit/non-digit kind.
        let run_len = rest
            .iter()
            .position(|c| c.is_ascii_digit() != in_number)
            .unwrap_or(rest.len());
        let (run, tail) = rest.split_at(run_len);
        if in_number {
            emit_number(run, &mut cea);
        } else {
            cea.extend(collation_elements(run.to_vec()));
        }
        rest = tail;
    }
    cea
}
/// Number of comparison levels included in a sort key.
///
/// Variants are ordered from weakest to strongest, and the derived `Ord`
/// follows that order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Strength {
    /// Primary weights only.
    Primary,
    /// Primary and secondary weights.
    Secondary,
    /// Primary, secondary and tertiary weights.
    Tertiary,
    /// Adds the fourth level under shifted alternate handling; with
    /// non-ignorable handling the key is the same as for `Tertiary`.
    Quaternary,
}
/// Flattens collation elements into a sort key of 16-bit weights.
///
/// The key lists the non-zero weights of level 1, then for every further
/// level a `0` separator followed by that level's non-zero weights.
/// `strength` decides how many levels are written. A fourth level exists
/// only under `AlternateHandling::Shifted`, so with `NonIgnorable`
/// `Strength::Quaternary` produces the same key as `Strength::Tertiary`.
///
/// Under `Shifted`, each element becomes a four-weight row:
/// * variable with non-zero primary → `[0, 0, 0, primary]`;
/// * primary zero, and either all weights zero or following a variable
///   element → `[0, 0, 0, 0]`;
/// * primary zero otherwise → `[0, secondary, tertiary, 0xFFFF]`;
/// * any other element → `[primary, secondary, tertiary, 0xFFFF]`.
fn build_sort_key(cea: &[u64], alternate: AlternateHandling, strength: Strength) -> Vec<u16> {
    // One row of per-level weights for each collation element.
    let mut rows: Vec<[u16; 4]> = Vec::with_capacity(cea.len());
    match alternate {
        AlternateHandling::NonIgnorable => {
            rows.extend(
                cea.iter()
                    .map(|&ce| [primary(ce), secondary(ce), tertiary(ce), 0]),
            );
        }
        AlternateHandling::Shifted => {
            // True while the most recent element with a non-zero primary was variable.
            let mut after_variable = false;
            for &ce in cea {
                let (p, s, t) = (primary(ce), secondary(ce), tertiary(ce));
                let row = if p != 0 {
                    after_variable = is_variable(ce);
                    if after_variable {
                        [0, 0, 0, p]
                    } else {
                        [p, s, t, 0xFFFF]
                    }
                } else if after_variable || (s == 0 && t == 0) {
                    [0; 4]
                } else {
                    [0, s, t, 0xFFFF]
                };
                rows.push(row);
            }
        }
    }

    let level_count = match (alternate, strength) {
        (_, Strength::Primary) => 1,
        (_, Strength::Secondary) => 2,
        (AlternateHandling::Shifted, Strength::Quaternary) => 4,
        (_, Strength::Tertiary) | (AlternateHandling::NonIgnorable, Strength::Quaternary) => 3,
    };

    let mut key = Vec::new();
    for level in 0..level_count {
        if level > 0 {
            // Level separator.
            key.push(0);
        }
        key.extend(rows.iter().map(|row| row[level]).filter(|&w| w != 0));
    }
    key
}
/// Configuration for producing and comparing sort keys.
#[derive(Clone, Copy, Debug)]
pub struct Collator {
    /// Treatment of elements flagged as variable.
    alternate: AlternateHandling,
    /// Number of levels written into the sort key.
    strength: Strength,
    /// When true, runs of ASCII digits are encoded as numbers.
    numeric: bool,
}
impl Default for Collator {
    /// Shifted alternate handling, tertiary strength, numeric ordering off.
    fn default() -> Self {
        // `Collator::new` supplies tertiary strength and `numeric: false`.
        Self::new(AlternateHandling::Shifted)
    }
}
impl Collator {
#[must_use]
pub fn new(alternate: AlternateHandling) -> Self {
Collator {
alternate,
strength: Strength::Tertiary,
numeric: false,
}
}
#[must_use]
pub fn with_strength(mut self, strength: Strength) -> Self {
self.strength = strength;
self
}
#[must_use]
pub fn with_numeric(mut self, numeric: bool) -> Self {
self.numeric = numeric;
self
}
#[must_use]
pub fn sort_key(&self, s: &str) -> Vec<u16> {
let cv: Vec<char> = nfd(s.chars()).collect();
let cea = if self.numeric {
collation_elements_numeric(cv)
} else {
collation_elements(cv)
};
build_sort_key(&cea, self.alternate, self.strength)
}
#[must_use]
pub fn compare(&self, a: &str, b: &str) -> Ordering {
self.sort_key(a).cmp(&self.sort_key(b))
}
}
/// Compares two strings with the default [`Collator`].
#[must_use]
pub fn compare(a: &str, b: &str) -> Ordering {
    let collator = Collator::default();
    collator.compare(a, b)
}
/// Computes the sort key of `s` with the default [`Collator`].
#[must_use]
pub fn sort_key(s: &str) -> Vec<u16> {
    let collator = Collator::default();
    collator.sort_key(s)
}
/// A set of locale-specific overrides applied on top of the root collation.
///
/// Derives `Debug` and `Clone` so the type can be logged and duplicated like
/// the other public types in this module.
#[derive(Debug, Clone)]
pub struct Tailoring {
    /// `(character sequence, collation elements)` pairs. Input that starts
    /// with the sequence is replaced by the paired elements.
    entries: Vec<(Vec<char>, Vec<u64>)>,
}
impl Tailoring {
    /// Returns the built-in tailoring for a language tag, if there is one.
    ///
    /// Only the primary language subtag (the text before the first `-` or
    /// `_`) selects the rules, and it is compared ASCII case-insensitively.
    /// `"sv"`, `"sv-SE"` and `"SV_FI"` therefore all select the Swedish/Finnish
    /// rules, while a longer subtag such as `"fil"` does not alias to `"fi"`.
    ///
    /// Returns `None` for languages without built-in rules.
    #[must_use]
    pub fn for_locale(lang: &str) -> Option<Tailoring> {
        let subtag = lang
            .split(|c: char| c == '-' || c == '_')
            .next()
            .unwrap_or(lang);
        let is = |code: &str| subtag.eq_ignore_ascii_case(code);
        let rules = if is("sv") || is("fi") {
            "&z < å < ä < ö"
        } else if is("da") || is("nb") || is("nn") || is("no") {
            "&z < æ < ø < å"
        } else if is("is") {
            "&y < ð < þ < æ < ö"
        } else if is("et") {
            "&s < š < z < ž < õ < ä < ö < ü"
        } else if is("de") {
            "&ae = ä &oe = ö &ue = ü &ss = ß"
        } else {
            return None;
        };
        Tailoring::parse(rules)
    }

    /// Parses a small tailoring rule string.
    ///
    /// Whitespace is ignored. The recognised syntax is:
    /// * `&anchor` — sets the reset point (all characters up to the next `<`,
    ///   `=` or `&`) and clears the accumulated offsets;
    /// * `< x` — `x` gets the anchor's primary plus one more primary step;
    /// * `<< x` — one more secondary step; `<<<` (or more) — one more
    ///   tertiary step;
    /// * `= x` — `x` sorts as the anchor sequence itself.
    ///
    /// Each relation applies to exactly one character, the one right after
    /// the operator run. Any other character is skipped.
    ///
    /// Returns `None` when an anchor yields no collation elements, a relation
    /// has no target character, a relation is reached while the anchor
    /// primary is zero (including before any anchor), or no entry was
    /// produced.
    #[must_use]
    pub fn parse(rules: &str) -> Option<Tailoring> {
        let chars: Vec<char> = rules.chars().filter(|c| !c.is_whitespace()).collect();
        let mut entries: Vec<(Vec<char>, Vec<u64>)> = Vec::new();
        let mut anchor: Vec<char> = Vec::new();
        let mut anchor_primary = 0u32;
        let (mut p_off, mut s_off, mut t_off) = (0u32, 0u32, 0u32);
        let mut i = 0;
        while i < chars.len() {
            match chars[i] {
                '&' => {
                    i += 1;
                    let start = i;
                    while i < chars.len() && !matches!(chars[i], '<' | '=' | '&') {
                        i += 1;
                    }
                    anchor = chars[start..i].to_vec();
                    // Only the first element's primary is used as the base.
                    anchor_primary = primary(*collation_elements(anchor.clone()).first()?) as u32;
                    (p_off, s_off, t_off) = (0, 0, 0);
                }
                '<' | '=' => {
                    // The level is the number of `<` in the operator run; a
                    // run made only of `=` has level 0.
                    let mut level = 0u32;
                    while i < chars.len() && (chars[i] == '<' || chars[i] == '=') {
                        if chars[i] == '<' {
                            level += 1;
                        }
                        i += 1;
                    }
                    let target = *chars.get(i)?;
                    i += 1;
                    if anchor_primary == 0 {
                        return None;
                    }
                    if level == 0 {
                        Self::push_expansion(&mut entries, target, &anchor);
                    } else {
                        // A step at one level resets the offsets of all finer levels.
                        match level {
                            1 => (p_off, s_off, t_off) = (p_off + 1, 0, 0),
                            2 => (s_off, t_off) = (s_off + 1, 0),
                            _ => t_off += 1,
                        }
                        Self::push_letter(
                            &mut entries,
                            target,
                            anchor_primary + p_off,
                            s_off,
                            t_off,
                        );
                    }
                }
                _ => i += 1,
            }
        }
        if entries.is_empty() {
            return None;
        }
        // Longest sequences first so that `match_at` prefers them; the sort
        // is stable, so equal-length entries keep their insertion order.
        entries.sort_by_key(|e| core::cmp::Reverse(e.0.len()));
        Some(Tailoring { entries })
    }

    /// Registers `target` and its uppercase form with primary `p`.
    ///
    /// The lowercase form gets tertiary `0x0002 + t_off`, the uppercase form
    /// `0x0008 + t_off`; both get secondary `0x0020 + s_off`. Keys are the
    /// NFD form of each character.
    fn push_letter(
        entries: &mut Vec<(Vec<char>, Vec<u64>)>,
        target: char,
        p: u32,
        s_off: u32,
        t_off: u32,
    ) {
        for (ch, case_t) in [(target, 0x0002u32), (upper(target), 0x0008)] {
            let seq: Vec<char> = nfd(core::iter::once(ch)).collect();
            if !seq.is_empty() {
                entries.push((seq, alloc::vec![pack(p, 0x0020 + s_off, case_t + t_off)]));
            }
        }
    }

    /// Registers `target` as sorting like `anchor`, and the uppercase form of
    /// `target` as sorting like the uppercased anchor.
    fn push_expansion(entries: &mut Vec<(Vec<char>, Vec<u64>)>, target: char, anchor: &[char]) {
        let upper_anchor: Vec<char> = anchor.iter().map(|&c| upper(c)).collect();
        for (ch, anchor_form) in [(target, anchor.to_vec()), (upper(target), upper_anchor)] {
            let seq: Vec<char> = nfd(core::iter::once(ch)).collect();
            let ces = collation_elements(nfd(anchor_form.into_iter()).collect());
            if !seq.is_empty() && !ces.is_empty() {
                entries.push((seq, ces));
            }
        }
    }

    /// Finds the first entry whose sequence is a prefix of `rest`, returning
    /// the number of characters it covers and its collation elements.
    fn match_at(&self, rest: &[char]) -> Option<(usize, &[u64])> {
        self.entries
            .iter()
            .find(|(seq, _)| rest.starts_with(seq))
            .map(|(seq, ces)| (seq.len(), ces.as_slice()))
    }

    /// Computes the sort key of `s` under this tailoring.
    ///
    /// The string is decomposed with `nfd`. Wherever a tailored sequence
    /// matches, its elements are used; the stretches in between go through
    /// the untailored `collation_elements`. The key always uses shifted
    /// alternate handling at tertiary strength.
    #[must_use]
    pub fn sort_key(&self, s: &str) -> Vec<u16> {
        let cv: Vec<char> = nfd(s.chars()).collect();
        let mut cea = Vec::new();
        // Untailored characters waiting to be converted as one run.
        let mut buf: Vec<char> = Vec::new();
        let mut i = 0;
        while i < cv.len() {
            if let Some((len, ces)) = self.match_at(&cv[i..]) {
                if !buf.is_empty() {
                    cea.extend(collation_elements(core::mem::take(&mut buf)));
                }
                cea.extend_from_slice(ces);
                i += len;
            } else {
                buf.push(cv[i]);
                i += 1;
            }
        }
        if !buf.is_empty() {
            cea.extend(collation_elements(buf));
        }
        build_sort_key(&cea, AlternateHandling::Shifted, Strength::Tertiary)
    }

    /// Orders `a` relative to `b` by comparing their tailored sort keys.
    #[must_use]
    pub fn compare(&self, a: &str, b: &str) -> Ordering {
        self.sort_key(a).cmp(&self.sort_key(b))
    }
}
/// Returns the single-character uppercase form of `c`, or `c` itself when
/// there is none.
///
/// A mapping that yields more than one character is treated as "no
/// single-character uppercase", because keeping only its first character
/// would name a different letter.
/// NOTE(review): this assumes `super::case::to_uppercase` can expand the way
/// std's does (e.g. `ß` to `SS`) — confirm. If it only ever yields one
/// character, behaviour is unchanged.
fn upper(c: char) -> char {
    let mut mapped = super::case::to_uppercase(c);
    match mapped.next() {
        Some(first) if mapped.next().is_none() => first,
        _ => c,
    }
}