use zerofrom::ZeroFrom;
use zerovec::{ZeroSlice, ZeroVec};
const MAX_BRANCH_LINEAR_SUB_NODE_LENGTH: usize = 5;
const MIN_LINEAR_MATCH: u16 = 0x30;
const MAX_LINEAR_MATCH_LENGTH: u16 = 0x10;
const MIN_VALUE_LEAD: u16 = MIN_LINEAR_MATCH + MAX_LINEAR_MATCH_LENGTH; const NODE_TYPE_MASK: u16 = MIN_VALUE_LEAD - 1;
const VALUE_IS_FINAL: u16 = 0x8000;
const MAX_ONE_UNIT_VALUE: u16 = 0x3fff;
const MIN_TWO_UNIT_VALUE_LEAD: u16 = MAX_ONE_UNIT_VALUE + 1;
const MAX_ONE_UNIT_NODE_VALUE: u16 = 0xff;
const MIN_TWO_UNIT_NODE_VALUE_LEAD: u16 = MIN_VALUE_LEAD + ((MAX_ONE_UNIT_NODE_VALUE + 1) << 6);
const THREE_UNIT_NODE_VALUE_LEAD: u16 = 0x7fc0;
const THREE_UNIT_VALUE_LEAD: u16 = 0x7fff;
const MAX_ONE_UNIT_DELTA: u16 = 0xfbff;
const MIN_TWO_UNIT_DELTA_LEAD: u16 = MAX_ONE_UNIT_DELTA + 1; const THREE_UNIT_DELTA_LEAD: u16 = 0xffff;
fn skip_value(pos: usize, lead: u16) -> usize {
if lead < MIN_TWO_UNIT_VALUE_LEAD {
pos
} else if lead < THREE_UNIT_VALUE_LEAD {
pos + 1
} else {
pos + 2
}
}
fn skip_node_value(pos: usize, lead: u16) -> usize {
if lead < MIN_TWO_UNIT_NODE_VALUE_LEAD {
pos
} else if lead < THREE_UNIT_NODE_VALUE_LEAD {
pos + 1
} else {
pos + 2
}
}
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
#[cfg_attr(feature = "databake", derive(databake::Bake))]
#[cfg_attr(feature = "databake", databake(path = icu_collections::char16trie))]
#[derive(Clone, Debug, PartialEq, Eq, ZeroFrom)]
#[allow(clippy::exhaustive_structs)] pub struct Char16Trie<'data> {
#[cfg_attr(feature = "serde", serde(borrow))]
#[doc(hidden)] pub data: ZeroVec<'data, u16>,
}
impl<'data> Char16Trie<'data> {
#[inline]
pub fn new(data: ZeroVec<'data, u16>) -> Self {
Self { data }
}
#[inline]
pub fn iter(&self) -> Char16TrieIterator<'_> {
Char16TrieIterator::new(&self.data)
}
}
#[derive(Clone, Debug)]
pub struct Char16TrieIterator<'a> {
trie: &'a ZeroSlice<u16>,
pos: Option<usize>,
remaining_match_length: Option<usize>,
}
#[derive(Clone, Copy, Debug, PartialEq)]
#[allow(clippy::exhaustive_enums)]
pub enum TrieResult {
NoMatch,
NoValue,
FinalValue(i32),
Intermediate(i32),
}
fn u16_lead(supplementary: i32) -> u16 {
(((supplementary) >> 10) + 0xd7c0) as u16
}
fn u16_tail(supplementary: i32) -> u16 {
(((supplementary) & 0x3ff) | 0xdc00) as u16
}
macro_rules! trie_unwrap {
($option:expr) => {
match $option {
Some(x) => x,
None => {
debug_assert!(false);
return TrieResult::NoMatch;
}
}
};
}
impl<'a> Char16TrieIterator<'a> {
#[inline]
pub fn new(trie: &'a ZeroSlice<u16>) -> Self {
Self {
trie,
pos: Some(0),
remaining_match_length: None,
}
}
pub fn next(&mut self, c: char) -> TrieResult {
if (c as u32) <= 0xffff {
self.next16(c as u16)
} else {
match self.next16(u16_lead(c as i32)) {
TrieResult::NoValue | TrieResult::Intermediate(_) => {
self.next16(u16_tail(c as i32))
}
_ => TrieResult::NoMatch,
}
}
}
pub fn next32(&mut self, c: u32) -> TrieResult {
if c <= 0xffff {
self.next16(c as u16)
} else {
match self.next16(u16_lead(c as i32)) {
TrieResult::NoValue | TrieResult::Intermediate(_) => {
self.next16(u16_tail(c as i32))
}
_ => TrieResult::NoMatch,
}
}
}
pub fn next16(&mut self, c: u16) -> TrieResult {
let mut pos = match self.pos {
Some(p) => p,
None => return TrieResult::NoMatch,
};
if let Some(length) = self.remaining_match_length {
if c == trie_unwrap!(self.trie.get(pos)) {
pos += 1;
self.pos = Some(pos);
if length == 0 {
self.remaining_match_length = None;
let node = trie_unwrap!(self.trie.get(pos));
if node >= MIN_VALUE_LEAD {
return self.value_result(pos);
}
} else {
self.remaining_match_length = Some(length - 1);
}
return TrieResult::NoValue;
}
self.stop();
TrieResult::NoMatch
} else {
self.next_impl(pos, c)
}
}
fn branch_next(&mut self, pos: usize, length: usize, in_unit: u16) -> TrieResult {
let mut pos = pos;
let mut length = length;
if length == 0 {
length = trie_unwrap!(self.trie.get(pos)) as usize;
pos += 1;
}
length += 1;
while length > MAX_BRANCH_LINEAR_SUB_NODE_LENGTH {
if in_unit < trie_unwrap!(self.trie.get(pos)) {
length >>= 1;
pos = trie_unwrap!(self.jump_by_delta(pos + 1));
} else {
length = length - (length >> 1);
pos = trie_unwrap!(self.skip_delta(pos + 1));
}
}
loop {
if in_unit == trie_unwrap!(self.trie.get(pos)) {
pos += 1;
let mut node = trie_unwrap!(self.trie.get(pos));
if node & VALUE_IS_FINAL != 0 {
self.pos = Some(pos);
return self.value_result(pos);
}
pos += 1;
if node < MIN_TWO_UNIT_VALUE_LEAD {
pos += node as usize;
} else if node < THREE_UNIT_VALUE_LEAD {
pos += (((node - MIN_TWO_UNIT_VALUE_LEAD) as u32) << 16) as usize
| trie_unwrap!(self.trie.get(pos)) as usize;
pos += 1;
} else {
pos += ((trie_unwrap!(self.trie.get(pos)) as usize) << 16)
| trie_unwrap!(self.trie.get(pos + 1)) as usize;
pos += 2;
}
node = trie_unwrap!(self.trie.get(pos));
self.pos = Some(pos);
if node >= MIN_VALUE_LEAD {
return self.value_result(pos);
}
return TrieResult::NoValue;
}
length -= 1;
pos = trie_unwrap!(self.skip_value(pos + 1));
if length <= 1 {
break;
}
}
if in_unit == trie_unwrap!(self.trie.get(pos)) {
pos += 1;
self.pos = Some(pos);
let node = trie_unwrap!(self.trie.get(pos));
if node >= MIN_VALUE_LEAD {
return self.value_result(pos);
}
TrieResult::NoValue
} else {
self.stop();
TrieResult::NoMatch
}
}
fn next_impl(&mut self, pos: usize, in_unit: u16) -> TrieResult {
let mut node = trie_unwrap!(self.trie.get(pos));
let mut pos = pos + 1;
loop {
if node < MIN_LINEAR_MATCH {
return self.branch_next(pos, node as usize, in_unit);
} else if node < MIN_VALUE_LEAD {
let length = node - MIN_LINEAR_MATCH;
if in_unit == trie_unwrap!(self.trie.get(pos)) {
pos += 1;
if length == 0 {
self.remaining_match_length = None;
self.pos = Some(pos);
node = trie_unwrap!(self.trie.get(pos));
if node >= MIN_VALUE_LEAD {
return self.value_result(pos);
}
return TrieResult::NoValue;
}
self.remaining_match_length = Some(length as usize - 1);
self.pos = Some(pos);
return TrieResult::NoValue;
}
break;
} else if (node & VALUE_IS_FINAL) != 0 {
break;
} else {
pos = skip_node_value(pos, node);
node &= NODE_TYPE_MASK;
}
}
self.stop();
TrieResult::NoMatch
}
fn stop(&mut self) {
self.pos = None;
}
#[inline(always)] fn jump_by_delta(&self, pos: usize) -> Option<usize> {
let delta = self.trie.get(pos)?;
let v = if delta < MIN_TWO_UNIT_DELTA_LEAD {
pos + 1 + delta as usize
} else if delta == THREE_UNIT_DELTA_LEAD {
let delta =
((self.trie.get(pos + 1)? as usize) << 16) | (self.trie.get(pos + 2)? as usize);
pos + delta + 3
} else {
let delta = (((delta - MIN_TWO_UNIT_DELTA_LEAD) as usize) << 16)
| (self.trie.get(pos + 1)? as usize);
pos + delta + 2
};
Some(v)
}
#[inline(always)] fn skip_value(&self, pos: usize) -> Option<usize> {
let lead_unit = self.trie.get(pos)?;
Some(skip_value(pos + 1, lead_unit & 0x7fff))
}
#[inline(always)] fn skip_delta(&self, pos: usize) -> Option<usize> {
let delta = self.trie.get(pos)?;
let v = if delta < MIN_TWO_UNIT_DELTA_LEAD {
pos + 1
} else if delta == THREE_UNIT_DELTA_LEAD {
pos + 3
} else {
pos + 2
};
Some(v)
}
fn value_result(&self, pos: usize) -> TrieResult {
match self.get_value(pos) {
Some(result) => result,
None => {
debug_assert!(false);
TrieResult::NoMatch
}
}
}
#[inline(always)] fn get_value(&self, pos: usize) -> Option<TrieResult> {
let lead_unit = self.trie.get(pos)?;
if lead_unit & VALUE_IS_FINAL == VALUE_IS_FINAL {
self.read_value(pos + 1, lead_unit & 0x7fff)
.map(TrieResult::FinalValue)
} else {
self.read_node_value(pos + 1, lead_unit)
.map(TrieResult::Intermediate)
}
}
#[inline(always)] fn read_value(&self, pos: usize, lead_unit: u16) -> Option<i32> {
let v = if lead_unit < MIN_TWO_UNIT_VALUE_LEAD {
lead_unit.into()
} else if lead_unit < THREE_UNIT_VALUE_LEAD {
(((lead_unit - MIN_TWO_UNIT_VALUE_LEAD) as i32) << 16) | self.trie.get(pos)? as i32
} else {
((self.trie.get(pos)? as i32) << 16) | self.trie.get(pos + 1)? as i32
};
Some(v)
}
#[inline(always)] fn read_node_value(&self, pos: usize, lead_unit: u16) -> Option<i32> {
let v = if lead_unit < (MIN_TWO_UNIT_NODE_VALUE_LEAD) {
((lead_unit >> 6) - 1).into()
} else if lead_unit < THREE_UNIT_NODE_VALUE_LEAD {
((((lead_unit & 0x7fc0) - MIN_TWO_UNIT_NODE_VALUE_LEAD) as i32) << 10)
| self.trie.get(pos)? as i32
} else {
((self.trie.get(pos)? as i32) << 16) | self.trie.get(pos + 1)? as i32
};
Some(v)
}
}