use crate::byte_phf::PerfectByteHashMap;
use crate::cursor::AsciiProbeResult;
use crate::helpers::*;
use crate::options::*;
use crate::varint::read_varint_meta2;
use crate::varint::read_varint_meta3;
#[cfg(feature = "alloc")]
use alloc::string::String;
#[inline]
fn get_branch(mut trie: &[u8], i: usize, n: usize, mut w: usize) -> &[u8] {
let mut p = 0usize;
let mut q = 0usize;
loop {
let indices;
(indices, trie) = trie.debug_split_at(n - 1);
p = (p << 8)
+ if i == 0 {
0
} else {
*indices.get(i - 1).debug_unwrap_or(&0) as usize
};
q = match indices.get(i) {
Some(x) => (q << 8) + *x as usize,
None => trie.len(),
};
if w == 0 {
break;
}
w -= 1;
}
trie.get(p..q).debug_unwrap_or(&[])
}
#[inline]
fn get_branch_w0(mut trie: &[u8], i: usize, n: usize) -> &[u8] {
let indices;
(indices, trie) = trie.debug_split_at(n - 1);
let p = if i == 0 {
0
} else {
*indices.get(i - 1).debug_unwrap_or(&0) as usize
};
let q = match indices.get(i) {
Some(x) => *x as usize,
None => trie.len(),
};
trie.get(p..q).debug_unwrap_or(&[])
}
enum NodeType {
Ascii,
Span,
Value,
Branch,
}
impl core::fmt::Debug for NodeType {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
use NodeType::*;
f.write_str(match *self {
Ascii => "a",
Span => "s",
Value => "v",
Branch => "m",
})
}
}
#[inline]
fn byte_type(b: u8) -> NodeType {
match b & 0b11100000 {
0b10000000 => NodeType::Value,
0b10100000 => NodeType::Span,
0b11000000 => NodeType::Branch,
0b11100000 => NodeType::Branch,
_ => NodeType::Ascii,
}
}
#[inline]
pub(crate) fn get_parameterized<T: ZeroTrieWithOptions + ?Sized>(
mut trie: &[u8],
mut ascii: &[u8],
) -> Option<usize> {
loop {
let (b, x, i, search);
(b, trie) = trie.split_first()?;
let byte_type = byte_type(*b);
(x, trie) = match byte_type {
NodeType::Ascii => (0, trie),
NodeType::Span => {
if matches!(T::OPTIONS.ascii_mode, AsciiMode::BinarySpans) {
read_varint_meta3(*b, trie)
} else {
debug_assert!(false, "Span node found in ASCII trie!");
return None;
}
}
NodeType::Value => read_varint_meta3(*b, trie),
NodeType::Branch => read_varint_meta2(*b, trie),
};
if let Some((c, temp)) = ascii.split_first() {
if matches!(byte_type, NodeType::Ascii) {
let is_match = if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase)
{
b.eq_ignore_ascii_case(c)
} else {
b == c
};
if is_match {
ascii = temp;
continue;
} else {
return None;
}
}
if matches!(byte_type, NodeType::Value) {
continue;
}
if matches!(T::OPTIONS.ascii_mode, AsciiMode::BinarySpans)
&& matches!(byte_type, NodeType::Span)
{
let (trie_span, ascii_span);
(trie_span, trie) = trie.debug_split_at(x);
(ascii_span, ascii) = ascii.split_at_checked(x)?;
if trie_span == ascii_span {
continue;
} else {
return None;
}
}
let (x, w) = if x >= 256 { (x & 0xff, x >> 8) } else { (x, 0) };
let w = if matches!(T::OPTIONS.capacity_mode, CapacityMode::Extended) {
w
} else {
debug_assert!(w <= 3, "get: w > 3 but we assume w <= 3");
w & 0x3
};
let x = if x == 0 { 256 } else { x };
if matches!(T::OPTIONS.phf_mode, PhfMode::BinaryOnly) || x < 16 {
(search, trie) = trie.debug_split_at(x);
let bsearch_result =
if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase) {
search.binary_search_by_key(&c.to_ascii_lowercase(), |x| {
x.to_ascii_lowercase()
})
} else {
search.binary_search(c)
};
i = bsearch_result.ok()?;
} else {
(search, trie) = trie.debug_split_at(x * 2 + 1);
i = PerfectByteHashMap::from_store(search).get(*c)?;
}
trie = if w == 0 {
get_branch_w0(trie, i, x)
} else {
get_branch(trie, i, x, w)
};
ascii = temp;
continue;
} else {
if matches!(byte_type, NodeType::Value) {
return Some(x);
}
return None;
}
}
}
#[inline]
pub(crate) fn step_parameterized<T: ZeroTrieWithOptions + ?Sized>(
trie: &mut &[u8],
c: u8,
) -> Option<u8> {
debug_assert!(
matches!(T::OPTIONS.ascii_mode, AsciiMode::AsciiOnly),
"Spans not yet implemented in step function"
);
debug_assert!(
matches!(T::OPTIONS.phf_mode, PhfMode::BinaryOnly),
"PHF not yet implemented in step function"
);
debug_assert!(
matches!(T::OPTIONS.capacity_mode, CapacityMode::Normal),
"Extended capacity not yet implemented in step function"
);
let (mut b, x, search);
loop {
(b, *trie) = match trie.split_first() {
Some(v) => v,
None => {
return None;
}
};
match byte_type(*b) {
NodeType::Ascii => {
let is_match = if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase)
{
b.eq_ignore_ascii_case(&c)
} else {
*b == c
};
if is_match {
return Some(*b);
} else {
*trie = &[];
return None;
}
}
NodeType::Branch => {
(x, *trie) = read_varint_meta2(*b, trie);
break;
}
NodeType::Span => {
debug_assert!(false, "Span node found in ASCII trie!");
return None;
}
NodeType::Value => {
(_, *trie) = read_varint_meta3(*b, trie);
continue;
}
};
}
let (x, w) = if x >= 256 { (x & 0xff, x >> 8) } else { (x, 0) };
debug_assert!(w <= 3, "get: w > 3 but we assume w <= 3");
let w = w & 0x3;
let x = if x == 0 { 256 } else { x };
(search, *trie) = trie.debug_split_at(x);
let bsearch_result = if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase) {
search.binary_search_by_key(&c.to_ascii_lowercase(), |x| x.to_ascii_lowercase())
} else {
search.binary_search(&c)
};
match bsearch_result {
Ok(i) => {
*trie = if w == 0 {
get_branch_w0(trie, i, x)
} else {
get_branch(trie, i, x, w)
};
#[allow(clippy::indexing_slicing)] Some(search[i])
}
Err(_) => {
*trie = &[];
None
}
}
}
#[inline]
pub(crate) fn probe_parameterized<T: ZeroTrieWithOptions + ?Sized>(
trie: &mut &[u8],
index: usize,
) -> Option<AsciiProbeResult> {
debug_assert!(
matches!(T::OPTIONS.ascii_mode, AsciiMode::AsciiOnly),
"Spans not yet implemented in step function"
);
debug_assert!(
matches!(T::OPTIONS.phf_mode, PhfMode::BinaryOnly),
"PHF not yet implemented in step function"
);
debug_assert!(
matches!(T::OPTIONS.capacity_mode, CapacityMode::Normal),
"Extended capacity not yet implemented in step function"
);
let (mut b, x, search);
loop {
(b, *trie) = match trie.split_first() {
Some(v) => v,
None => {
return None;
}
};
match byte_type(*b) {
NodeType::Ascii => {
if index > 0 {
*trie = &[];
return None;
}
return Some(AsciiProbeResult {
byte: *b,
total_siblings: 1,
});
}
NodeType::Branch => {
(x, *trie) = read_varint_meta2(*b, trie);
break;
}
NodeType::Span => {
debug_assert!(false, "Span node found in ASCII trie!");
return None;
}
NodeType::Value => {
(_, *trie) = read_varint_meta3(*b, trie);
continue;
}
};
}
let (x, w) = if x >= 256 { (x & 0xff, x >> 8) } else { (x, 0) };
debug_assert!(u8::try_from(x).is_ok());
let total_siblings = x as u8;
debug_assert!(w <= 3, "get: w > 3 but we assume w <= 3");
let w = w & 0x3;
let x = if x == 0 { 256 } else { x };
if index >= x {
*trie = &[];
return None;
}
(search, *trie) = trie.debug_split_at(x);
*trie = if w == 0 {
get_branch_w0(trie, index, x)
} else {
get_branch(trie, index, x, w)
};
Some(AsciiProbeResult {
#[allow(clippy::indexing_slicing)] byte: search[index],
total_siblings,
})
}
pub(crate) fn take_value(trie: &mut &[u8]) -> Option<usize> {
let (b, new_trie) = trie.split_first()?;
match byte_type(*b) {
NodeType::Ascii | NodeType::Span | NodeType::Branch => None,
NodeType::Value => {
let x;
(x, *trie) = read_varint_meta3(*b, new_trie);
Some(x)
}
}
}
#[cfg(feature = "alloc")]
use alloc::vec::Vec;
#[cfg(feature = "alloc")]
#[derive(Debug)]
pub struct ZeroTrieIterator<'a> {
use_phf: bool,
state: Vec<(&'a [u8], Vec<u8>, usize)>,
}
#[cfg(feature = "alloc")]
impl<'a> ZeroTrieIterator<'a> {
pub(crate) fn new<S: AsRef<[u8]> + ?Sized>(store: &'a S, use_phf: bool) -> Self {
ZeroTrieIterator {
use_phf,
state: alloc::vec![(store.as_ref(), alloc::vec![], 0)],
}
}
}
#[cfg(feature = "alloc")]
impl Iterator for ZeroTrieIterator<'_> {
type Item = (Vec<u8>, usize);
fn next(&mut self) -> Option<Self::Item> {
let (mut trie, mut string, mut branch_idx);
(trie, string, branch_idx) = self.state.pop()?;
loop {
let (b, x, span, search);
let return_trie = trie;
(b, trie) = match trie.split_first() {
Some(tpl) => tpl,
None => {
(trie, string, branch_idx) = self.state.pop()?;
continue;
}
};
let byte_type = byte_type(*b);
if matches!(byte_type, NodeType::Ascii) {
string.push(*b);
continue;
}
(x, trie) = match byte_type {
NodeType::Ascii => (0, trie),
NodeType::Span | NodeType::Value => read_varint_meta3(*b, trie),
NodeType::Branch => read_varint_meta2(*b, trie),
};
if matches!(byte_type, NodeType::Span) {
(span, trie) = trie.debug_split_at(x);
string.extend(span);
continue;
}
if matches!(byte_type, NodeType::Value) {
let retval = string.clone();
self.state.push((trie, string, 0));
return Some((retval, x));
}
let (x, w) = if x >= 256 { (x & 0xff, x >> 8) } else { (x, 0) };
let x = if x == 0 { 256 } else { x };
if branch_idx + 1 < x {
self.state
.push((return_trie, string.clone(), branch_idx + 1));
}
let byte = if x < 16 || !self.use_phf {
(search, trie) = trie.debug_split_at(x);
debug_unwrap!(search.get(branch_idx), return None)
} else {
(search, trie) = trie.debug_split_at(x * 2 + 1);
debug_unwrap!(search.get(branch_idx + x + 1), return None)
};
string.push(*byte);
trie = if w == 0 {
get_branch_w0(trie, branch_idx, x)
} else {
get_branch(trie, branch_idx, x, w)
};
branch_idx = 0;
}
}
}
#[cfg(feature = "alloc")]
pub(crate) fn get_iter_phf<S: AsRef<[u8]> + ?Sized>(store: &S) -> ZeroTrieIterator<'_> {
ZeroTrieIterator::new(store, true)
}
#[cfg(feature = "alloc")]
#[expect(clippy::type_complexity)]
pub(crate) fn get_iter_ascii_or_panic<S: AsRef<[u8]> + ?Sized>(
store: &S,
) -> core::iter::Map<ZeroTrieIterator<'_>, fn((Vec<u8>, usize)) -> (String, usize)> {
ZeroTrieIterator::new(store, false).map(|(k, v)| {
#[expect(clippy::unwrap_used)] let ascii_str = String::from_utf8(k).unwrap();
(ascii_str, v)
})
}