use crate::value::DictionaryValue;
use crate::{DictionaryNode, MappedDictionaryNode};
use pathmap::utils::ByteMask;
use pathmap::zipper::{
TrieRefBorrowed, TrieRefOwned, Zipper, ZipperReadOnlySubtries, ZipperValues,
};
use pathmap::PathMap;
use std::marker::PhantomData;
mod sealed {
pub trait Sealed {}
}
pub trait TrieRefLike<V>: Clone + Send + Sync + sealed::Sealed {
fn path_exists(&self) -> bool;
fn is_val(&self) -> bool;
fn val_cloned(&self) -> Option<V>;
fn child_mask(&self) -> ByteMask;
fn child_count(&self) -> usize;
fn descend_bytes(&self, bytes: &[u8]) -> Self;
}
impl<V: DictionaryValue> sealed::Sealed for TrieRefOwned<V> {}
impl<V: DictionaryValue> TrieRefLike<V> for TrieRefOwned<V> {
#[inline]
fn path_exists(&self) -> bool {
Zipper::path_exists(self)
}
#[inline]
fn is_val(&self) -> bool {
Zipper::is_val(self)
}
#[inline]
fn val_cloned(&self) -> Option<V> {
ZipperValues::val(self).cloned()
}
#[inline]
fn child_mask(&self) -> ByteMask {
Zipper::child_mask(self)
}
#[inline]
fn child_count(&self) -> usize {
Zipper::child_count(self)
}
#[inline]
fn descend_bytes(&self, bytes: &[u8]) -> Self {
self.trie_ref_at_path(bytes)
}
}
impl<V: DictionaryValue> sealed::Sealed for TrieRefBorrowed<'_, V> {}
impl<'a, V: DictionaryValue> TrieRefLike<V> for TrieRefBorrowed<'a, V> {
#[inline]
fn path_exists(&self) -> bool {
Zipper::path_exists(self)
}
#[inline]
fn is_val(&self) -> bool {
Zipper::is_val(self)
}
#[inline]
fn val_cloned(&self) -> Option<V> {
ZipperValues::val(self).cloned()
}
#[inline]
fn child_mask(&self) -> ByteMask {
Zipper::child_mask(self)
}
#[inline]
fn child_count(&self) -> usize {
Zipper::child_count(self)
}
#[inline]
fn descend_bytes(&self, bytes: &[u8]) -> Self {
self.trie_ref_at_path(bytes)
}
}
#[inline]
pub fn trie_ref_root<V: DictionaryValue>(map: PathMap<V>) -> TrieRefOwned<V> {
map.into_read_zipper::<&[u8]>(&[])
.trie_ref_at_path::<&[u8]>(&[])
}
#[inline]
pub fn trie_ref_root_borrowed<V: DictionaryValue>(map: &PathMap<V>) -> TrieRefBorrowed<'_, V> {
map.trie_ref_at_path::<&[u8]>(&[])
}
#[inline]
pub(crate) fn utf8_sequence_len(first_byte: u8) -> Option<usize> {
if first_byte & 0b1000_0000 == 0 {
Some(1)
} else if first_byte & 0b1110_0000 == 0b1100_0000 {
Some(2)
} else if first_byte & 0b1111_0000 == 0b1110_0000 {
Some(3)
} else if first_byte & 0b1111_1000 == 0b1111_0000 {
Some(4)
} else {
None
}
}
pub struct TrieRefNode<V: DictionaryValue, R: TrieRefLike<V> = TrieRefOwned<V>> {
r: R,
_v: PhantomData<fn() -> V>,
}
impl<V: DictionaryValue, R: TrieRefLike<V>> TrieRefNode<V, R> {
#[inline]
pub fn new(r: R) -> Self {
Self { r, _v: PhantomData }
}
#[inline]
pub fn trie_ref(&self) -> &R {
&self.r
}
}
impl<V: DictionaryValue, R: TrieRefLike<V>> Clone for TrieRefNode<V, R> {
#[inline]
fn clone(&self) -> Self {
Self {
r: self.r.clone(),
_v: PhantomData,
}
}
}
impl<V: DictionaryValue, R: TrieRefLike<V>> DictionaryNode for TrieRefNode<V, R> {
type Unit = u8;
#[inline]
fn is_final(&self) -> bool {
self.r.is_val()
}
#[inline]
fn transition(&self, label: u8) -> Option<Self> {
let child = self.r.descend_bytes(&[label]);
if child.path_exists() {
Some(Self::new(child))
} else {
None
}
}
fn edges(&self) -> Box<dyn Iterator<Item = (u8, Self)> + '_> {
let r = self.r.clone();
Box::new(self.r.child_mask().iter().map(move |byte| {
let child = r.descend_bytes(&[byte]);
(byte, Self::new(child))
}))
}
#[inline]
fn edge_count(&self) -> Option<usize> {
Some(self.r.child_count())
}
}
impl<V: DictionaryValue, R: TrieRefLike<V>> MappedDictionaryNode for TrieRefNode<V, R> {
type Value = V;
#[inline]
fn value(&self) -> Option<Self::Value> {
self.r.val_cloned()
}
}
pub struct TrieRefNodeChar<V: DictionaryValue, R: TrieRefLike<V> = TrieRefOwned<V>> {
r: R,
_v: PhantomData<fn() -> V>,
}
impl<V: DictionaryValue, R: TrieRefLike<V>> TrieRefNodeChar<V, R> {
#[inline]
pub fn new(r: R) -> Self {
Self { r, _v: PhantomData }
}
#[inline]
pub fn trie_ref(&self) -> &R {
&self.r
}
}
impl<V: DictionaryValue, R: TrieRefLike<V>> Clone for TrieRefNodeChar<V, R> {
#[inline]
fn clone(&self) -> Self {
Self {
r: self.r.clone(),
_v: PhantomData,
}
}
}
impl<V: DictionaryValue, R: TrieRefLike<V>> DictionaryNode for TrieRefNodeChar<V, R> {
type Unit = char;
#[inline]
fn is_final(&self) -> bool {
self.r.is_val()
}
fn transition(&self, label: char) -> Option<Self> {
let mut buf = [0u8; 4];
let bytes = label.encode_utf8(&mut buf).as_bytes();
let child = self.r.descend_bytes(bytes);
if child.path_exists() {
Some(Self::new(child))
} else {
None
}
}
fn edges(&self) -> Box<dyn Iterator<Item = (char, Self)> + '_> {
let mut char_edges: Vec<(char, R)> = Vec::new();
for first_byte in self.r.child_mask().iter() {
let seq_len = match utf8_sequence_len(first_byte) {
Some(len) => len,
None => continue, };
let mut partials: Vec<Vec<u8>> = vec![vec![first_byte]];
for _ in 1..seq_len {
let mut next_partials = Vec::new();
for partial in &partials {
let node = self.r.descend_bytes(partial);
if !node.path_exists() {
continue;
}
for cont_byte in node.child_mask().iter() {
if (cont_byte & 0b1100_0000) == 0b1000_0000 {
let mut extended = Vec::with_capacity(partial.len() + 1);
extended.extend_from_slice(partial);
extended.push(cont_byte);
next_partials.push(extended);
}
}
}
partials = next_partials;
}
for utf8_bytes in partials {
if utf8_bytes.len() != seq_len {
continue;
}
if let Ok(s) = std::str::from_utf8(&utf8_bytes) {
let mut chars = s.chars();
if let (Some(c), None) = (chars.next(), chars.next()) {
char_edges.push((c, self.r.descend_bytes(&utf8_bytes)));
}
}
}
}
Box::new(
char_edges
.into_iter()
.map(|(c, child)| (c, Self::new(child))),
)
}
#[inline]
fn edge_count(&self) -> Option<usize> {
Some(self.edges().count())
}
}
impl<V: DictionaryValue, R: TrieRefLike<V>> MappedDictionaryNode for TrieRefNodeChar<V, R> {
type Value = V;
#[inline]
fn value(&self) -> Option<Self::Value> {
self.r.val_cloned()
}
}
#[cfg(test)]
mod tests {
use super::*;
use pathmap::PathMap;
fn map_of(pairs: &[(&str, u32)]) -> PathMap<u32> {
let mut map = PathMap::new();
for (term, value) in pairs {
map.insert(term.as_bytes(), *value);
}
map
}
#[test]
fn owned_root_descends_and_reports_finality() {
let map = map_of(&[("test", 1), ("testing", 2)]);
let node = TrieRefNode::<u32>::new(trie_ref_root(map));
let t = node.transition(b't').expect("'t'");
let e = t.transition(b'e').expect("'e'");
let s = e.transition(b's').expect("'s'");
let t2 = s.transition(b't').expect("second 't'");
assert!(t2.is_final(), "'test' is a term");
assert_eq!(t2.value(), Some(1));
let i = t2.transition(b'i').expect("'i'");
assert!(!i.is_final());
assert_eq!(i.value(), None);
assert!(node.transition(b'z').is_none());
}
#[test]
fn owned_edges_match_child_mask_without_revalidation() {
let map = map_of(&[("ab", 1), ("ac", 2), ("ad", 3)]);
let node = TrieRefNode::<u32>::new(trie_ref_root(map));
let a = node.transition(b'a').expect("'a'");
let mut labels: Vec<u8> = a.edges().map(|(b, _)| b).collect();
labels.sort_unstable();
assert_eq!(labels, vec![b'b', b'c', b'd']);
assert_eq!(a.edge_count(), Some(3));
}
#[test]
fn borrowed_root_reads_without_copying() {
let map = map_of(&[("cat", 7), ("car", 8)]);
let node = TrieRefNode::new(trie_ref_root_borrowed(&map));
let c = node.transition(b'c').expect("'c'");
let a = c.transition(b'a').expect("'a'");
let t = a.transition(b't').expect("'t'");
assert!(t.is_final());
assert_eq!(t.value(), Some(7));
let mut labels: Vec<u8> = a.edges().map(|(b, _)| b).collect();
labels.sort_unstable();
assert_eq!(labels, vec![b'r', b't']);
}
#[test]
fn empty_map_root_has_no_children_and_is_not_final() {
let map: PathMap<u32> = PathMap::new();
let node = TrieRefNode::<u32>::new(trie_ref_root(map));
assert!(!node.is_final());
assert_eq!(node.edge_count(), Some(0));
assert_eq!(node.edges().count(), 0);
assert!(node.transition(b'a').is_none());
}
#[test]
fn root_value_and_empty_string_term() {
let mut map: PathMap<u32> = PathMap::new();
map.insert(b"", 99);
let node = TrieRefNode::<u32>::new(trie_ref_root(map));
assert!(node.is_final(), "empty-string term makes the root final");
assert_eq!(node.value(), Some(99));
}
#[test]
fn descending_a_long_dangling_path_does_not_panic() {
let map = map_of(&[("hello", 1)]);
let root = trie_ref_root(map);
let dangling = vec![b'z'; 80];
let node = TrieRefNode::<u32>::new(root.descend_bytes(&dangling));
assert!(!TrieRefLike::path_exists(&node.r));
assert!(!node.is_final());
assert_eq!(node.value(), None);
assert_eq!(node.edge_count(), Some(0));
assert_eq!(node.edges().count(), 0);
let short = trie_ref_root(map_of(&[("hello", 1)])).descend_bytes(b"hel_no");
assert!(!TrieRefLike::path_exists(&short));
assert!(!TrieRefLike::is_val(&short));
}
#[test]
fn char_node_traverses_unicode() {
let map = map_of(&[("café", 1), ("car", 2), ("中文", 3), ("🎉", 4)]);
let node = TrieRefNodeChar::<u32>::new(trie_ref_root(map));
let c = node.transition('c').expect("'c'");
let a = c.transition('a').expect("'a'");
let f = a.transition('f').expect("'f'");
let e = f.transition('é').expect("'é'");
assert!(e.is_final());
assert_eq!(e.value(), Some(1));
assert!(!f.is_final());
let zhong = node.transition('中').expect("'中'");
let wen = zhong.transition('文').expect("'文'");
assert!(wen.is_final());
assert_eq!(wen.value(), Some(3));
let party = node.transition('🎉').expect("'🎉'");
assert!(party.is_final());
assert_eq!(party.value(), Some(4));
}
#[test]
fn char_node_edges_decode_mixed_widths() {
let map = map_of(&[("café", 1), ("car", 2), ("cart", 3)]);
let node = TrieRefNodeChar::<u32>::new(trie_ref_root(map));
let c = node.transition('c').expect("'c'");
let a = c.transition('a').expect("'a'");
let labels: Vec<char> = a.edges().map(|(ch, _)| ch).collect();
assert!(labels.contains(&'f'), "café branch");
assert!(labels.contains(&'r'), "car branch");
assert_eq!(labels.len(), 2);
assert_eq!(a.edge_count(), Some(2));
}
#[test]
fn char_node_borrowed_variant() {
let map = map_of(&[("中文", 3)]);
let node = TrieRefNodeChar::new(trie_ref_root_borrowed(&map));
let zhong = node.transition('中').expect("'中'");
assert!(!zhong.is_final());
let wen = zhong.transition('文').expect("'文'");
assert!(wen.is_final());
assert_eq!(wen.value(), Some(3));
}
}