use crate::value::DictionaryValue;
use crate::{Dictionary, DictionaryNode, MappedDictionary, MappedDictionaryNode, SyncStrategy};
use pathmap::utils::BitMask;
use pathmap::zipper::{Zipper, ZipperMoving, ZipperValues};
use pathmap::PathMap;
use std::sync::Arc;
use crate::sync_compat::RwLock;
#[derive(Clone, Debug)]
pub struct PathMapDictionaryChar<V: DictionaryValue = ()> {
map: Arc<RwLock<PathMap<V>>>,
term_count: Arc<RwLock<usize>>,
}
impl<V: DictionaryValue> PathMapDictionaryChar<V> {
pub fn new() -> Self
where
V: Default,
{
Self {
map: Arc::new(RwLock::new(PathMap::new())),
term_count: Arc::new(RwLock::new(0)),
}
}
pub fn from_terms<I, S>(terms: I) -> Self
where
I: IntoIterator<Item = S>,
S: AsRef<str>,
V: Default,
{
let mut map = PathMap::new();
let mut count = 0;
for term in terms {
let bytes = term.as_ref().as_bytes();
if map.insert(bytes, V::default()).is_none() {
count += 1;
}
}
Self {
map: Arc::new(RwLock::new(map)),
term_count: Arc::new(RwLock::new(count)),
}
}
pub fn from_terms_with_values<I, S>(terms: I) -> Self
where
I: IntoIterator<Item = (S, V)>,
S: AsRef<str>,
{
let mut map = PathMap::new();
let mut count = 0;
for (term, value) in terms {
let bytes = term.as_ref().as_bytes();
if map.insert(bytes, value).is_none() {
count += 1;
}
}
Self {
map: Arc::new(RwLock::new(map)),
term_count: Arc::new(RwLock::new(count)),
}
}
pub fn insert(&self, term: &str) -> bool
where
V: Default,
{
self.insert_with_value(term, V::default())
}
pub fn insert_with_value(&self, term: &str, value: V) -> bool {
let bytes = term.as_bytes();
let mut map = self.map.write();
let mut count = self.term_count.write();
if map.insert(bytes, value).is_none() {
*count += 1;
true
} else {
false
}
}
pub fn remove(&self, term: &str) -> bool {
let bytes = term.as_bytes();
let mut map = self.map.write();
let mut count = self.term_count.write();
if map.remove_val_at(bytes, true).is_some() {
*count = count.saturating_sub(1);
true
} else {
false
}
}
pub fn clear(&self) {
let mut map = self.map.write();
let mut count = self.term_count.write();
*map = PathMap::new();
*count = 0;
}
pub fn term_count(&self) -> usize {
*self.term_count.read()
}
pub fn get_value(&self, term: &str) -> Option<V> {
let bytes = term.as_bytes();
let map = self.map.read();
map.get_val_at(bytes).cloned()
}
pub fn update_or_insert<F>(&self, term: &str, default_value: V, update_fn: F) -> bool
where
F: FnOnce(&mut V),
{
let bytes = term.as_bytes();
let mut map = self.map.write();
let mut count = self.term_count.write();
let existed = map.get_val_at(bytes).is_some();
let value = map.get_val_or_set_mut_at(bytes, default_value);
update_fn(value);
if !existed {
*count += 1;
}
!existed
}
}
impl<V: DictionaryValue + Default> Default for PathMapDictionaryChar<V> {
fn default() -> Self {
Self::new()
}
}
impl<V: DictionaryValue> Dictionary for PathMapDictionaryChar<V> {
type Node = PathMapNodeChar<V>;
#[inline]
fn root(&self) -> Self::Node {
PathMapNodeChar {
map: Arc::clone(&self.map),
byte_path: Arc::new(Vec::new()),
}
}
#[inline]
fn len(&self) -> Option<usize> {
Some(self.term_count())
}
#[inline]
fn sync_strategy(&self) -> SyncStrategy {
SyncStrategy::ExternalSync
}
}
impl<V: DictionaryValue> MappedDictionary for PathMapDictionaryChar<V> {
type Value = V;
fn get_value(&self, term: &str) -> Option<Self::Value> {
PathMapDictionaryChar::get_value(self, term)
}
}
impl<V: DictionaryValue + Default> crate::MutableDictionary for PathMapDictionaryChar<V> {
fn insert(&self, term: &str) -> bool {
PathMapDictionaryChar::insert(self, term)
}
fn remove(&self, term: &str) -> bool {
PathMapDictionaryChar::remove(self, term)
}
}
impl<V: DictionaryValue> crate::MutableMappedDictionary for PathMapDictionaryChar<V> {
fn insert_with_value(&self, term: &str, value: Self::Value) -> bool {
PathMapDictionaryChar::insert_with_value(self, term, value)
}
fn update_or_insert<F>(&self, term: &str, default_value: Self::Value, update_fn: F) -> bool
where
F: FnOnce(&mut Self::Value),
{
PathMapDictionaryChar::update_or_insert(self, term, default_value, update_fn)
}
fn union_with<F>(&self, other: &Self, merge_fn: F) -> usize
where
F: Fn(&Self::Value, &Self::Value) -> Self::Value,
Self::Value: Clone,
{
let other_map = other.map.read();
let mut self_map = self.map.write();
let mut self_term_count = self.term_count.write();
let mut processed = 0;
for (key_bytes, other_value) in other_map.iter() {
processed += 1;
if let Some(self_value) = self_map.get(&key_bytes) {
let merged = merge_fn(self_value, other_value);
self_map.insert(&key_bytes, merged);
} else {
self_map.insert(&key_bytes, other_value.clone());
*self_term_count += 1;
}
}
processed
}
}
#[derive(Clone)]
pub struct PathMapNodeChar<V: DictionaryValue> {
map: Arc<RwLock<PathMap<V>>>,
byte_path: Arc<Vec<u8>>, }
impl<V: DictionaryValue> PathMapNodeChar<V> {
#[inline(always)]
fn with_zipper<F, R>(&self, f: F) -> R
where
F: FnOnce(pathmap::zipper::ReadZipperUntracked<'_, 'static, V>) -> R,
{
let map = self.map.read();
let zipper = if self.byte_path.is_empty() {
map.read_zipper()
} else {
map.read_zipper_at_path(&**self.byte_path)
};
f(zipper)
}
fn utf8_sequence_len(first_byte: u8) -> Option<usize> {
if first_byte & 0b1000_0000 == 0 {
Some(1)
} else if first_byte & 0b1110_0000 == 0b1100_0000 {
Some(2)
} else if first_byte & 0b1111_0000 == 0b1110_0000 {
Some(3)
} else if first_byte & 0b1111_1000 == 0b1111_0000 {
Some(4)
} else {
None
}
}
}
impl<V: DictionaryValue> DictionaryNode for PathMapNodeChar<V> {
type Unit = char;
#[inline]
fn is_final(&self) -> bool {
self.with_zipper(|z| z.is_val())
}
fn transition(&self, label: char) -> Option<Self> {
let mut char_bytes = [0u8; 4];
let char_str = label.encode_utf8(&mut char_bytes);
let char_byte_slice = char_str.as_bytes();
let exists = self.with_zipper(|mut z| {
z.descend_to(char_byte_slice);
z.path_exists()
});
if exists {
let mut new_path = Vec::with_capacity(self.byte_path.len() + char_byte_slice.len());
new_path.extend_from_slice(&self.byte_path);
new_path.extend_from_slice(char_byte_slice);
Some(PathMapNodeChar {
map: Arc::clone(&self.map),
byte_path: Arc::new(new_path),
})
} else {
None
}
}
fn edges(&self) -> Box<dyn Iterator<Item = (char, Self)> + '_> {
let map = Arc::clone(&self.map);
let base_path = Arc::clone(&self.byte_path);
let char_edges: Vec<(char, Vec<u8>)> = {
let map_guard = map.read();
let zipper = if base_path.is_empty() {
map_guard.read_zipper()
} else {
map_guard.read_zipper_at_path(&**base_path)
};
let mask = zipper.child_mask();
let first_bytes: Vec<u8> = (0..=255u8).filter(|byte| mask.test_bit(*byte)).collect();
let mut edges = Vec::new();
for first_byte in first_bytes {
let seq_len = match Self::utf8_sequence_len(first_byte) {
Some(seq_len) => seq_len,
None => continue,
};
let mut partials = vec![vec![first_byte]];
for _ in 1..seq_len {
let mut next_partials = Vec::new();
for partial in partials {
let mut absolute_path = Vec::with_capacity(base_path.len() + partial.len());
absolute_path.extend_from_slice(&base_path);
absolute_path.extend_from_slice(&partial);
let partial_zipper = map_guard.read_zipper_at_path(&absolute_path);
if !partial_zipper.path_exists() {
continue;
}
let child_mask = partial_zipper.child_mask();
for cont_byte in (0..=255u8).filter(|byte| {
child_mask.test_bit(*byte) && (*byte & 0b1100_0000) == 0b1000_0000
}) {
let mut extended = partial.clone();
extended.push(cont_byte);
next_partials.push(extended);
}
}
partials = next_partials;
}
for utf8_bytes in partials {
if utf8_bytes.len() != seq_len {
continue;
}
if let Ok(s) = std::str::from_utf8(&utf8_bytes) {
let mut chars = s.chars();
if let (Some(c), None) = (chars.next(), chars.next()) {
edges.push((c, utf8_bytes));
}
}
}
}
edges
};
Box::new(char_edges.into_iter().map(move |(c, utf8_bytes)| {
let mut new_path = Vec::with_capacity(base_path.len() + utf8_bytes.len());
new_path.extend_from_slice(&base_path);
new_path.extend_from_slice(&utf8_bytes);
(
c,
PathMapNodeChar {
map: Arc::clone(&map),
byte_path: Arc::new(new_path),
},
)
}))
}
fn edge_count(&self) -> Option<usize> {
Some(self.edges().count())
}
}
impl<V: DictionaryValue> MappedDictionaryNode for PathMapNodeChar<V> {
type Value = V;
#[inline]
fn value(&self) -> Option<Self::Value> {
self.with_zipper(|zipper| zipper.val().cloned())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_pathmap_char_creation() {
let dict: PathMapDictionaryChar<()> =
PathMapDictionaryChar::from_terms(vec!["café", "中文", "🎉"]);
assert_eq!(dict.len(), Some(3));
}
#[test]
fn test_pathmap_char_contains() {
let dict: PathMapDictionaryChar<()> =
PathMapDictionaryChar::from_terms(vec!["café", "naïve"]);
assert!(dict.contains("café"));
assert!(dict.contains("naïve"));
assert!(!dict.contains("cafe")); }
#[test]
fn test_pathmap_char_unicode_terms() {
let dict: PathMapDictionaryChar<()> =
PathMapDictionaryChar::from_terms(vec!["hello", "café", "中文", "🎉", "test123"]);
assert!(dict.contains("hello"));
assert!(dict.contains("café"));
assert!(dict.contains("中文"));
assert!(dict.contains("🎉"));
assert!(dict.contains("test123"));
assert!(!dict.contains("missing"));
}
#[test]
fn test_pathmap_char_node_traversal() {
let dict: PathMapDictionaryChar<()> = PathMapDictionaryChar::from_terms(vec!["café"]);
let root = dict.root();
let c = root.transition('c').expect("should have 'c'");
let a = c.transition('a').expect("should have 'a'");
let f = a.transition('f').expect("should have 'f'");
let e_acute = f.transition('é').expect("should have 'é'");
assert!(e_acute.is_final(), "'café' should be final");
assert!(!f.is_final(), "'caf' should not be final");
}
#[test]
fn test_pathmap_char_node_edges() {
let dict: PathMapDictionaryChar<()> =
PathMapDictionaryChar::from_terms(vec!["café", "car", "cart"]);
let root = dict.root();
let c = root.transition('c').expect("should have 'c'");
let a = c.transition('a').expect("should have 'a'");
let edges: Vec<char> = a.edges().map(|(ch, _)| ch).collect();
assert!(edges.contains(&'f'), "should have 'f' for 'café'");
assert!(edges.contains(&'r'), "should have 'r' for 'car'");
}
#[test]
fn test_pathmap_char_insert() {
let dict: PathMapDictionaryChar<()> = PathMapDictionaryChar::from_terms(vec!["test"]);
assert_eq!(dict.term_count(), 1);
assert!(dict.insert("café"));
assert_eq!(dict.term_count(), 2);
assert!(dict.contains("café"));
assert!(!dict.insert("test"));
assert_eq!(dict.term_count(), 2);
}
#[test]
fn test_pathmap_char_remove() {
let dict: PathMapDictionaryChar<()> =
PathMapDictionaryChar::from_terms(vec!["café", "中文", "test"]);
assert_eq!(dict.term_count(), 3);
assert!(dict.remove("café"));
assert_eq!(dict.term_count(), 2);
assert!(!dict.contains("café"));
assert!(dict.contains("中文"));
assert!(dict.contains("test"));
assert!(!dict.remove("missing"));
assert_eq!(dict.term_count(), 2);
}
#[test]
fn test_pathmap_char_with_values() {
let terms_with_values = vec![("café", 1u32), ("中文", 2u32), ("🎉", 3u32)];
let dict: PathMapDictionaryChar<u32> =
PathMapDictionaryChar::from_terms_with_values(terms_with_values);
assert_eq!(dict.len(), Some(3));
assert_eq!(dict.get_value("café"), Some(1));
assert_eq!(dict.get_value("中文"), Some(2));
assert_eq!(dict.get_value("🎉"), Some(3));
assert_eq!(dict.get_value("missing"), None);
}
#[test]
fn test_pathmap_char_node_value() {
let terms_with_values = vec![("café", 10u32), ("中文", 20u32)];
let dict: PathMapDictionaryChar<u32> =
PathMapDictionaryChar::from_terms_with_values(terms_with_values);
let root = dict.root();
let c = root.transition('c').expect("should have 'c'");
let a = c.transition('a').expect("should have 'a'");
let f = a.transition('f').expect("should have 'f'");
let e_acute = f.transition('é').expect("should have 'é'");
assert!(e_acute.is_final());
assert_eq!(e_acute.value(), Some(10));
assert!(!c.is_final());
assert_eq!(c.value(), None);
}
#[test]
fn test_pathmap_char_emoji() {
let dict: PathMapDictionaryChar<()> =
PathMapDictionaryChar::from_terms(vec!["hello🎉", "world🌍"]);
assert!(dict.contains("hello🎉"));
assert!(dict.contains("world🌍"));
let root = dict.root();
let h = root.transition('h').unwrap();
let e = h.transition('e').unwrap();
let l1 = e.transition('l').unwrap();
let l2 = l1.transition('l').unwrap();
let o = l2.transition('o').unwrap();
let emoji = o.transition('🎉').expect("should have emoji");
assert!(emoji.is_final());
}
#[test]
fn test_pathmap_char_cjk() {
let dict: PathMapDictionaryChar<()> =
PathMapDictionaryChar::from_terms(vec!["中文", "日本語"]);
assert!(dict.contains("中文"));
assert!(dict.contains("日本語"));
let root = dict.root();
let zhong = root.transition('中').expect("should have '中'");
let wen = zhong.transition('文').expect("should have '文'");
assert!(wen.is_final());
}
}