use crate::dynamic_dawg_u64_zipper::DynamicDawgU64Zipper;
use crate::value::DictionaryValue;
use crate::{Dictionary, DictionaryNode, SyncStrategy};
use arc_swap::{ArcSwap, ArcSwapOption};
use smallvec::SmallVec;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
#[derive(Clone, Debug, Default)]
pub(crate) struct EdgeList<V: DictionaryValue> {
pub(crate) edges: SmallVec<[(u64, Arc<DawgNodeU64<V>>); 4]>,
}
impl<V: DictionaryValue> EdgeList<V> {
fn new() -> Self {
EdgeList {
edges: SmallVec::new(),
}
}
#[inline]
pub(crate) fn find(&self, label: u64) -> Option<&Arc<DawgNodeU64<V>>> {
self.edges
.iter()
.find(|(l, _)| *l == label)
.map(|(_, node)| node)
}
fn with_edge(&self, label: u64, node: Arc<DawgNodeU64<V>>) -> Self {
let mut new_edges = self.edges.clone();
let pos = new_edges
.iter()
.position(|(l, _)| *l >= label)
.unwrap_or(new_edges.len());
if pos < new_edges.len() && new_edges[pos].0 == label {
new_edges[pos] = (label, node);
} else {
new_edges.insert(pos, (label, node));
}
EdgeList { edges: new_edges }
}
#[allow(dead_code)]
fn without_edge(&self, label: u64) -> Self {
let new_edges: SmallVec<_> = self
.edges
.iter()
.filter(|(l, _)| *l != label)
.cloned()
.collect();
EdgeList { edges: new_edges }
}
}
#[derive(Debug)]
pub(crate) struct DawgNodeU64<V: DictionaryValue> {
pub(crate) edges: ArcSwap<EdgeList<V>>,
pub(crate) is_final: AtomicBool,
pub(crate) value: ArcSwapOption<V>,
}
impl<V: DictionaryValue> Clone for DawgNodeU64<V> {
fn clone(&self) -> Self {
let edges_guard = self.edges.load();
let edges_clone = (**edges_guard).clone();
let value_guard = self.value.load();
let value_clone: Option<V> = value_guard.as_ref().map(|arc| (**arc).clone());
DawgNodeU64 {
edges: ArcSwap::from_pointee(edges_clone),
is_final: AtomicBool::new(self.is_final.load(Ordering::Acquire)),
value: match value_clone {
Some(v) => ArcSwapOption::from_pointee(Some(v)),
None => ArcSwapOption::empty(),
},
}
}
}
impl<V: DictionaryValue> DawgNodeU64<V> {
fn new(is_final: bool) -> Self {
DawgNodeU64 {
edges: ArcSwap::from_pointee(EdgeList::new()),
is_final: AtomicBool::new(is_final),
value: ArcSwapOption::empty(),
}
}
fn new_with_value(is_final: bool, value: Option<V>) -> Self {
DawgNodeU64 {
edges: ArcSwap::from_pointee(EdgeList::new()),
is_final: AtomicBool::new(is_final),
value: match value {
Some(v) => ArcSwapOption::from_pointee(Some(v)),
None => ArcSwapOption::empty(),
},
}
}
}
pub struct DynamicDawgU64<V: DictionaryValue = ()> {
root: Arc<DawgNodeU64<V>>,
term_count: AtomicUsize,
needs_compaction: AtomicBool,
}
impl<V: DictionaryValue> Clone for DynamicDawgU64<V> {
fn clone(&self) -> Self {
DynamicDawgU64 {
root: Arc::new(self.deep_clone_node(&self.root)),
term_count: AtomicUsize::new(self.term_count.load(Ordering::Relaxed)),
needs_compaction: AtomicBool::new(self.needs_compaction.load(Ordering::Relaxed)),
}
}
}
impl<V: DictionaryValue> std::fmt::Debug for DynamicDawgU64<V> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("DynamicDawgU64")
.field("term_count", &self.term_count.load(Ordering::Relaxed))
.field(
"needs_compaction",
&self.needs_compaction.load(Ordering::Relaxed),
)
.finish()
}
}
impl<V: DictionaryValue> Default for DynamicDawgU64<V> {
fn default() -> Self {
Self::new()
}
}
impl<V: DictionaryValue> DynamicDawgU64<V> {
fn deep_clone_node(&self, node: &Arc<DawgNodeU64<V>>) -> DawgNodeU64<V> {
let edges = node.edges.load();
let new_edges: SmallVec<_> = edges
.edges
.iter()
.map(|(label, child)| (*label, Arc::new(self.deep_clone_node(child))))
.collect();
let value_guard = node.value.load();
let value_clone: Option<V> = value_guard.as_ref().map(|arc| (**arc).clone());
DawgNodeU64 {
edges: ArcSwap::from_pointee(EdgeList { edges: new_edges }),
is_final: AtomicBool::new(node.is_final.load(Ordering::Acquire)),
value: match value_clone {
Some(v) => ArcSwapOption::from_pointee(Some(v)),
None => ArcSwapOption::empty(),
},
}
}
pub fn new() -> Self {
DynamicDawgU64 {
root: Arc::new(DawgNodeU64::new(false)),
term_count: AtomicUsize::new(0),
needs_compaction: AtomicBool::new(false),
}
}
#[inline]
pub(crate) fn root_arc(&self) -> Arc<DawgNodeU64<V>> {
self.root.clone()
}
pub fn with_auto_minimize_threshold(_threshold: f32) -> Self {
Self::new()
}
pub fn with_config(
_auto_minimize_threshold: f32,
_bloom_filter_capacity: Option<usize>,
) -> Self {
Self::new()
}
pub fn from_terms<I, S>(terms: I) -> Self
where
I: IntoIterator<Item = S>,
S: AsRef<str>,
{
let dawg = Self::new();
for term in terms {
dawg.insert(term.as_ref());
}
dawg
}
pub fn from_sorted_terms<I, S>(terms: I) -> Self
where
I: IntoIterator<Item = S>,
S: AsRef<str>,
{
Self::from_terms(terms)
}
pub fn from_terms_with_values<I, S>(entries: I) -> Self
where
I: IntoIterator<Item = (S, V)>,
S: AsRef<str>,
{
let dawg = Self::new();
for (term, value) in entries {
dawg.insert_with_value(term.as_ref(), value);
}
dawg
}
pub fn insert(&self, term: &str) -> bool {
let sequence: Vec<u64> = crate::CharUnit::from_str(term);
self.insert_sequence(&sequence)
}
pub fn insert_with_value(&self, term: &str, value: V) -> bool {
let sequence: Vec<u64> = crate::CharUnit::from_str(term);
self.insert_sequence_with_value(&sequence, value)
}
pub fn update_or_insert<F>(&self, term: &str, default_value: V, update_fn: F) -> bool
where
F: FnOnce(&mut V),
{
let sequence: Vec<u64> = crate::CharUnit::from_str(term);
self.update_or_insert_sequence(&sequence, default_value, update_fn)
}
pub fn get_value(&self, term: &str) -> Option<V> {
let sequence: Vec<u64> = crate::CharUnit::from_str(term);
self.get_sequence_value(&sequence)
}
pub fn remove(&self, term: &str) -> bool {
let sequence: Vec<u64> = crate::CharUnit::from_str(term);
self.remove_sequence(&sequence)
}
pub fn compact(&self) -> usize {
self.needs_compaction.store(false, Ordering::Relaxed);
0
}
pub fn minimize(&self) -> usize {
0
}
pub fn extend<I, S>(&self, terms: I) -> usize
where
I: IntoIterator<Item = S>,
S: AsRef<str>,
{
let mut added = 0;
for term in terms {
if self.insert(term.as_ref()) {
added += 1;
}
}
added
}
pub fn remove_many<I, S>(&self, terms: I) -> usize
where
I: IntoIterator<Item = S>,
S: AsRef<str>,
{
let mut removed = 0;
for term in terms {
if self.remove(term.as_ref()) {
removed += 1;
}
}
removed
}
#[inline]
pub fn term_count(&self) -> usize {
self.term_count.load(Ordering::Relaxed)
}
pub fn node_count(&self) -> usize {
self.count_nodes_recursive(&self.root)
}
fn count_nodes_recursive(&self, node: &Arc<DawgNodeU64<V>>) -> usize {
let edges = node.edges.load();
let mut count = 1;
for (_, child) in edges.edges.iter() {
count += self.count_nodes_recursive(child);
}
count
}
#[inline]
pub fn needs_compaction(&self) -> bool {
self.needs_compaction.load(Ordering::Relaxed)
}
pub fn contains(&self, term: &str) -> bool {
let sequence: Vec<u64> = crate::CharUnit::from_str(term);
self.contains_sequence(&sequence)
}
pub fn insert_sequence(&self, sequence: &[u64]) -> bool {
if sequence.is_empty() {
if self
.root
.is_final
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
self.term_count.fetch_add(1, Ordering::Relaxed);
return true;
}
return false;
}
let mut current: Arc<DawgNodeU64<V>> = self.root.clone();
for (i, &label) in sequence.iter().enumerate() {
let is_last = i == sequence.len() - 1;
loop {
let edges = current.edges.load();
if let Some(child) = edges.find(label) {
if is_last {
if child
.is_final
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
self.term_count.fetch_add(1, Ordering::Relaxed);
return true;
}
return false; }
current = child.clone();
break;
} else {
let new_node = Arc::new(DawgNodeU64::new(is_last));
let new_edges = Arc::new(edges.with_edge(label, new_node.clone()));
let prev = current.edges.compare_and_swap(&edges, new_edges.clone());
if Arc::ptr_eq(&prev, &edges) {
if is_last {
self.term_count.fetch_add(1, Ordering::Relaxed);
return true;
}
current = new_node;
break;
}
}
}
}
true
}
pub fn insert_sequence_with_value(&self, sequence: &[u64], value: V) -> bool {
if sequence.is_empty() {
if self
.root
.is_final
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
self.root.value.store(Some(Arc::new(value)));
self.term_count.fetch_add(1, Ordering::Relaxed);
return true;
}
self.root.value.store(Some(Arc::new(value)));
return false;
}
let mut current: Arc<DawgNodeU64<V>> = self.root.clone();
for (i, &label) in sequence.iter().enumerate() {
let is_last = i == sequence.len() - 1;
loop {
let edges = current.edges.load();
if let Some(child) = edges.find(label) {
if is_last {
if child
.is_final
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
child.value.store(Some(Arc::new(value)));
self.term_count.fetch_add(1, Ordering::Relaxed);
return true;
}
child.value.store(Some(Arc::new(value)));
return false;
}
current = child.clone();
break;
} else {
let new_node = Arc::new(if is_last {
DawgNodeU64::new_with_value(true, Some(value.clone()))
} else {
DawgNodeU64::new(false)
});
let new_edges = Arc::new(edges.with_edge(label, new_node.clone()));
let prev = current.edges.compare_and_swap(&edges, new_edges.clone());
if Arc::ptr_eq(&prev, &edges) {
if is_last {
self.term_count.fetch_add(1, Ordering::Relaxed);
return true;
}
current = new_node;
break;
}
}
}
}
true
}
pub fn update_or_insert_sequence<F>(
&self,
sequence: &[u64],
default_value: V,
update_fn: F,
) -> bool
where
F: FnOnce(&mut V),
{
if sequence.is_empty() {
if self.root.is_final.load(Ordering::Acquire) {
let current_val = self.root.value.load();
let new_value = if let Some(val) = &*current_val {
let mut new_value = (**val).clone();
update_fn(&mut new_value);
new_value
} else {
default_value
};
self.root.value.store(Some(Arc::new(new_value)));
return false;
}
self.root.value.store(Some(Arc::new(default_value)));
self.root.is_final.store(true, Ordering::Release);
self.term_count.fetch_add(1, Ordering::Relaxed);
return true;
}
let mut current: Arc<DawgNodeU64<V>> = self.root.clone();
for &label in sequence {
loop {
let edges = current.edges.load();
if let Some(child) = edges.find(label) {
current = child.clone();
break;
}
let new_node = Arc::new(DawgNodeU64::new(false));
let new_edges = Arc::new(edges.with_edge(label, new_node.clone()));
let prev = current.edges.compare_and_swap(&edges, new_edges);
if Arc::ptr_eq(&prev, &edges) {
current = new_node;
break;
}
}
}
if current
.is_final
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
current.value.store(Some(Arc::new(default_value)));
self.term_count.fetch_add(1, Ordering::Relaxed);
return true;
}
let current_val = current.value.load();
let new_value = if let Some(val) = &*current_val {
let mut new_value = (**val).clone();
update_fn(&mut new_value);
new_value
} else {
default_value
};
current.value.store(Some(Arc::new(new_value)));
false
}
pub fn get_sequence_value(&self, sequence: &[u64]) -> Option<V> {
let mut current: Arc<DawgNodeU64<V>> = self.root.clone();
for &label in sequence {
let edges = current.edges.load();
match edges.find(label) {
Some(child) => {
current = child.clone();
}
None => return None,
}
}
if current.is_final.load(Ordering::Acquire) {
let val = current.value.load();
if let Some(v) = &*val {
return Some((**v).clone());
}
}
None
}
#[inline]
pub fn contains_sequence(&self, sequence: &[u64]) -> bool {
let mut current: Arc<DawgNodeU64<V>> = self.root.clone();
for &label in sequence {
let edges = current.edges.load();
match edges.find(label) {
Some(child) => {
current = child.clone();
}
None => return false,
}
}
current.is_final.load(Ordering::Acquire)
}
pub fn remove_sequence(&self, sequence: &[u64]) -> bool {
if sequence.is_empty() {
if self
.root
.is_final
.compare_exchange(true, false, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
self.root.value.store(None);
self.term_count.fetch_sub(1, Ordering::Relaxed);
self.needs_compaction.store(true, Ordering::Relaxed);
return true;
}
return false;
}
let mut current: Arc<DawgNodeU64<V>> = self.root.clone();
for &label in sequence {
let edges = current.edges.load();
match edges.find(label) {
Some(child) => {
current = child.clone();
}
None => return false,
}
}
if current
.is_final
.compare_exchange(true, false, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
current.value.store(None);
self.term_count.fetch_sub(1, Ordering::Relaxed);
self.needs_compaction.store(true, Ordering::Relaxed);
return true;
}
false
}
pub fn insert_f64(&self, series: &[f64]) -> bool {
let sequence: Vec<u64> = series.iter().map(|f| f.to_bits()).collect();
self.insert_sequence(&sequence)
}
pub fn insert_f64_with_value(&self, series: &[f64], value: V) -> bool {
let sequence: Vec<u64> = series.iter().map(|f| f.to_bits()).collect();
self.insert_sequence_with_value(&sequence, value)
}
pub fn contains_f64(&self, series: &[f64]) -> bool {
let sequence: Vec<u64> = series.iter().map(|f| f.to_bits()).collect();
self.contains_sequence(&sequence)
}
pub fn get_f64_value(&self, series: &[f64]) -> Option<V> {
let sequence: Vec<u64> = series.iter().map(|f| f.to_bits()).collect();
self.get_sequence_value(&sequence)
}
pub fn remove_f64(&self, series: &[f64]) -> bool {
let sequence: Vec<u64> = series.iter().map(|f| f.to_bits()).collect();
self.remove_sequence(&sequence)
}
pub fn zipper(&self) -> DynamicDawgU64Zipper<V> {
DynamicDawgU64Zipper::new_from_dict(self)
}
#[allow(dead_code)]
pub(crate) fn root_node(&self) -> &Arc<DawgNodeU64<V>> {
&self.root
}
pub fn iter(&self) -> impl Iterator<Item = Vec<u64>> + '_ {
DawgIterator::new(self)
}
pub fn iter_with_values(&self) -> impl Iterator<Item = (Vec<u64>, V)> + '_ {
DawgIteratorWithValues::new(self)
}
}
struct DawgIterator<'a, V: DictionaryValue> {
#[allow(dead_code)]
dawg: &'a DynamicDawgU64<V>,
stack: Vec<(Arc<DawgNodeU64<V>>, Vec<u64>, usize)>,
}
impl<'a, V: DictionaryValue> DawgIterator<'a, V> {
fn new(dawg: &'a DynamicDawgU64<V>) -> Self {
DawgIterator {
dawg,
stack: vec![(dawg.root.clone(), Vec::new(), 0)],
}
}
}
impl<V: DictionaryValue> Iterator for DawgIterator<'_, V> {
type Item = Vec<u64>;
fn next(&mut self) -> Option<Self::Item> {
while let Some((node, path, edge_idx)) = self.stack.pop() {
let edges = node.edges.load();
if edge_idx < edges.edges.len() {
let (label, child) = &edges.edges[edge_idx];
let mut new_path = path.clone();
new_path.push(*label);
self.stack.push((node.clone(), path, edge_idx + 1));
self.stack.push((child.clone(), new_path, 0));
} else if node.is_final.load(Ordering::Acquire) {
return Some(path);
}
}
None
}
}
struct DawgIteratorWithValues<'a, V: DictionaryValue> {
#[allow(dead_code)]
dawg: &'a DynamicDawgU64<V>,
stack: Vec<(Arc<DawgNodeU64<V>>, Vec<u64>, usize)>,
}
impl<'a, V: DictionaryValue> DawgIteratorWithValues<'a, V> {
fn new(dawg: &'a DynamicDawgU64<V>) -> Self {
DawgIteratorWithValues {
dawg,
stack: vec![(dawg.root.clone(), Vec::new(), 0)],
}
}
}
impl<V: DictionaryValue> Iterator for DawgIteratorWithValues<'_, V> {
type Item = (Vec<u64>, V);
fn next(&mut self) -> Option<Self::Item> {
while let Some((node, path, edge_idx)) = self.stack.pop() {
let edges = node.edges.load();
if edge_idx < edges.edges.len() {
let (label, child) = &edges.edges[edge_idx];
let mut new_path = path.clone();
new_path.push(*label);
self.stack.push((node.clone(), path, edge_idx + 1));
self.stack.push((child.clone(), new_path, 0));
} else if node.is_final.load(Ordering::Acquire) {
let val = node.value.load();
if let Some(v) = &*val {
return Some((path, (**v).clone()));
}
}
}
None
}
}
pub struct DynamicDawgU64Node<V: DictionaryValue> {
node: Arc<DawgNodeU64<V>>,
}
impl<V: DictionaryValue> Clone for DynamicDawgU64Node<V> {
fn clone(&self) -> Self {
DynamicDawgU64Node {
node: self.node.clone(),
}
}
}
impl<V: DictionaryValue> DictionaryNode for DynamicDawgU64Node<V> {
type Unit = u64;
fn is_final(&self) -> bool {
self.node.is_final.load(Ordering::Acquire)
}
fn transition(&self, label: Self::Unit) -> Option<Self> {
let edges = self.node.edges.load();
edges.find(label).map(|child| DynamicDawgU64Node {
node: child.clone(),
})
}
fn edges(&self) -> Box<dyn Iterator<Item = (Self::Unit, Self)> + '_> {
let edges_guard = self.node.edges.load();
let edges_vec: Vec<_> = edges_guard
.edges
.iter()
.map(|(label, child)| {
(
*label,
DynamicDawgU64Node {
node: child.clone(),
},
)
})
.collect();
Box::new(edges_vec.into_iter())
}
fn edge_count(&self) -> Option<usize> {
Some(self.node.edges.load().edges.len())
}
}
impl<V: DictionaryValue> crate::MutableDictionary for DynamicDawgU64<V> {
fn insert(&self, term: &str) -> bool {
Self::insert(self, term)
}
fn remove(&self, term: &str) -> bool {
Self::remove(self, term)
}
}
impl<V: DictionaryValue> crate::CompactableDictionary for DynamicDawgU64<V> {
fn needs_compaction(&self) -> bool {
Self::needs_compaction(self)
}
fn compact(&self) -> usize {
Self::compact(self)
}
fn minimize(&self) -> usize {
Self::minimize(self)
}
}
impl<V: DictionaryValue> Dictionary for DynamicDawgU64<V> {
type Node = DynamicDawgU64Node<V>;
fn root(&self) -> Self::Node {
DynamicDawgU64Node {
node: self.root.clone(),
}
}
fn len(&self) -> Option<usize> {
Some(self.term_count())
}
fn is_empty(&self) -> bool {
self.term_count() == 0
}
fn sync_strategy(&self) -> SyncStrategy {
SyncStrategy::InternalSync
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_new_dawg_is_empty() {
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
assert_eq!(dawg.term_count(), 0);
assert!(!dawg.needs_compaction());
}
#[test]
fn test_insert_sequence() {
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
assert!(dawg.insert_sequence(&[1, 2, 3]));
assert!(!dawg.insert_sequence(&[1, 2, 3])); assert!(dawg.insert_sequence(&[1, 2, 4]));
assert!(dawg.insert_sequence(&[5, 6, 7]));
assert_eq!(dawg.term_count(), 3);
}
#[test]
fn test_contains_sequence() {
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
dawg.insert_sequence(&[1, 2, 3]);
dawg.insert_sequence(&[1, 2, 4]);
assert!(dawg.contains_sequence(&[1, 2, 3]));
assert!(dawg.contains_sequence(&[1, 2, 4]));
assert!(!dawg.contains_sequence(&[1, 2])); assert!(!dawg.contains_sequence(&[1, 2, 5])); assert!(!dawg.contains_sequence(&[9, 9, 9]));
}
#[test]
fn test_empty_sequence() {
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
assert!(dawg.insert_sequence(&[]));
assert!(!dawg.insert_sequence(&[])); assert!(dawg.contains_sequence(&[]));
assert_eq!(dawg.term_count(), 1);
}
#[test]
fn test_remove_sequence() {
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
dawg.insert_sequence(&[1, 2, 3]);
dawg.insert_sequence(&[1, 2, 4]);
assert!(dawg.remove_sequence(&[1, 2, 3]));
assert!(!dawg.contains_sequence(&[1, 2, 3]));
assert!(dawg.contains_sequence(&[1, 2, 4])); assert_eq!(dawg.term_count(), 1);
assert!(!dawg.remove_sequence(&[1, 2, 3])); }
#[test]
fn test_f64_api() {
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
assert!(dawg.insert_f64(&[1.0, 2.0, 3.0]));
assert!(dawg.insert_f64(&[1.0, 2.0, 4.0]));
assert!(!dawg.insert_f64(&[1.0, 2.0, 3.0]));
assert!(dawg.contains_f64(&[1.0, 2.0, 3.0]));
assert!(dawg.contains_f64(&[1.0, 2.0, 4.0]));
assert!(!dawg.contains_f64(&[1.0, 2.0, 5.0]));
}
#[test]
fn test_f64_edge_cases() {
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
dawg.insert_f64(&[0.0, f64::INFINITY, f64::NEG_INFINITY]);
dawg.insert_f64(&[-0.0]);
assert!(dawg.contains_f64(&[0.0, f64::INFINITY, f64::NEG_INFINITY]));
assert!(dawg.contains_f64(&[-0.0]));
let nan_bits = f64::NAN.to_bits();
dawg.insert_sequence(&[nan_bits]);
assert!(dawg.contains_sequence(&[nan_bits]));
}
#[test]
fn test_valued_dawg() {
let dawg: DynamicDawgU64<u32> = DynamicDawgU64::new();
assert!(dawg.insert_sequence_with_value(&[1, 2, 3], 42));
assert!(!dawg.insert_sequence_with_value(&[1, 2, 3], 99));
assert_eq!(dawg.get_sequence_value(&[1, 2, 3]), Some(99));
assert_eq!(dawg.get_sequence_value(&[1, 2, 4]), None);
}
#[test]
fn test_string_api() {
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
assert!(dawg.insert("hello"));
assert!(!dawg.insert("hello")); assert!(dawg.insert("world"));
assert!(dawg.contains("hello"));
assert!(dawg.contains("world"));
assert!(!dawg.contains("foo"));
}
#[test]
fn test_clone() {
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
dawg.insert_sequence(&[1, 2, 3]);
dawg.insert_sequence(&[4, 5, 6]);
let cloned = dawg.clone();
assert_eq!(cloned.term_count(), dawg.term_count());
assert!(cloned.contains_sequence(&[1, 2, 3]));
assert!(cloned.contains_sequence(&[4, 5, 6]));
cloned.insert_sequence(&[7, 8, 9]);
assert!(cloned.contains_sequence(&[7, 8, 9]));
assert!(!dawg.contains_sequence(&[7, 8, 9]));
}
#[test]
fn test_concurrent_inserts() {
use std::sync::Arc as StdArc;
use std::thread;
let dawg = StdArc::new(DynamicDawgU64::<()>::new());
let num_threads = 8;
let sequences_per_thread = 100;
let handles: Vec<_> = (0..num_threads)
.map(|t| {
let d = dawg.clone();
thread::spawn(move || {
for i in 0..sequences_per_thread {
let seq = vec![t as u64, i as u64];
d.insert_sequence(&seq);
}
})
})
.collect();
for h in handles {
h.join().unwrap();
}
assert_eq!(dawg.term_count(), num_threads * sequences_per_thread);
for t in 0..num_threads {
for i in 0..sequences_per_thread {
assert!(dawg.contains_sequence(&[t as u64, i as u64]));
}
}
}
#[test]
fn test_concurrent_reads_and_writes() {
use std::sync::Arc as StdArc;
use std::thread;
let dawg = StdArc::new(DynamicDawgU64::<()>::new());
for i in 0..100 {
dawg.insert_sequence(&[i, i + 1, i + 2]);
}
let handles: Vec<_> = (0..10)
.map(|t| {
let d = dawg.clone();
thread::spawn(move || {
if t % 2 == 0 {
for i in 100 + t * 10..100 + (t + 1) * 10 {
d.insert_sequence(&[i as u64, i as u64 + 1]);
}
} else {
for i in 0..100 {
let _ = d.contains_sequence(&[i, i + 1, i + 2]);
}
}
})
})
.collect();
for h in handles {
h.join().unwrap();
}
for i in 0..100u64 {
assert!(dawg.contains_sequence(&[i, i + 1, i + 2]));
}
}
#[test]
fn test_dictionary_trait() {
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
dawg.insert_sequence(&[1, 2, 3]);
let root = dawg.root();
assert!(!root.is_final());
let n1 = root.transition(1).expect("Should have transition");
assert!(!n1.is_final());
let n2 = n1.transition(2).expect("Should have transition");
assert!(!n2.is_final());
let n3 = n2.transition(3).expect("Should have transition");
assert!(n3.is_final());
assert!(n2.transition(9).is_none());
}
#[test]
fn test_from_terms() {
let terms = vec!["apple", "banana", "cherry"];
let dawg: DynamicDawgU64<()> = DynamicDawgU64::from_terms(terms);
assert!(dawg.contains("apple"));
assert!(dawg.contains("banana"));
assert!(dawg.contains("cherry"));
assert_eq!(dawg.term_count(), 3);
}
#[test]
fn test_extend() {
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
dawg.insert("existing");
let added = dawg.extend(vec!["new1", "new2", "existing"]);
assert_eq!(added, 2); assert_eq!(dawg.term_count(), 3);
}
#[test]
fn test_node_count() {
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
assert_eq!(dawg.node_count(), 1);
dawg.insert_sequence(&[1, 2, 3]);
assert_eq!(dawg.node_count(), 4);
dawg.insert_sequence(&[1, 2, 4]);
assert_eq!(dawg.node_count(), 5); }
#[test]
fn test_prefix_sharing() {
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
dawg.insert_sequence(&[1, 2, 3]);
dawg.insert_sequence(&[1, 2, 4]);
dawg.insert_sequence(&[1, 2, 5]);
assert_eq!(dawg.node_count(), 6);
}
#[test]
fn test_stress_100_concurrent_readers() {
use std::sync::Arc as StdArc;
use std::thread;
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
for i in 0u64..1000 {
dawg.insert_sequence(&[i, i + 1, i + 2]);
}
let dawg = StdArc::new(dawg);
let handles: Vec<_> = (0..100)
.map(|reader_id| {
let dawg = StdArc::clone(&dawg);
thread::spawn(move || {
for i in 0u64..1000 {
let seq = [i, i + 1, i + 2];
let found = dawg.contains_sequence(&seq);
assert!(found, "Reader {reader_id} failed to find sequence {i}");
}
})
})
.collect();
for handle in handles {
handle.join().expect("Reader thread panicked");
}
}
#[test]
fn test_stress_readers_and_writers() {
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc as StdArc;
use std::thread;
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
for i in 0u64..100 {
dawg.insert_sequence(&[i, i + 1]);
}
let dawg = StdArc::new(dawg);
let stop = StdArc::new(AtomicBool::new(false));
let reader_handles: Vec<_> = (0..10)
.map(|_| {
let dawg = StdArc::clone(&dawg);
let stop = StdArc::clone(&stop);
thread::spawn(move || {
let mut reads = 0u64;
while !stop.load(Ordering::Relaxed) {
for i in 0u64..100 {
let _ = dawg.contains_sequence(&[i, i + 1]);
reads += 1;
}
}
reads
})
})
.collect();
let writer_handles: Vec<_> = (0..10)
.map(|writer_id| {
let dawg = StdArc::clone(&dawg);
thread::spawn(move || {
let base = 1000 + (writer_id as u64 * 100);
for i in 0u64..100 {
dawg.insert_sequence(&[base + i, base + i + 1, base + i + 2]);
}
})
})
.collect();
for handle in writer_handles {
handle.join().expect("Writer thread panicked");
}
stop.store(true, Ordering::Relaxed);
let total_reads: u64 = reader_handles
.into_iter()
.map(|h| h.join().expect("Reader thread panicked"))
.sum();
for writer_id in 0..10 {
let base = 1000 + (writer_id as u64 * 100);
for i in 0u64..100 {
assert!(
dawg.contains_sequence(&[base + i, base + i + 1, base + i + 2]),
"Missing sequence from writer {writer_id} at offset {i}"
);
}
}
assert!(total_reads > 1000, "Expected many reads, got {total_reads}");
}
#[test]
fn test_stress_50_writers_same_keys() {
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc as StdArc;
use std::thread;
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
let dawg = StdArc::new(dawg);
let successful_inserts = StdArc::new(AtomicUsize::new(0));
let handles: Vec<_> = (0..50)
.map(|_| {
let dawg = StdArc::clone(&dawg);
let successful_inserts = StdArc::clone(&successful_inserts);
thread::spawn(move || {
for i in 0u64..100 {
if dawg.insert_sequence(&[i, i + 1, i + 2]) {
successful_inserts.fetch_add(1, Ordering::Relaxed);
}
}
})
})
.collect();
for handle in handles {
handle.join().expect("Writer thread panicked");
}
assert_eq!(dawg.term_count(), 100);
assert_eq!(successful_inserts.load(Ordering::Relaxed), 100);
for i in 0u64..100 {
assert!(dawg.contains_sequence(&[i, i + 1, i + 2]));
}
}
#[test]
fn test_stress_50_writers_disjoint_keys() {
use std::sync::Arc as StdArc;
use std::thread;
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
let dawg = StdArc::new(dawg);
let handles: Vec<_> = (0..50)
.map(|writer_id| {
let dawg = StdArc::clone(&dawg);
thread::spawn(move || {
let base = writer_id as u64 * 1000;
for i in 0u64..100 {
let inserted = dawg.insert_sequence(&[base + i, base + i + 1]);
assert!(
inserted,
"Writer {writer_id} failed to insert unique seq {i}"
);
}
})
})
.collect();
for handle in handles {
handle.join().expect("Writer thread panicked");
}
assert_eq!(dawg.term_count(), 5000);
for writer_id in 0u64..50 {
let base = writer_id * 1000;
for i in 0u64..100 {
assert!(
dawg.contains_sequence(&[base + i, base + i + 1]),
"Missing sequence from writer {writer_id} at offset {i}"
);
}
}
}
#[test]
fn test_stress_valued_concurrent_writes() {
use std::sync::Arc as StdArc;
use std::thread;
let dawg: DynamicDawgU64<u64> = DynamicDawgU64::new();
let dawg = StdArc::new(dawg);
let handles: Vec<_> = (0..20)
.map(|writer_id| {
let dawg = StdArc::clone(&dawg);
thread::spawn(move || {
let base = writer_id as u64 * 100;
for i in 0u64..50 {
let value = base + i;
dawg.insert_sequence_with_value(&[base + i], value);
}
})
})
.collect();
for handle in handles {
handle.join().expect("Writer thread panicked");
}
assert_eq!(dawg.term_count(), 1000);
for writer_id in 0u64..20 {
let base = writer_id * 100;
for i in 0u64..50 {
let expected_value = base + i;
let value = dawg.get_sequence_value(&[base + i]);
assert_eq!(
value,
Some(expected_value),
"Wrong value for sequence [{base} + {i}]"
);
}
}
}
#[test]
fn test_stress_remove_while_reading() {
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc as StdArc;
use std::thread;
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
for i in 0u64..1000 {
dawg.insert_sequence(&[i, i + 1]);
}
let dawg = StdArc::new(dawg);
let stop = StdArc::new(AtomicBool::new(false));
let reader_handles: Vec<_> = (0..5)
.map(|_| {
let dawg = StdArc::clone(&dawg);
let stop = StdArc::clone(&stop);
thread::spawn(move || {
while !stop.load(Ordering::Relaxed) {
for i in 0u64..1000 {
let _ = dawg.contains_sequence(&[i, i + 1]);
}
}
})
})
.collect();
let remover_handles: Vec<_> = (0..5)
.map(|remover_id| {
let dawg = StdArc::clone(&dawg);
thread::spawn(move || {
let base = remover_id as u64 * 200;
for i in 0u64..200 {
dawg.remove_sequence(&[base + i, base + i + 1]);
}
})
})
.collect();
for handle in remover_handles {
handle.join().expect("Remover thread panicked");
}
stop.store(true, Ordering::Relaxed);
for handle in reader_handles {
handle.join().expect("Reader thread panicked");
}
assert_eq!(dawg.term_count(), 0);
}
#[test]
fn test_stress_iterator_during_writes() {
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc as StdArc;
use std::thread;
let dawg: DynamicDawgU64<()> = DynamicDawgU64::new();
for i in 0u64..100 {
dawg.insert_sequence(&[i]);
}
let dawg = StdArc::new(dawg);
let stop = StdArc::new(AtomicBool::new(false));
let iter_dawg = StdArc::clone(&dawg);
let iter_stop = StdArc::clone(&stop);
let iter_handle = thread::spawn(move || {
let mut iteration_count = 0;
while !iter_stop.load(Ordering::Relaxed) {
let terms: Vec<_> = iter_dawg.iter().collect();
assert!(terms.len() >= 100);
iteration_count += 1;
}
iteration_count
});
let writer_handles: Vec<_> = (0..10)
.map(|writer_id| {
let dawg = StdArc::clone(&dawg);
thread::spawn(move || {
let base = 1000 + writer_id as u64 * 100;
for i in 0u64..100 {
dawg.insert_sequence(&[base + i]);
}
})
})
.collect();
for handle in writer_handles {
handle.join().expect("Writer thread panicked");
}
stop.store(true, Ordering::Relaxed);
let iterations = iter_handle.join().expect("Iterator thread panicked");
assert!(
iterations > 0,
"Iterator thread should have run at least once"
);
assert_eq!(dawg.term_count(), 1100);
}
}