sequence_trie/
lib.rs

1//! Sequence Trie - a trie-like data-structure for storing sequences of values.
2//!
3//! See the `SequenceTrie` type for documentation.
4
5#[cfg(not(feature = "btreemap"))]
6use std::hash::Hash;
7#[cfg(not(feature = "btreemap"))]
8use std::collections::hash_map::{self, HashMap};
9
10#[cfg(feature = "btreemap")]
11use std::collections::{btree_map, BTreeMap};
12
13use std::hash::BuildHasher;
14use std::collections::hash_map::RandomState;
15
16use std::iter::IntoIterator;
17use std::default::Default;
18use std::borrow::{Borrow, ToOwned};
19use std::mem;
20use std::marker::PhantomData;
21
22#[cfg(feature = "serde")]
23use serde::{Serialize, Deserialize};
24
25#[cfg(feature = "serde")]
26#[macro_use]
27extern crate serde;
28
29#[cfg(test)]
30mod tests;
31#[cfg(all(test, feature = "serde"))]
32mod serde_tests;
33
34/// A `SequenceTrie` is recursively defined as a value and a map containing child Tries.
35///
36/// Typically, Tries are used to store strings, which can be thought of as lists of `char`s.
37/// Generalising this to any key type, a Trie is a data structure storing values for keys
38/// which are themselves lists. Let the parts of such a list-key be called "key fragments".
39/// In our representation of a Trie, `K` denotes the type of the key fragments.
40///
41/// The nesting of child Tries creates a tree structure which can be traversed by mapping
42/// key fragments onto nodes. The structure is similar to a k-ary tree, except that the children
43/// are stored in `HashMap`s, and there is no bound on the number of children a single node may
44/// have (effectively k = ∞). In a `SequenceTrie` with `char` key fragments, the key
45/// `['a', 'b', 'c']` might correspond to something like this:
46///
47/// ```text
48/// SequenceTrie {
49///     value: Some(0),
50///     children: 'a' => SequenceTrie {
51///         value: Some(1),
52///         children: 'b' => SequenceTrie {
53///             value: None,
54///             children: 'c' => SequenceTrie {
55///                 value: Some(3),
56///                 children: Nil
57///             }
58///         }
59///     }
60/// }
61/// ```
62///
63/// Values are stored optionally at each node because inserting a value for a list-key only inserts
64/// a value for the last fragment of the key. The intermediate prefix nodes are created with value
65/// `None` if they do not exist already.
66///
67/// The above `SequenceTrie` could be created using the following sequence of operations:
68///
69/// ```
70/// # use sequence_trie::SequenceTrie;
71/// let mut trie: SequenceTrie<char, i32> = SequenceTrie::new();
72/// trie.insert(&['a', 'b', 'c'], 3);
73/// trie.insert(&[], 0);
74/// trie.insert(&['a'], 1);
75/// ```
76///
77/// The order of insertion is never important.
78///
79/// One interesting thing about Tries is that every key is a *descendant* of the root, which itself
80/// has no key fragment. Although this is a rather trivial observation, it means that every key
81/// corresponds to a non-empty sequence of prefix nodes in the tree. This observation is the
82/// motivation for the `get_prefix_nodes` method, which returns the nodes corresponding to the longest
83/// prefix of a given key.
84///
85/// The empty list key, `[]`, always corresponds to the root node of the Trie.
86///
87/// # The Sequence Trie Invariant
88/// All leaf nodes have non-trivial values (not equal to `None`). This invariant is maintained by
89/// the insertion and removal methods and can be relied upon.
90#[derive(Debug, Clone)]
91#[cfg(not(feature = "btreemap"))]
92#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
93#[cfg_attr(feature = "serde", serde(bound(serialize = "K: Serialize, V: Serialize")))]
94#[cfg_attr(feature = "serde",
95    serde(bound(deserialize = "K: Deserialize<'de>, V: Deserialize<'de>")))]
96pub struct SequenceTrie<K, V, S = RandomState>
97    where K: TrieKey,
98          S: BuildHasher + Default,
99{
100    /// Node value.
101    value: Option<V>,
102
103    /// Node children as a hashmap keyed by key fragments.
104    children: HashMap<K, SequenceTrie<K, V, S>, S>,
105}
106
107#[derive(Debug, Clone)]
108#[cfg(feature = "btreemap")]
109#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
110#[cfg_attr(feature = "serde", serde(bound(serialize = "K: Serialize, V: Serialize")))]
111#[cfg_attr(feature = "serde",
112    serde(bound(deserialize = "K: Deserialize<'de>, V: Deserialize<'de>")))]
113pub struct SequenceTrie<K, V, S = RandomState>
114    where K: TrieKey,
115          S: BuildHasher + Default,
116{
117    /// Node value.
118    value: Option<V>,
119
120    /// Node children as a btreemap keyed by key fragments.
121    children: BTreeMap<K, SequenceTrie<K, V, S>>,
122
123    /// Fake hasher for compatibility.
124    #[cfg_attr(feature = "serde", serde(skip))]
125    _phantom: PhantomData<S>,
126}
127
128/// Aggregate trait for types which can be used to key a `SequenceTrie`.
129///
130/// This trait is automatically implemented for all types implementing
131/// the supertraits.
132#[cfg(not(feature = "btreemap"))]
133pub trait TrieKey: Eq + Hash {}
134#[cfg(not(feature = "btreemap"))]
135impl<K> TrieKey for K where K: Eq + Hash + ?Sized {}
136
137#[cfg(feature = "btreemap")]
138pub trait TrieKey: Ord {}
139#[cfg(feature = "btreemap")]
140impl<K> TrieKey for K where K: Ord + ?Sized {}
141
142#[cfg(not(feature = "btreemap"))]
143impl<K, V> SequenceTrie<K, V>
144    where K: TrieKey,
145{
146    /// Creates a new `SequenceTrie` node with no value and an empty child map.
147    pub fn new() -> SequenceTrie<K, V> {
148        SequenceTrie::with_hasher(RandomState::new())
149    }
150}
151
152#[cfg(not(feature = "btreemap"))]
153impl<K, V, S> SequenceTrie<K, V, S>
154    where K: TrieKey,
155          S: BuildHasher + Default + Clone,
156{
157    pub fn with_hasher(hash_builder: S) -> SequenceTrie<K, V, S> {
158        SequenceTrie {
159            value: None,
160            children: HashMap::with_hasher(hash_builder),
161        }
162    }
163}
164
165#[cfg(feature = "btreemap")]
166impl<K, V> SequenceTrie<K, V>
167    where K: TrieKey,
168{
169    /// Creates a new `SequenceTrie` node with no value and an empty child map.
170    pub fn new() -> SequenceTrie<K, V> {
171        Self::new_generic()
172    }
173}
174
175#[cfg(feature = "btreemap")]
176impl<K, V, S> SequenceTrie<K, V, S>
177    where K: TrieKey,
178          S: BuildHasher + Default + Clone,
179{
180    /// Creates a new `SequenceTrie` node with no value and an empty child map.
181    pub fn new_generic() -> SequenceTrie<K, V, S> {
182        SequenceTrie {
183            value: None,
184            children: BTreeMap::new(),
185            _phantom: PhantomData,
186        }
187    }
188}
189
190impl<K, V, S> SequenceTrie<K, V, S>
191    where K: TrieKey,
192          S: BuildHasher + Default + Clone,
193{
194    /// Retrieve the value stored at this node.
195    pub fn value(&self) -> Option<&V> {
196        self.value.as_ref()
197    }
198
199    /// Retrieve a mutable reference to the value stored at this node.
200    pub fn value_mut(&mut self) -> Option<&mut V> {
201        self.value.as_mut()
202    }
203
204    /// Checks if this node is empty.
205    ///
206    /// A node is considered empty when it has no value and no children.
207    pub fn is_empty(&self) -> bool {
208        self.is_leaf() && self.value.is_none()
209    }
210
211    /// Checks if this node has no descendants.
212    pub fn is_leaf(&self) -> bool {
213        self.children.is_empty()
214    }
215
216    /// Inserts a key and value into the SequenceTrie.
217    ///
218    /// Returns `None` if the key did not already correspond to a value, otherwise the old value is
219    /// returned.
220    pub fn insert<'key, I, Q: 'key + ?Sized>(&mut self, key: I, value: V) -> Option<V>
221        where I: IntoIterator<Item = &'key Q>,
222              Q: ToOwned<Owned = K>,
223              K: Borrow<Q>
224    {
225        self.insert_owned(key.into_iter().map(ToOwned::to_owned), value)
226    }
227
228    /// Version of `insert` that takes an owned sequence of key fragments.
229    ///
230    /// This function is used internally by `insert`.
231    #[cfg(not(feature = "btreemap"))]
232    pub fn insert_owned<I>(&mut self, key: I, value: V) -> Option<V>
233        where I: IntoIterator<Item = K>
234    {
235        let key_node = key.into_iter().fold(self, |current_node, fragment| {
236            let hash_builder = current_node.children.hasher().clone();
237            current_node.children
238                .entry(fragment)
239                .or_insert_with(|| Self::with_hasher(hash_builder))
240        });
241
242        mem::replace(&mut key_node.value, Some(value))
243    }
244
245    #[cfg(feature = "btreemap")]
246    pub fn insert_owned<I>(&mut self, key: I, value: V) -> Option<V>
247        where I: IntoIterator<Item = K>
248    {
249        let key_node = key.into_iter().fold(self, |current_node, fragment| {
250            current_node.children
251                .entry(fragment)
252                .or_insert_with(Self::new_generic)
253        });
254
255        mem::replace(&mut key_node.value, Some(value))
256    }
257
258    /// Finds a reference to a key's value, if it has one.
259    pub fn get<'key, I, Q: ?Sized>(&self, key: I) -> Option<&V>
260        where I: IntoIterator<Item = &'key Q>,
261              K: Borrow<Q>,
262              Q: TrieKey + 'key
263    {
264        self.get_node(key).and_then(|node| node.value.as_ref())
265    }
266
267    /// Finds a reference to a key's node, if it has one.
268    pub fn get_node<'key, I, Q: ?Sized>(&self, key: I) -> Option<&SequenceTrie<K, V, S>>
269        where I: IntoIterator<Item = &'key Q>,
270              K: Borrow<Q>,
271              Q: TrieKey + 'key
272    {
273        let mut current_node = self;
274
275        for fragment in key {
276            match current_node.children.get(fragment) {
277                Some(node) => current_node = node,
278                None => return None,
279            }
280        }
281
282        Some(current_node)
283    }
284
285    /// Finds a mutable reference to a key's value, if it has one.
286    pub fn get_mut<'key, I, Q: ?Sized>(&mut self, key: I) -> Option<&mut V>
287        where I: IntoIterator<Item = &'key Q>,
288              K: Borrow<Q>,
289              Q: TrieKey + 'key
290    {
291        self.get_node_mut(key).and_then(|node| node.value.as_mut())
292    }
293
294    /// Finds a mutable reference to a key's node, if it has one.
295    pub fn get_node_mut<'key, I, Q: ?Sized>(&mut self, key: I) -> Option<&mut SequenceTrie<K, V, S>>
296        where I: IntoIterator<Item = &'key Q>,
297              K: Borrow<Q>,
298              Q: TrieKey + 'key
299    {
300        let mut current_node = Some(self);
301
302        for fragment in key {
303            match current_node.and_then(|node| node.children.get_mut(fragment)) {
304                Some(node) => current_node = Some(node),
305                None => return None,
306            }
307        }
308
309        current_node
310    }
311
312    /// Finds the longest prefix of nodes which match the given key.
313    pub fn get_prefix_nodes<'key, I, Q: ?Sized>(&self, key: I) -> Vec<&SequenceTrie<K, V, S>>
314        where I: 'key + IntoIterator<Item = &'key Q>,
315              K: Borrow<Q>,
316              Q: TrieKey + 'key
317    {
318        self.prefix_iter(key).collect()
319    }
320
321    /// Finds the value of the nearest ancestor with a non-empty value, if one exists.
322    ///
323    /// If all ancestors have empty (`None`) values, `None` is returned.
324    pub fn get_ancestor<'key, I, Q: ?Sized>(&self, key: I) -> Option<&V>
325        where I: 'key + IntoIterator<Item = &'key Q>,
326              K: Borrow<Q>,
327              Q: TrieKey + 'key
328    {
329        self.get_ancestor_node(key).and_then(|node| node.value.as_ref())
330    }
331
332    /// Finds the nearest ancestor with a non-empty value, if one exists.
333    ///
334    /// If all ancestors have empty (`None`) values, `None` is returned.
335    pub fn get_ancestor_node<'key, I, Q: ?Sized>(&self, key: I) -> Option<&SequenceTrie<K, V, S>>
336        where I: 'key + IntoIterator<Item = &'key Q>,
337              K: Borrow<Q>,
338              Q: TrieKey + 'key
339    {
340        self.prefix_iter(key)
341            .filter(|node| node.value.is_some())
342            .last()
343    }
344
345    /// Removes the node corresponding to the given key.
346    ///
347    /// This operation is like the reverse of `insert` in that
348    /// it also deletes extraneous nodes on the path from the root.
349    ///
350    /// If the key node has children, its value is set to `None` and no further
351    /// action is taken. If the key node is a leaf, then it and its ancestors with
352    /// empty values and no other children are deleted. Deletion proceeds up the tree
353    /// from the key node until a node with a non-empty value or children is reached.
354    ///
355    /// If the key doesn't match a node in the Trie, no action is taken.
356    pub fn remove<'key, I, Q: ?Sized>(&mut self, key: I)
357        where I: IntoIterator<Item = &'key Q>,
358              K: Borrow<Q>,
359              Q: TrieKey + 'key
360    {
361        self.remove_recursive(key);
362    }
363
364    /// Recursive remove method that uses the call stack to safely and
365    /// efficiently remove a node and its extraneous ancestors.
366    ///
367    /// Return `true` if the node should be deleted.
368    ///
369    /// See `remove` above.
370    fn remove_recursive<'key, I, Q: ?Sized>(&mut self, key: I) -> bool
371        where I: IntoIterator<Item = &'key Q>,
372              K: Borrow<Q>,
373              Q: TrieKey + 'key
374    {
375        let mut fragments = key.into_iter();
376        match fragments.next() {
377            // Base case: Leaf node, no key left to recurse on.
378            None => {
379                self.value = None;
380            }
381
382            // Recursive case: Inner node, delete children.
383            Some(fragment) => {
384                let delete_child = match self.children.get_mut(fragment) {
385                    Some(child) => child.remove_recursive(fragments),
386                    None => false,
387                };
388
389                if delete_child {
390                    self.children.remove(fragment);
391                }
392                // NB: If the child isn't found, false will be returned.
393                // The `self` node is either a leaf, with a non-trivial value, or an
394                // inner node (with children).
395            }
396        }
397
398        // If the node is childless and valueless, mark it for deletion.
399        self.is_empty()
400    }
401
402    /// Recursively apply a function to every node in the trie.
403    ///
404    /// Nodes are visited "bottom-up" (children before parent).
405    /// If `f` returns a value, it replaces the value at that node.
406    /// Otherwise, the node's value remains unchanged.
407    pub fn map<F>(&mut self, f: F) where F: Fn(&Self) -> Option<V> {
408        self.map_rec(&f)
409    }
410
411    /// Internal version of map that takes the closure by reference.
412    fn map_rec<F>(&mut self, f: &F) where F: Fn(&Self) -> Option<V> {
413        for child in self.children.values_mut() {
414            child.map_rec(f);
415        }
416
417        if let Some(v) = f(&*self) {
418            self.value = Some(v);
419        }
420    }
421
422    /// Returns an iterator over all the key-value pairs in the collection.
423    pub fn iter(&self) -> Iter<K, V, S> {
424        Iter {
425            root: self,
426            root_visited: false,
427            key: vec![],
428            stack: vec![],
429        }
430    }
431
432    /// Returns an iterator over all the keys in the trie.
433    pub fn keys(&self) -> Keys<K, V, S> {
434        Keys { inner: self.iter() }
435    }
436
437    /// Returns an iterator over all the values stored in the trie.
438    pub fn values(&self) -> Values<K, V, S> {
439        Values { inner: self.iter() }
440    }
441
442    /// Returns an iterator over the longest prefix of nodes which match the given key.
443    pub fn prefix_iter<'trie, 'key, I, Q: ?Sized>(&'trie self,
444                                                  key: I)
445                                                  -> PrefixIter<'trie, 'key, K, V, Q, I::IntoIter, S>
446        where I: IntoIterator<Item = &'key Q>,
447              K: Borrow<Q>,
448              Q: TrieKey + 'key
449    {
450        PrefixIter {
451            next_node: Some(self),
452            fragments: key.into_iter(),
453            _phantom: PhantomData,
454        }
455    }
456
457    /// Return all the children of this node, in an arbitrary order.
458    pub fn children(&self) -> Vec<&Self> {
459        self.children.values().collect()
460    }
461
462    /// Children of this node, with their associated keys in arbitrary order.
463    pub fn children_with_keys<'a>(&'a self) -> Vec<(&'a K, &'a Self)> {
464        self.children.iter().collect()
465    }
466}
467
468/// Iterator over the keys and values of a `SequenceTrie`.
469pub struct Iter<'a, K: 'a, V: 'a, S: 'a = RandomState>
470    where K: TrieKey,
471          S: BuildHasher + Default
472{
473    root: &'a SequenceTrie<K, V, S>,
474    root_visited: bool,
475    key: Vec<&'a K>,
476    stack: Vec<StackItem<'a, K, V, S>>,
477}
478
479/// Vector of key fragment references and values, yielded during iteration.
480pub type KeyValuePair<'a, K, V> = (Vec<&'a K>, &'a V);
481
482/// Iterator over the keys of a `SequenceTrie`.
483pub struct Keys<'a, K: 'a, V: 'a, S: 'a = RandomState>
484    where K: TrieKey,
485          S: BuildHasher + Default
486{
487    inner: Iter<'a, K, V, S>,
488}
489
490/// Iterator over the values of a `SequenceTrie`.
491pub struct Values<'a, K: 'a, V: 'a, S: 'a = RandomState>
492    where K: TrieKey,
493          S: BuildHasher + Default
494{
495    inner: Iter<'a, K, V, S>,
496}
497
498/// Information stored on the iteration stack whilst exploring.
499struct StackItem<'a, K: 'a, V: 'a, S: 'a = RandomState>
500    where K: TrieKey,
501          S: BuildHasher + Default
502{
503    #[cfg(not(feature = "btreemap"))]
504    child_iter: hash_map::Iter<'a, K, SequenceTrie<K, V, S>>,
505    #[cfg(feature = "btreemap")]
506    child_iter: btree_map::Iter<'a, K, SequenceTrie<K, V, S>>,
507}
508
509/// Delayed action type for iteration stack manipulation.
510enum IterAction<'a, K: 'a, V: 'a, S: 'a>
511    where K: TrieKey,
512          S: BuildHasher + Default
513{
514    Push(&'a K, &'a SequenceTrie<K, V, S>),
515    Pop,
516}
517
518impl<'a, K, V, S> Iterator for Iter<'a, K, V, S>
519    where K: TrieKey,
520          S: BuildHasher + Default
521{
522    type Item = KeyValuePair<'a, K, V>;
523
524    fn next(&mut self) -> Option<KeyValuePair<'a, K, V>> {
525        use IterAction::*;
526
527        // Special handling for the root.
528        if !self.root_visited {
529            self.root_visited = true;
530            self.stack.push(StackItem { child_iter: self.root.children.iter() });
531            if let Some(ref root_val) = self.root.value {
532                return Some((vec![], root_val));
533            }
534        }
535
536        loop {
537            let action = match self.stack.last_mut() {
538                Some(stack_top) => {
539                    match stack_top.child_iter.next() {
540                        Some((fragment, child_node)) => Push(fragment, child_node),
541                        None => Pop,
542                    }
543                }
544                None => return None,
545            };
546
547            match action {
548                Push(fragment, node) => {
549                    self.stack.push(StackItem { child_iter: node.children.iter() });
550                    self.key.push(fragment);
551                    if let Some(ref value) = node.value {
552                        return Some((self.key.clone(), value));
553                    }
554                }
555                Pop => {
556                    self.key.pop();
557                    self.stack.pop();
558                }
559            }
560        }
561    }
562}
563
564impl<'a, K, V, S> Iterator for Keys<'a, K, V, S>
565    where K: TrieKey,
566          S: BuildHasher + Default,
567{
568    type Item = Vec<&'a K>;
569
570    fn next(&mut self) -> Option<Vec<&'a K>> {
571        self.inner.next().map(|(k, _)| k)
572    }
573}
574
575impl<'a, K, V, S> Iterator for Values<'a, K, V, S>
576    where K: TrieKey,
577          S: BuildHasher + Default
578{
579    type Item = &'a V;
580
581    fn next(&mut self) -> Option<&'a V> {
582        self.inner.next().map(|(_, v)| v)
583    }
584}
585
586impl<K, V, S> PartialEq for SequenceTrie<K, V, S>
587    where K: TrieKey,
588          V: PartialEq,
589          S: BuildHasher + Default
590{
591    fn eq(&self, other: &Self) -> bool {
592        self.value == other.value && self.children == other.children
593    }
594}
595
596impl<K, V, S> Eq for SequenceTrie<K, V, S>
597    where K: TrieKey,
598          V: Eq,
599          S: BuildHasher + Default
600{}
601
602impl<K, V, S> Default for SequenceTrie<K, V, S>
603    where K: TrieKey,
604          S: BuildHasher + Default + Clone
605{
606    fn default() -> Self {
607        #[cfg(not(feature = "btreemap"))]
608        { SequenceTrie::with_hasher(S::default()) }
609        #[cfg(feature = "btreemap")]
610        { SequenceTrie::new_generic() }
611    }
612}
613
614/// Iterator over the longest prefix of nodes which matches a key.
615pub struct PrefixIter<'trie, 'key, K, V, Q: ?Sized, I, S = RandomState>
616    where K: 'trie + TrieKey,
617          V: 'trie,
618          I: 'key + Iterator<Item = &'key Q>,
619          K: Borrow<Q>,
620          Q: TrieKey + 'key,
621          S: 'trie + BuildHasher + Default
622{
623    next_node: Option<&'trie SequenceTrie<K, V, S>>,
624    fragments: I,
625    _phantom: PhantomData<&'key I>,
626}
627
628impl<'trie, 'key, K, V, Q: ?Sized, I, S> Iterator for PrefixIter<'trie, 'key, K, V, Q, I, S>
629    where K: 'trie + TrieKey,
630          V: 'trie,
631          I: 'key + Iterator<Item = &'key Q>,
632          K: Borrow<Q>,
633          Q: TrieKey + 'key,
634          S: BuildHasher + Default
635{
636    type Item = &'trie SequenceTrie<K, V, S>;
637
638    fn next(&mut self) -> Option<Self::Item> {
639        if let Some(current_node) = self.next_node.take() {
640            if let Some(fragment) = self.fragments.next() {
641                self.next_node = current_node.children.get(fragment)
642            }
643
644            return Some(current_node);
645        }
646
647        None
648    }
649
650    fn size_hint(&self) -> (usize, Option<usize>) {
651        let lower = if self.next_node.is_some() {
652            1
653        } else {
654            0
655        };
656
657        (lower, self.fragments.size_hint().1)
658    }
659}