v_trie/
lib.rs

1/*!
2 * A compressed prefix tree implementation.
3 *
4 * Accepted keys are any type with slices that implement Eq.
5 * Accepted values are anything that the tree can own.
6 *
7 * It is recommended to use static sized numbers or strings. Use `u8` if
8 * string keys are being used.
9 * 
10 * Using the Trie with strings requires using the designated string
11 * functions; this will automatically turn the string into a slice of 
12 * u8's which is compatible with the tree. Unicode equivalency is not
13 * checked.
14 * 
15 * It is not recommended to use the Trie with large structs as the key,
16 * as performance will suffer due to the key being cloned into the prefix
17 * tree, as opposed to referenced or taken into ownership.
18 * 
19 * Some unimplemented todos include:
20 * - Add support for iteration
21 * - Add support for in-place iteration, without IntoIter
22 * 
23 * In-place iteration without requiring the usage of Rc's or unsafe will likely be difficult due to the
24 * tree's unidirectional nature.
25 */
26
27#![forbid(unsafe_code, missing_docs, missing_debug_implementations)]
28
29cfg_if::cfg_if! {
30    if #[cfg(feature ="std")] {
31        extern crate std;
32        use std::vec::Vec;
33    }
34    else {
35        extern crate alloc;
36        use alloc::vec::Vec;
37    }
38}
39
40#[derive(Debug)]
41/// Error returned if attempting to set a key with try_set, and the key already exists.
42pub struct KeyExistsError;
43
44#[derive(Debug)]
45/// Error returned if attempting to remove a key that does not exist.
46pub struct KeyNotFoundError;
47
48/// compressed prefix tree
49///
50/// holds arbitrary values, uses string keys
51/// common slices of stored keys are compressed by
52/// not storing duplicates of those common slices.
53#[derive(Debug, PartialEq, Eq)]
54pub struct Trie<K: Eq + Clone, V> {
55    /// tree root
56    /// this will always be a node with the empty string.
57    root: TrieNode<K, V>,
58}
59
60#[derive(Debug, PartialEq, Eq)]
61struct TrieNode<K: Eq + Clone, V> {
62    children: Vec<TrieNode<K, V>>,
63    value: Option<V>,
64    prefix: Box<[K]>,
65}
66
67impl<K: Eq + Clone, V> Trie<K, V> {
68    /// constructs an empty prefix tree
69    pub fn new() -> Self {
70        Trie {
71            root: TrieNode {
72                value: None,
73                prefix: Box::new([]),
74                children: Vec::new(),
75            },
76        }
77    }
78
79    /// gets the value of a key
80    #[inline]
81    fn get(&self, key: &[K]) -> Option<&V> {
82        self.root.get(key)
83    }
84
85    /// gets the value of a key as mutable
86    #[inline]
87    pub fn get_mut(&mut self, key: &[K]) -> Option<&mut V> {
88        self.root.get_mut(key)
89    }
90
91    /// checks if a key exists
92    #[inline]
93    pub fn has(&self, key: &[K]) -> bool {
94        self.get(key).is_some()
95    }
96
97    /// sets a key to a value
98    /// returns the key evicted if there was already a key.
99    #[inline]
100    pub fn put(&mut self, key: &[K], val: V) -> Option<V> {
101        self.root.insert(key, val)
102    }
103
104    /// sets a key to a value
105    /// returns an Err() if the key already existed.
106    #[inline]
107    pub fn try_put(&mut self, key: &[K], val: V) -> Result<(), KeyExistsError> {
108        match self.has(key) {
109            true => Err(KeyExistsError),
110            false => {
111                self.put(key, val);
112                Ok(())
113            }
114        }
115    }
116
117    /// removes a key
118    ///
119    /// Ok() if key existed, Err() otherwise
120    #[inline]
121    pub fn remove(&mut self, key: &[K]) -> Result<V, KeyNotFoundError> {
122        match self.root.remove(key) {
123            None => Err(KeyNotFoundError),
124            Some(data) => Ok(data),
125        }
126    }
127
128    /// Gets the size of the tree in terms of nodes.
129    #[inline]
130    pub fn size(&self) -> usize {
131        self.root.size()
132    }
133}
134
135impl<V> Trie<u8, V> {
136    /// Puts a value in with a certain string key.
137    /// The old value is ejected if it exists.
138    pub fn put_str(&mut self, key: &str, val: V) -> Option<V> {
139        self.put(key.as_bytes(), val)
140    }
141
142    /// Puts a value in with a certain string key.
143    /// Errors if there is already a value for the given string.
144    pub fn try_put_str(&mut self, key: &str, val: V) -> Result<(), KeyExistsError> {
145        self.try_put(key.as_bytes(), val)
146    }
147
148    /// Gets a reference to the value associated to the bytes of a given string key.
149    pub fn get_str(&mut self, key: &str) -> Option<&V> {
150        self.get(key.as_bytes())
151    }
152
153    /// Gets a mutable reference to the value associated to the bytes of a given string key.
154    pub fn get_mut_str(&mut self, key: &str) -> Option<&mut V> {
155        self.get_mut(key.as_bytes())
156    }
157
158    /// Checks if a given string key is associated to a value.
159    pub fn has_str(&mut self, key: &str) -> bool {
160        self.has(key.as_bytes())
161    }
162
163    /// Removes a given string key from the Trie.
164    pub fn remove_str(&mut self, key: &str) -> Result<V, KeyNotFoundError> {
165        self.remove(key.as_bytes())
166    }
167}
168
169impl<K: Eq + Clone, V> TrieNode<K, V> {
170    fn size(&self) -> usize {
171        let mut size = 1;
172        for other in self.children.iter() {
173            size += other.size();
174        }
175        return size;
176    }
177
178    fn get(&self, key: &[K]) -> Option<&V> {
179        if key == self.prefix.as_ref() {
180            return self.value.as_ref();
181        }
182        let rest = &key[self.prefix.len()..];
183        let leaf = self.leaf(rest);
184        match leaf {
185            None => None,
186            Some(node) => node.get(rest),
187        }
188    }
189
190    fn leaf(&self, key: &[K]) -> Option<&Self> {
191        for node in self.children.iter() {
192            if key.starts_with(node.prefix.as_ref()) {
193                return Some(&node);
194            }
195        }
196        None
197    }
198
199    fn get_mut(&mut self, key: &[K]) -> Option<&mut V> {
200        if key == self.prefix.as_ref() {
201            return self.value.as_mut();
202        }
203        let rest = &key[self.prefix.len()..];
204        let leaf = self.leaf_mut(rest);
205        match leaf {
206            None => None,
207            Some(node) => node.get_mut(rest),
208        }
209    }
210
211    fn leaf_mut(&mut self, key: &[K]) -> Option<&mut Self> {
212        for node in self.children.iter_mut() {
213            if key.starts_with(&node.prefix) {
214                return Some(node);
215            }
216        }
217        None
218    }
219
220    fn insert(&mut self, key: &[K], value: V) -> Option<V> {
221        if key == self.prefix.as_ref() {
222            return self.value.replace(value);
223        }
224        let rest = &key[self.prefix.len()..];
225        let leaf = self.leaf_mut(rest);
226        // still longer than leaf, and leaf exists
227        if leaf.is_some() {
228            return leaf.unwrap().insert(rest, value);
229        }
230        // shorter than a valid leaf split target
231        let split = self.insert_split_target(rest);
232        if split.is_some() {
233            let (idx, node) = split.unwrap();
234            let inject = TrieNode {
235                prefix: (&rest[(rest.len() - 1)..(node.prefix.len() - rest.len())])
236                    .to_owned()
237                    .into_boxed_slice(),
238                children: Vec::new(),
239                value: Some(value),
240            };
241            let moved = std::mem::replace(&mut self.children[idx], inject);
242            self.children[idx].children.push(moved);
243            return None;
244        }
245        // neither a leaf is our prefix, nor are we a leaf prefix, inject new leaf.
246        let inject = TrieNode {
247            prefix: rest.to_owned().into_boxed_slice(),
248            children: Vec::new(),
249            value: Some(value),
250        };
251        self.children.push(inject);
252        return None;
253    }
254
255    fn insert_split_target(&mut self, key: &[K]) -> Option<(usize, &mut Self)> {
256        self.children
257            .iter_mut()
258            .enumerate()
259            .find(|(_idx, node)| node.prefix.starts_with(key))
260    }
261
262    fn remove(&mut self, key: &[K]) -> Option<V> {
263        if key == self.prefix.as_ref() {
264            // us, this should only happen on first node. eject value.
265            return self.value.take();
266        }
267        self.remove_internal(&key[self.prefix.len()..])
268    }
269
270    fn remove_internal(&mut self, key: &[K]) -> Option<V> {
271        // get leaf node
272        let rest = &key[self.prefix.len()..];
273        let leaf = self.leaf_mut(rest);
274        if leaf.is_none() {
275            // not found, bail
276            return None;
277        }
278        // unwrap it - this relies on local variable shadowing
279        let leaf = leaf.unwrap();
280        // leaf is not exact
281        if leaf.prefix.as_ref() != rest {
282            // kick it down
283            return leaf.remove_internal(rest);
284        }
285        // leaf is exact
286        // evict value
287        let evicted = leaf.value.take();
288        // some options
289        match leaf.children.len() {
290            0 => {
291                // empty, evict
292                let prefix = leaf.prefix.clone();
293                self.evict_node_with_prefix(prefix.as_ref());
294            }
295            1 => {
296                // 1 node. it should take the node below it.
297                leaf.take_below();
298            }
299            _ => {
300                // more than 1 node; do nothing, as it needs to stay there to be a split/branching node.
301            }
302        }
303        match self.children.len() {
304            1 => {
305                // we only have one child, we should take the node we just read
306                self.take_below();
307            }
308            _ => {
309                // can't do anything, we need to be a branching node
310            }
311        }
312        // return evicted value
313        evicted
314    }
315
316    fn evict_node_with_prefix(&mut self, prefix: &[K]) {
317        self.children.swap_remove(
318            self.children
319                .iter()
320                .enumerate()
321                .find(|(_idx, n)| n.prefix.as_ref() == prefix)
322                .unwrap()
323                .0,
324        );
325    }
326
327    fn take_below(&mut self) {
328        // this only makes sense if we only have 1 node.
329        assert!(self.children.len() == 1);
330        // take the node from below
331        let taken = std::mem::replace(&mut self.children[0].children, Vec::new());
332        // remove the node we have
333        let node = self.children.remove(0);
334        // steal their prefix
335        let prefix = node.prefix.to_owned();
336        // drop that node just in case
337        std::mem::drop(node);
338        // replace our children with that node
339        self.children = taken;
340        // append their prefix to ours
341        self.prefix = [self.prefix.as_ref(), prefix.as_ref()].concat().into();
342    }
343}
344
345#[cfg(test)]
346mod tests {
347    use super::*;
348
349    #[test]
350    fn insertion_retrieval() {
351        let mut trie = Trie::new();
352        let v1 = vec!["a", "ab", "ac", "b", "c", "abc", "abcde", "abced"];
353        let v2 = vec![1, 2, 3, 4, 5, 6, 7, 9];
354        for i in 0..8 {
355            trie.put_str(v1[i], v2[i]);
356        }
357        for i in 0..8 {
358            assert_eq!(trie.get_str(v1[i]), Some(&v2[i]));
359        }
360        assert_eq!(trie.size(), 9);
361        trie.put_str(v1[3], 33);
362        assert_eq!(trie.get_str(v1[3]), Some(&33));
363        assert_eq!(trie.size(), 9);
364    }
365
366    #[test]
367    fn insertion_deletion() {
368        let mut trie = Trie::new();
369        let v1 = vec!["a", "ab", "ac", "b", "c", "abc", "abcde", "abced"];
370        let v2 = vec![1, 2, 3, 4, 5, 6, 7, 9];
371        for i in 0..8 {
372            trie.put_str(v1[i], v2[i]);
373        }
374        for i in 0..8 {
375            assert_eq!(trie.get_str(v1[i]), Some(&v2[i]));
376        }
377        assert_eq!(trie.size(), 9);
378        let removed = trie.remove_str("abcd");
379        assert!(removed.is_err());
380        let removed = trie.remove_str("abcde");
381        assert_eq!(removed.ok(), Some(7));
382        assert_eq!(trie.size(), 7);
383        let removed: Result<i32, KeyNotFoundError> = trie.remove_str("c");
384        assert_eq!(removed.ok(), Some(5));
385        assert_eq!(trie.size(), 6);
386        let removed = trie.remove_str("abcde");
387        assert!(removed.is_err());
388        assert_eq!(trie.size(), 6);
389    }
390
391    #[test]
392    fn i32_tests() {
393        let mut trie = Trie::new();
394        let v1: Vec<Box<[i32]>> = vec![[1].into(), [2].into(), [3].into(), [4].into(), [2, 3, 4].into(), [2, 3, 4, 5].into(), [3, 5, 1].into(), [1, 11, 111].into(), [1, 111, 11].into()];
395        let v2 = vec!["a", "b", "c", "d", "e", "f", "g" ,"h", "i"];
396        for i in 0..v1.len() {
397            trie.put(v1[i].as_ref(), v2[i].to_owned());
398        }
399        for i in 0..v1.len() {
400            assert_eq!(trie.get(v1[i].as_ref()), Some(&v2[i].to_owned()));
401        }
402    }
403}