basic_trie/trie/
regular_trie.rs

1use std::cmp::Ordering;
2use std::ops;
3
4use arrayvec::ArrayString;
5#[cfg(feature = "serde")]
6use serde_crate::{Deserialize, Serialize};
7
8use crate::trie::get_characters;
9use crate::trie_node::TrieDatalessNode;
10
11#[derive(Debug, Default, Clone)]
12#[cfg_attr(
13    feature = "serde",
14    derive(Serialize, Deserialize),
15    serde(crate = "serde_crate")
16)]
17pub struct Trie {
18    root: TrieDatalessNode,
19    len: usize,
20}
21
22impl Trie {
23    pub fn new() -> Self {
24        Trie {
25            root: TrieDatalessNode::new(),
26            len: 0,
27        }
28    }
29
30    /// Insert a word into the trie, with no corresponding data.
31    ///
32    /// # Examples
33    ///
34    /// ```
35    /// use basic_trie::Trie;
36    /// let mut trie = Trie::new();
37    ///
38    /// trie.insert("word1");
39    /// assert_eq!(vec![String::from("word1")], trie.get_all());
40    /// ```
41    pub fn insert(&mut self, word: &str) {
42        let characters = get_characters(word);
43        let mut current = &mut self.root;
44
45        for character in characters {
46            current = current
47                .children
48                .entry(ArrayString::from(character).unwrap())
49                .or_default();
50        }
51
52        if !current.is_associated() {
53            self.len += 1;
54        }
55
56        current.associate();
57    }
58
59    /// Removes a word from the trie.
60    /// If the word is a prefix to some word, some word
61    /// isn't removed from the trie.
62    ///
63    /// # Examples
64    ///
65    /// ```
66    /// use basic_trie::Trie;
67    /// let mut trie = Trie::new();
68    ///
69    /// trie.insert("word");
70    /// trie.insert("wording");
71    ///
72    /// trie.remove("word");
73    /// assert_eq!(vec![String::from("wording")], trie.get("word").unwrap());
74    ///
75    /// trie.remove("wording");
76    /// assert_eq!(Vec::<String>::new(), trie.get_all());
77    /// ```
78    pub fn remove(&mut self, word: &str) {
79        let Some(current) = self.get_final_node_mut(word) else {
80            return;
81        };
82
83        let characters = get_characters(word);
84
85        if !current.children.is_empty() {
86            return if current.is_associated() {
87                current.disassociate();
88                self.len -= 1;
89            };
90        }
91
92        self.root.remove_one_word(characters.into_iter());
93        self.len -= 1;
94    }
95
96    /// Removes every word that begins with 'prefix'.
97    /// Not including the word 'prefix' if it's present.
98    ///
99    /// # Examples
100    ///
101    /// ```
102    /// use basic_trie::Trie;
103    /// let mut trie = Trie::new();
104    ///
105    /// trie.insert("eat");
106    /// trie.insert("eats");
107    /// trie.insert("eating");
108    /// trie.insert("eatings");
109    /// trie.insert("ea");
110    ///
111    /// trie.remove_prefix("ea");
112    ///
113    /// assert_eq!(vec![String::from("ea")], trie.get_all());
114    /// ```
115    pub fn remove_prefix(&mut self, prefix: &str) {
116        let Some(current) = self.get_final_node_mut(prefix) else {
117            return;
118        };
119
120        // (current.is_associated() as usize) is added (subtracted twice) to
121        // not remove the current word from the count. Literal '1' is not used
122        // because of calling this function on the root node where 1 should
123        // not be added.
124        self.len -= current.remove_all_words() - (current.is_associated() as usize);
125    }
126
127    /// Returns an option enum with a vector of owned strings
128    /// representing all found words that begin with 'query'.
129    /// If the word 'query' doesn't exist, None is returned.
130    ///
131    /// # Examples
132    ///
133    /// ```
134    /// use basic_trie::Trie;
135    /// let mut trie = Trie::new();
136    ///
137    /// trie.insert("word1");
138    /// trie.insert("word2");
139    ///
140    /// let all_correct_words = vec![String::from("word1"), String::from("word2")];
141    /// let mut found_words = trie.get("word").unwrap();
142    /// found_words.sort();
143    /// assert_eq!(all_correct_words, found_words);
144    /// ```
145    pub fn get(&self, query: &str) -> Option<Vec<String>> {
146        let mut substring = String::new();
147        let mut current_node = &self.root;
148        let characters = get_characters(query);
149
150        for character in characters {
151            current_node = match current_node.children.get(character) {
152                None => return None,
153                Some(trie_node) => {
154                    substring.push_str(character);
155                    trie_node
156                }
157            }
158        }
159
160        let mut words_vec = Vec::new();
161        current_node.find_words(&substring, &mut words_vec);
162
163        Some(words_vec)
164    }
165
166    /// Returns the vector of longest words found in the trie.
167    ///
168    /// # Examples
169    ///
170    /// ```
171    /// use basic_trie::Trie;
172    /// let mut trie = Trie::new();
173    ///
174    /// trie.insert("shortwrd");
175    /// trie.insert("verylongword");
176    /// trie.insert("somelongword");
177    ///
178    /// let longest_words = vec![String::from("somelongword"), String::from("verylongword")];
179    /// let mut found_words = trie.get_longest();
180    /// found_words.sort();
181    /// assert_eq!(longest_words, found_words);
182    /// ```
183    pub fn get_longest(&self) -> Vec<String> {
184        let mut words = Vec::new();
185        self.root.words_min_max("", &mut words, Ordering::Greater);
186        words
187    }
188
189    /// Returns the vector of shortest words found in the trie.
190    ///
191    /// # Examples
192    ///
193    /// ```
194    /// use basic_trie::Trie;
195    /// let mut trie = Trie::new();
196    ///
197    /// trie.insert("shortwrd");
198    /// trie.insert("rlyshort");
199    /// trie.insert("verylongword");
200    ///
201    /// let shortest_word = vec![String::from("rlyshort"), String::from("shortwrd")];
202    /// let mut found_words = trie.get_shortest();
203    /// found_words.sort();
204    /// assert_eq!(shortest_word, found_words);
205    /// ```
206    pub fn get_shortest(&self) -> Vec<String> {
207        let mut words = Vec::new();
208        self.root.words_min_max("", &mut words, Ordering::Less);
209        words
210    }
211
212    /// Returns the number of words in the trie.
213    ///
214    /// # Examples
215    ///
216    /// ```
217    /// use basic_trie::Trie;
218    /// let mut trie = Trie::new();
219    ///
220    /// trie.insert("word1");
221    /// trie.insert("word2");
222    /// trie.insert("word3");
223    /// trie.insert("word4");
224    /// assert_eq!(4, trie.len());
225    ///
226    /// trie.remove("word1");
227    /// assert_eq!(3, trie.len());
228    ///
229    /// trie.remove_prefix("w");
230    /// assert_eq!(0, trie.len());
231    /// ```
232    pub fn len(&self) -> usize {
233        self.len
234    }
235
236    /// Returns the number of words that start with 'prefix'.
237    /// If the sequence 'prefix' is not found, None is returned.
238    ///
239    /// # Examples
240    /// ```
241    /// use basic_trie::Trie;
242    /// let mut trie = Trie::new();
243    ///
244    /// trie.insert("word1");
245    /// trie.insert("word2");
246    /// trie.insert("word3");
247    /// trie.insert("word4");
248    /// trie.insert("word");
249    /// assert_eq!(4, trie.len_prefix("word"));
250    /// ```
251    pub fn len_prefix(&self, prefix: &str) -> usize {
252        match self.get_final_node(prefix) {
253            None => 0,
254            Some(node) => node.count_words() - node.is_associated() as usize,
255        }
256    }
257
258    /// Returns an option enum with a vector of owned strings
259    /// representing all words in the trie.
260    /// Order is not guaranteed.
261    ///
262    /// # Examples
263    ///
264    /// ```
265    /// use basic_trie::Trie;
266    /// let mut trie = Trie::new();
267    ///
268    /// trie.insert("word1");
269    /// trie.insert("word2");
270    /// trie.insert("word3");
271    /// trie.insert("word4");
272    /// trie.insert("word5");
273    ///
274    /// let all_words = vec![
275    ///     String::from("word1"), String::from("word2"), String::from("word3"),
276    ///     String::from("word4"), String::from("word5")
277    /// ];
278    ///
279    /// let mut found_words = trie.get_all();
280    /// found_words.sort();
281    ///
282    /// assert_eq!(all_words, found_words);
283    /// ```
284    pub fn get_all(&self) -> Vec<String> {
285        self.get("").unwrap()
286    }
287
288    /// Returns true if the trie contains 'query' as a word.
289    ///
290    /// # Examples
291    ///
292    /// ```
293    /// use basic_trie::Trie;
294    /// let mut trie = Trie::new();
295    ///
296    /// trie.insert("word");
297    /// assert!(trie.contains("word"));
298    /// assert!(!trie.contains("notfound"));
299    /// ```
300    pub fn contains(&self, query: &str) -> bool {
301        self.get_final_node(query)
302            .map_or(false, |node| node.is_associated())
303    }
304
305    /// Returns true if no words are in the trie.
306    ///
307    /// # Examples
308    ///
309    /// ```
310    /// use basic_trie::Trie;
311    /// let mut trie = Trie::new();
312    ///
313    /// trie.insert("word");
314    /// trie.remove("word");
315    ///
316    /// assert!(trie.is_empty());
317    /// ```
318    pub fn is_empty(&self) -> bool {
319        self.len == 0
320    }
321
322    /// Removes all words from the trie.
323    ///
324    /// # Examples
325    ///
326    /// ```
327    /// use basic_trie::Trie;
328    /// let mut trie = Trie::new();
329    ///
330    /// trie.insert("word1");
331    /// trie.insert("word2");
332    /// trie.insert("word3");
333    /// trie.insert("word4");
334    ///
335    /// trie.clear();
336    /// assert!(trie.is_empty());
337    /// assert_eq!(0, trie.len());
338    /// ```
339    pub fn clear(&mut self) {
340        self.root.clear_children();
341        self.len = 0;
342    }
343
344    /// Function for getting the last node in a character sequence.
345    fn get_final_node(&self, query: &str) -> Option<&TrieDatalessNode> {
346        let mut current = &self.root;
347
348        for character in get_characters(query) {
349            current = match current.children.get(character) {
350                None => return None,
351                Some(next_node) => next_node,
352            }
353        }
354
355        Some(current)
356    }
357
358    /// Function for getting the last node in a character sequence (mutable).
359    fn get_final_node_mut(&mut self, query: &str) -> Option<&mut TrieDatalessNode> {
360        let mut current = &mut self.root;
361
362        for character in get_characters(query) {
363            current = match current.children.get_mut(character) {
364                None => return None,
365                Some(next_node) => next_node,
366            }
367        }
368
369        Some(current)
370    }
371}
372
373impl ops::Add for Trie {
374    type Output = Trie;
375
376    /// Operation + merges two tries, leaving out duplicate words.
377    /// The smaller trie is always added to the larger one for efficiency.
378    ///
379    /// # Examples
380    ///
381    /// ```
382    /// use basic_trie::Trie;
383    /// let mut trie_1 = Trie::new();
384    /// trie_1.insert("word1");
385    /// trie_1.insert("word2");
386    /// trie_1.insert("word");
387    ///
388    /// let mut trie_2 = Trie::new();
389    /// trie_2.insert("word3");
390    /// trie_2.insert("word");
391    ///
392    /// let mut correct = Trie::new();
393    /// correct.insert("word");
394    /// correct.insert("word1");
395    /// correct.insert("word2");
396    /// correct.insert("word3");
397    ///
398    /// let trie_3 = trie_1 + trie_2;
399    ///
400    /// assert_eq!(trie_3, correct);
401    /// ```
402    fn add(self, rhs: Self) -> Self::Output {
403        let (smaller, mut bigger) = if self.len < rhs.len {
404            (self, rhs)
405        } else {
406            (rhs, self)
407        };
408
409        bigger.root += smaller.root;
410
411        // Number of words needs to be recalculated.
412        bigger.len = bigger.root.count_words();
413
414        bigger
415    }
416}
417
418impl ops::AddAssign for Trie {
419    /// Operation += merges two tries, leaving out duplicate words.
420    ///
421    /// # Examples
422    ///
423    /// ```
424    /// use basic_trie::Trie;
425    /// let mut trie_1 = Trie::new();
426    /// trie_1.insert("word1");
427    /// trie_1.insert("word2");
428    /// trie_1.insert("word");
429    ///
430    /// let mut trie_2 = Trie::new();
431    /// trie_2.insert("word3");
432    /// trie_2.insert("word");
433    ///
434    /// let mut correct = Trie::new();
435    /// correct.insert("word");
436    /// correct.insert("word1");
437    /// correct.insert("word2");
438    /// correct.insert("word3");
439    ///
440    /// trie_1 += trie_2;
441    ///
442    /// assert_eq!(trie_1, correct);
443    /// ```
444    fn add_assign(&mut self, rhs: Self) {
445        self.root += rhs.root;
446
447        // Number of words needs to be recalculated.
448        self.len = self.root.count_words();
449    }
450}
451
452impl PartialEq for Trie {
453    /// # Examples
454    ///
455    /// ```
456    /// use basic_trie::Trie;
457    /// let mut trie_1 = Trie::new();
458    /// trie_1.insert("test");
459    ///
460    /// let mut trie_2 = Trie::new();
461    /// trie_2.insert("test");
462    ///
463    /// assert_eq!(trie_1, trie_2);
464    ///
465    /// trie_2.insert("test2");
466    ///
467    /// assert_ne!(trie_1, trie_2);
468    /// ```
469    fn eq(&self, other: &Self) -> bool {
470        self.len == other.len && self.root == other.root
471    }
472}