basic_trie/trie_node/
data_node.rs1use fxhash::FxHashMap;
2use std::cmp::Ordering;
3use std::ops;
4use thin_vec::ThinVec;
5
6#[cfg(feature = "serde")]
7use serde_crate::{Deserialize, Serialize};
8
9type WordEnd<D> = Option<ThinVec<D>>;
10
11pub(crate) struct RemoveData<D> {
16 must_keep: bool,
17 pub(crate) data: WordEnd<D>,
18}
19
20#[derive(Debug, Default, Clone)]
22#[cfg_attr(
23 feature = "serde",
24 derive(Serialize, Deserialize),
25 serde(crate = "serde_crate")
26)]
27pub struct TrieDataNode<D> {
28 #[cfg_attr(feature = "serde", serde(rename = "c"))]
29 pub(crate) children: Box<FxHashMap<arrayvec::ArrayString<4>, TrieDataNode<D>>>,
30 #[cfg_attr(feature = "serde", serde(rename = "wed"))]
31 word_end_data: WordEnd<D>,
32}
33
34impl<D> TrieDataNode<D> {
36 pub(crate) fn new() -> Self {
38 TrieDataNode {
39 children: Default::default(),
40 word_end_data: None,
41 }
42 }
43
44 pub(crate) fn remove_all_words_collect(&mut self, found_data: &mut Vec<D>) -> usize {
47 let num_removed = self
48 .children
49 .values_mut()
50 .map(|child| child.remove_all_words_collect(found_data))
51 .sum::<usize>()
52 + self.is_associated() as usize;
53
54 if let Some(data_vec) = self.disassociate() {
55 found_data.extend(data_vec);
56 }
57
58 self.clear_children();
59
60 num_removed
61 }
62
63 pub(crate) fn count_words(&self) -> usize {
65 self.children
66 .values()
67 .map(|child| child.count_words())
68 .sum::<usize>()
69 + self.is_associated() as usize
70 }
71
72 pub(crate) fn generate_all_data<'a>(&'a self, found_data: &mut Vec<&'a D>) {
75 if let Some(data_vec) = &self.word_end_data {
76 found_data.extend(data_vec.iter());
77 }
78
79 self.children
80 .values()
81 .for_each(|x| x.generate_all_data(found_data));
82 }
83
84 pub(crate) fn generate_all_data_mut<'a>(&'a mut self, found_data: &mut Vec<&'a mut D>) {
87 if let Some(data_vec) = &mut self.word_end_data {
88 found_data.extend(data_vec.iter_mut());
89 }
90
91 self.children
92 .values_mut()
93 .for_each(|x| x.generate_all_data_mut(found_data));
94 }
95
96 pub(crate) fn push_data(&mut self, data: D) {
98 self.get_association_mut().as_mut().unwrap().push(data);
99 }
100
101 pub(crate) fn find_words(&self, substring: &str, found_words: &mut Vec<String>) {
104 if self.is_associated() {
105 found_words.push(substring.to_string());
106 }
107
108 self.children.iter().for_each(|(character, node)| {
109 node.find_words(&(substring.to_owned() + character), found_words)
110 });
111 }
112
113 pub(crate) fn words_min_max(
117 &self,
118 substring: &str,
119 found_words: &mut Vec<String>,
120 ord: Ordering,
121 ) {
122 'word: {
123 if self.is_associated() {
124 if let Some(found) = found_words.first() {
125 match substring.len().cmp(&found.len()) {
126 Ordering::Less if ord == Ordering::Less => {
127 found_words.clear();
128 }
129 Ordering::Greater if ord == Ordering::Greater => {
130 found_words.clear();
131 }
132 Ordering::Equal => (),
133 _ => break 'word,
134 }
135 }
136 found_words.push(substring.to_string());
137 }
138 }
139
140 self.children.iter().for_each(|(character, node)| {
141 node.words_min_max(&(substring.to_owned() + character), found_words, ord)
142 });
143 }
144
145 pub(crate) fn clear_word_end_association(&mut self, keep_word: bool) -> WordEnd<D> {
149 let return_data = self.disassociate();
150
151 if keep_word && return_data.is_some() {
152 self.associate();
153 }
154
155 return_data
156 }
157
158 pub(crate) fn remove_one_word<'b>(
168 &mut self,
169 mut characters: impl Iterator<Item = &'b str>,
170 ) -> RemoveData<D> {
171 let next_character = match characters.next() {
172 None => {
173 return RemoveData {
174 must_keep: false,
175 data: self.disassociate(),
176 }
177 }
178 Some(char) => char,
179 };
180
181 let next_node = self.children.get_mut(next_character).unwrap();
182 let must_keep = next_node.remove_one_word(characters);
183
184 if self.children.len() > 1 || must_keep.must_keep {
185 return RemoveData {
186 must_keep: true,
187 data: must_keep.data,
188 };
189 }
190 self.clear_children();
191
192 RemoveData {
193 must_keep: self.is_associated(),
194 data: must_keep.data,
195 }
196 }
197
198 pub(crate) fn associate(&mut self) {
200 self.word_end_data = Some(ThinVec::new());
201 }
202
203 pub(crate) fn disassociate(&mut self) -> WordEnd<D> {
205 self.word_end_data.take()
206 }
207
208 pub(crate) fn is_associated(&self) -> bool {
210 self.word_end_data.is_some()
211 }
212
213 pub(crate) fn get_association(&self) -> &WordEnd<D> {
215 &self.word_end_data
216 }
217
218 pub(crate) fn get_association_mut(&mut self) -> &mut WordEnd<D> {
220 &mut self.word_end_data
221 }
222
223 pub(crate) fn clear_children(&mut self) {
225 self.children = Default::default();
226 }
227}
228
229impl<D> ops::AddAssign for TrieDataNode<D> {
230 fn add_assign(&mut self, rhs: Self) {
241 for (char, mut rhs_next_node) in rhs.children.into_iter() {
242 match self.children.remove(&*char) {
244 Some(mut self_next_node) => {
246 if let Some(data_vec_rhs) = rhs_next_node.word_end_data.take() {
249 if let Some(data_vec_self) = &mut self_next_node.word_end_data {
250 data_vec_self.extend(data_vec_rhs);
251 } else {
252 self_next_node.word_end_data = Some(data_vec_rhs);
253 }
254 }
255
256 self_next_node += rhs_next_node;
257 self.children.insert(char, self_next_node);
258 }
259 None => {
262 self.children.insert(char, rhs_next_node);
263 }
264 }
265 }
266 }
267}
268
269impl<D: PartialEq> PartialEq for TrieDataNode<D> {
270 fn eq(&self, other: &Self) -> bool {
272 if !(self.children.len() == other.children.len()
274 && self.children.keys().all(|k| other.children.contains_key(k)))
275 {
276 return false;
277 }
278
279 if !match (&self.word_end_data, &other.word_end_data) {
281 (Some(self_vec), Some(other_vec)) => {
282 self_vec.len() == other_vec.len() && self_vec.iter().all(|k| other_vec.contains(k))
284 }
285 (None, None) => true,
287 _ => false,
288 } {
289 return false;
290 }
291
292 self.children
294 .iter()
295 .map(|(char, self_child)| (self_child, other.children.get(char).unwrap()))
296 .all(|(self_child, other_child)| other_child == self_child)
297 }
298}