html_keywords_matching/
trie.rs1use std::{
2 cell::RefCell,
3 collections::{HashMap, VecDeque},
4 rc::Rc,
5};
6
7use crate::{MatchResult, Trie, TrieNode, TrieNodeRef};
8
9impl TrieNode {
10 fn new(depth: usize) -> Self {
11 TrieNode {
12 is_end_words: false,
13 children: HashMap::new(),
14 fail: None,
15 depth: depth,
16 }
17 }
18
19 fn get_child(&self, c: &char) -> Option<TrieNodeRef> {
20 self.children.get(c).map(|x| x.clone())
21 }
22
23 fn get_fail(&self) -> Option<TrieNodeRef> {
24 self.fail.as_ref().map(|x| x.clone())
25 }
26}
27
28impl Trie {
29 pub fn new() -> Self {
30 Trie {
31 root: Rc::new(RefCell::new(TrieNode::new(0))),
32 }
33 }
34
35 pub fn build(&mut self, words: Vec<String>) {
36 for word in words {
37 self.insert(word);
38 }
39 self.build_fail_point()
40 }
41
42 fn insert(&mut self, word: String) {
43 let mut p_ref = self.root.clone();
44 for c in word.chars() {
45 let child = p_ref.borrow().get_child(&c);
46 match child {
47 Some(child) => {
48 p_ref = child;
49 }
50 None => {
51 let mut new_node = TrieNode::new(p_ref.borrow().depth + 1);
52 new_node.fail = Some(self.root.clone());
53 let new_node_ref = Rc::new(RefCell::new(new_node));
54 p_ref.borrow_mut().children.insert(c, new_node_ref.clone());
55 p_ref = new_node_ref;
56 }
57 }
58 }
59 p_ref.borrow_mut().is_end_words = true;
60 }
61
62 pub fn build_fail_point(&mut self) {
63 let mut queue = VecDeque::<TrieNodeRef>::new();
64 {
65 for (_, child) in &self.root.as_ref().borrow().children {
66 queue.push_back(child.clone());
67 child.borrow_mut().fail = Some(self.root.clone());
68 }
69 }
70
71 while let Some(p) = queue.pop_front() {
72 for (c, child) in &p.borrow().children {
73 let mut fail = p.borrow().get_fail();
74 while let Some(fail_ref) = fail {
75 let fail_child = fail_ref.borrow().get_child(c);
76 if fail_child.is_some() {
77 child.borrow_mut().fail = fail_child;
78 queue.push_back(child.clone());
79 break;
80 }
81 fail = fail_ref.borrow().fail.clone();
82 }
83 }
84 }
85 }
86
87 pub fn search_replace(&self, text: &String) -> MatchResult {
88 let mut p_ref = self.root.clone();
89 let mut match_result = HashMap::<String, usize>::new();
90 let mut modify_text = String::new();
91 let mut byte_index: Vec<usize> = vec![];
92 for (i, mut c) in text.char_indices() {
93 c = c.to_ascii_lowercase();
94 byte_index.push(i);
95 let mut child = p_ref.borrow().get_child(&c);
96 while child.is_none() {
97 let fail = p_ref.borrow().get_fail();
98 match fail {
99 Some(fail_ref) => {
100 p_ref = fail_ref;
101 child = p_ref.borrow().get_child(&c);
102 }
103 None => {
104 break;
105 }
106 }
107 }
108 match child {
109 Some(child) => {
110 p_ref = child;
111 if p_ref.borrow().is_end_words {
112 let word_len = p_ref.borrow().depth;
113 let start = byte_index[byte_index.len() - word_len];
114 let end = i + c.len_utf8();
115 let word = &text[start..end].to_ascii_lowercase().to_string();
116 let count = match_result.entry(word.to_string()).or_insert(0);
117 *count += 1;
118 for _ in 0..word_len - 1 {
120 modify_text.pop();
121 }
122
123 modify_text
125 .push_str(format!("<span class=\"keyword\">{}</span>", word).as_str());
126 } else {
127 modify_text.push(c);
128 }
129 }
130 None => {
131 p_ref = self.root.clone();
132 modify_text.push(c);
133 }
134 }
135 }
136 MatchResult {
137 match_words: match_result,
138 modified_html: modify_text,
139 }
140 }
141}
142
143mod tests {
144
145 #[test]
146 fn test_trie() {
147 use crate::Trie;
148 let mut trie = Trie::new();
149 trie.build(vec![
150 "he".to_string(),
151 "her".to_string(),
152 "say".to_string(),
153 "she".to_string(),
154 "shr".to_string(),
155 ]);
156 trie.build_fail_point();
157 assert!(trie.root.borrow().get_child(&'h').is_some());
158 assert!(trie.root.borrow().get_child(&'s').is_some());
159 assert!(trie.root.borrow().get_child(&'a').is_none());
160
161 let h_node = trie.root.borrow().get_child(&'h').unwrap();
162 assert!(h_node.borrow().get_child(&'e').is_some());
163 assert!(h_node.borrow().get_child(&'s').is_none());
164
165 let s_node = trie.root.borrow().get_child(&'s').unwrap();
166 assert!(s_node.borrow().get_child(&'h').is_some());
167 assert!(s_node.borrow().get_child(&'a').is_some());
168 assert!(s_node.borrow().get_child(&'e').is_none());
169
170 let sh_node = s_node.borrow().get_child(&'h').unwrap();
171 assert!(sh_node.borrow().get_child(&'r').is_some());
172 assert!(sh_node.borrow().get_child(&'e').is_some());
173
174 assert!(sh_node.borrow().get_fail().is_some());
175
176 let she_node = sh_node.borrow().get_child(&'e').unwrap();
177 assert!(she_node.borrow().get_fail().is_some());
178 assert!(she_node.borrow().is_end_words == true);
179 }
180}