html_keywords_matching/
trie.rs

1use std::{
2    cell::RefCell,
3    collections::{HashMap, VecDeque},
4    rc::Rc,
5};
6
7use crate::{MatchResult, Trie, TrieNode, TrieNodeRef};
8
9impl TrieNode {
10    fn new(depth: usize) -> Self {
11        TrieNode {
12            is_end_words: false,
13            children: HashMap::new(),
14            fail: None,
15            depth: depth,
16        }
17    }
18
19    fn get_child(&self, c: &char) -> Option<TrieNodeRef> {
20        self.children.get(c).map(|x| x.clone())
21    }
22
23    fn get_fail(&self) -> Option<TrieNodeRef> {
24        self.fail.as_ref().map(|x| x.clone())
25    }
26}
27
28impl Trie {
29    pub fn new() -> Self {
30        Trie {
31            root: Rc::new(RefCell::new(TrieNode::new(0))),
32        }
33    }
34
35    pub fn build(&mut self, words: Vec<String>) {
36        for word in words {
37            self.insert(word);
38        }
39        self.build_fail_point()
40    }
41
42    fn insert(&mut self, word: String) {
43        let mut p_ref = self.root.clone();
44        for c in word.chars() {
45            let child = p_ref.borrow().get_child(&c);
46            match child {
47                Some(child) => {
48                    p_ref = child;
49                }
50                None => {
51                    let mut new_node = TrieNode::new(p_ref.borrow().depth + 1);
52                    new_node.fail = Some(self.root.clone());
53                    let new_node_ref = Rc::new(RefCell::new(new_node));
54                    p_ref.borrow_mut().children.insert(c, new_node_ref.clone());
55                    p_ref = new_node_ref;
56                }
57            }
58        }
59        p_ref.borrow_mut().is_end_words = true;
60    }
61
62    pub fn build_fail_point(&mut self) {
63        let mut queue = VecDeque::<TrieNodeRef>::new();
64        {
65            for (_, child) in &self.root.as_ref().borrow().children {
66                queue.push_back(child.clone());
67                child.borrow_mut().fail = Some(self.root.clone());
68            }
69        }
70
71        while let Some(p) = queue.pop_front() {
72            for (c, child) in &p.borrow().children {
73                let mut fail = p.borrow().get_fail();
74                while let Some(fail_ref) = fail {
75                    let fail_child = fail_ref.borrow().get_child(c);
76                    if fail_child.is_some() {
77                        child.borrow_mut().fail = fail_child;
78                        queue.push_back(child.clone());
79                        break;
80                    }
81                    fail = fail_ref.borrow().fail.clone();
82                }
83            }
84        }
85    }
86
87    pub fn search_replace(&self, text: &String) -> MatchResult {
88        let mut p_ref = self.root.clone();
89        let mut match_result = HashMap::<String, usize>::new();
90        let mut modify_text = String::new();
91        let mut byte_index: Vec<usize> = vec![];
92        for (i, mut c) in text.char_indices() {
93            c = c.to_ascii_lowercase();
94            byte_index.push(i);
95            let mut child = p_ref.borrow().get_child(&c);
96            while child.is_none() {
97                let fail = p_ref.borrow().get_fail();
98                match fail {
99                    Some(fail_ref) => {
100                        p_ref = fail_ref;
101                        child = p_ref.borrow().get_child(&c);
102                    }
103                    None => {
104                        break;
105                    }
106                }
107            }
108            match child {
109                Some(child) => {
110                    p_ref = child;
111                    if p_ref.borrow().is_end_words {
112                        let word_len = p_ref.borrow().depth;
113                        let start = byte_index[byte_index.len() - word_len];
114                        let end = i + c.len_utf8();
115                        let word = &text[start..end].to_ascii_lowercase().to_string();
116                        let count = match_result.entry(word.to_string()).or_insert(0);
117                        *count += 1;
118                        // pop word_len-1 char from modify_text
119                        for _ in 0..word_len - 1 {
120                            modify_text.pop();
121                        }
122
123                        // push
124                        modify_text
125                            .push_str(format!("<span class=\"keyword\">{}</span>", word).as_str());
126                    } else {
127                        modify_text.push(c);
128                    }
129                }
130                None => {
131                    p_ref = self.root.clone();
132                    modify_text.push(c);
133                }
134            }
135        }
136        MatchResult {
137            match_words: match_result,
138            modified_html: modify_text,
139        }
140    }
141}
142
143mod tests {
144
145    #[test]
146    fn test_trie() {
147        use crate::Trie;
148        let mut trie = Trie::new();
149        trie.build(vec![
150            "he".to_string(),
151            "her".to_string(),
152            "say".to_string(),
153            "she".to_string(),
154            "shr".to_string(),
155        ]);
156        trie.build_fail_point();
157        assert!(trie.root.borrow().get_child(&'h').is_some());
158        assert!(trie.root.borrow().get_child(&'s').is_some());
159        assert!(trie.root.borrow().get_child(&'a').is_none());
160
161        let h_node = trie.root.borrow().get_child(&'h').unwrap();
162        assert!(h_node.borrow().get_child(&'e').is_some());
163        assert!(h_node.borrow().get_child(&'s').is_none());
164
165        let s_node = trie.root.borrow().get_child(&'s').unwrap();
166        assert!(s_node.borrow().get_child(&'h').is_some());
167        assert!(s_node.borrow().get_child(&'a').is_some());
168        assert!(s_node.borrow().get_child(&'e').is_none());
169
170        let sh_node = s_node.borrow().get_child(&'h').unwrap();
171        assert!(sh_node.borrow().get_child(&'r').is_some());
172        assert!(sh_node.borrow().get_child(&'e').is_some());
173
174        assert!(sh_node.borrow().get_fail().is_some());
175
176        let she_node = sh_node.borrow().get_child(&'e').unwrap();
177        assert!(she_node.borrow().get_fail().is_some());
178        assert!(she_node.borrow().is_end_words == true);
179    }
180}