1#![deny(unsafe_code)]
7#![warn(rust_2018_idioms)]
8#![warn(missing_docs)]
9#![warn(clippy::all)]
10
11use std::collections::HashMap;
12
13pub struct SymbolIndex {
17 trie: SymbolTrie,
19 inverted_index: HashMap<String, Vec<String>>,
21}
22
23struct SymbolTrie {
25 children: HashMap<char, Box<SymbolTrie>>,
27 symbols: Vec<String>,
29}
30
31impl Default for SymbolIndex {
32 fn default() -> Self {
33 Self::new()
34 }
35}
36
37impl SymbolIndex {
38 #[must_use]
40 pub fn new() -> Self {
41 Self { trie: SymbolTrie::new(), inverted_index: HashMap::new() }
42 }
43
44 pub fn add_symbol(&mut self, symbol: String) {
48 self.trie.insert(&symbol);
50
51 let tokens = Self::tokenize(&symbol);
53 for token in tokens {
54 self.inverted_index.entry(token).or_default().push(symbol.clone());
55 }
56 }
57
58 #[must_use]
62 pub fn search_prefix(&self, prefix: &str) -> Vec<String> {
63 self.trie.search_prefix(prefix)
64 }
65
66 #[must_use]
70 pub fn search_fuzzy(&self, query: &str) -> Vec<String> {
71 let tokens = Self::tokenize(query);
72 let mut results = HashMap::new();
73
74 for token in tokens {
75 if let Some(symbols) = self.inverted_index.get(&token) {
76 for symbol in symbols {
77 *results.entry(symbol.clone()).or_insert(0) += 1;
78 }
79 }
80 }
81
82 let mut sorted: Vec<_> = results.into_iter().collect();
84 sorted.sort_by(|(_, a), (_, b)| b.cmp(a));
85
86 sorted.into_iter().map(|(symbol, _)| symbol).collect()
87 }
88
89 fn tokenize(s: &str) -> Vec<String> {
90 let mut tokens = Vec::new();
92 let mut current = String::new();
93 let mut prev_upper = false;
94
95 for ch in s.chars() {
96 if ch.is_uppercase() && !prev_upper && !current.is_empty() {
97 tokens.push(current.to_lowercase());
98 current = String::new();
99 }
100
101 if ch.is_alphanumeric() {
102 current.push(ch);
103 prev_upper = ch.is_uppercase();
104 } else if !current.is_empty() {
105 tokens.push(current.to_lowercase());
106 current = String::new();
107 prev_upper = false;
108 }
109 }
110
111 if !current.is_empty() {
112 tokens.push(current.to_lowercase());
113 }
114
115 tokens
116 }
117}
118
119impl SymbolTrie {
120 fn new() -> Self {
121 Self { children: HashMap::new(), symbols: Vec::new() }
122 }
123
124 fn insert(&mut self, symbol: &str) {
125 let mut node = self;
126
127 for ch in symbol.chars() {
128 node = node.children.entry(ch).or_insert_with(|| Box::new(SymbolTrie::new()));
129 }
130
131 node.symbols.push(symbol.to_string());
132 }
133
134 fn search_prefix(&self, prefix: &str) -> Vec<String> {
135 let mut node = self;
136
137 for ch in prefix.chars() {
138 match node.children.get(&ch) {
139 Some(child) => node = child,
140 None => return Vec::new(),
141 }
142 }
143
144 let mut results = Vec::new();
146 Self::collect_all(node, &mut results);
147 results
148 }
149
150 fn collect_all(node: &SymbolTrie, results: &mut Vec<String>) {
151 results.extend(node.symbols.clone());
152
153 for child in node.children.values() {
154 Self::collect_all(child, results);
155 }
156 }
157}
158
159#[cfg(test)]
160mod tests {
161 use super::SymbolIndex;
162
163 #[test]
164 fn indexes_symbols_for_prefix_and_fuzzy_search() {
165 let mut index = SymbolIndex::new();
166
167 index.add_symbol("calculate_total".to_string());
168 index.add_symbol("calculateAverage".to_string());
169 index.add_symbol("get_user_name".to_string());
170
171 let prefix_results = index.search_prefix("calc");
172 assert_eq!(prefix_results.len(), 2);
173 assert!(prefix_results.contains(&"calculate_total".to_string()));
174 assert!(prefix_results.contains(&"calculateAverage".to_string()));
175
176 let fuzzy_results = index.search_fuzzy("user name");
177 assert!(fuzzy_results.contains(&"get_user_name".to_string()));
178 }
179}