Skip to main content

perl_symbol_index/
lib.rs

1//! Symbol search index primitives.
2//!
3//! This crate has one responsibility: indexing symbol names for fast lookup
4//! across prefix and fuzzy query styles.
5
6#![deny(unsafe_code)]
7#![warn(rust_2018_idioms)]
8#![warn(missing_docs)]
9#![warn(clippy::all)]
10
11use std::collections::HashMap;
12
13/// Symbol index for fast lookups.
14///
15/// Supports both prefix and fuzzy matching using a trie and inverted index.
16pub struct SymbolIndex {
17    /// Trie structure for prefix matching
18    trie: SymbolTrie,
19    /// Inverted index for fuzzy matching
20    inverted_index: HashMap<String, Vec<String>>,
21}
22
23/// Trie data structure for efficient prefix matching
24struct SymbolTrie {
25    /// Child nodes indexed by character
26    children: HashMap<char, Box<SymbolTrie>>,
27    /// Symbols stored at this node
28    symbols: Vec<String>,
29}
30
31impl Default for SymbolIndex {
32    fn default() -> Self {
33        Self::new()
34    }
35}
36
37impl SymbolIndex {
38    /// Create a new empty symbol index.
39    #[must_use]
40    pub fn new() -> Self {
41        Self { trie: SymbolTrie::new(), inverted_index: HashMap::new() }
42    }
43
44    /// Add a symbol to the index.
45    ///
46    /// Indexes the symbol for both prefix and fuzzy matching.
47    pub fn add_symbol(&mut self, symbol: String) {
48        // Add to trie for prefix matching
49        self.trie.insert(&symbol);
50
51        // Add to inverted index for fuzzy matching
52        let tokens = Self::tokenize(&symbol);
53        for token in tokens {
54            self.inverted_index.entry(token).or_default().push(symbol.clone());
55        }
56    }
57
58    /// Search symbols with prefix.
59    ///
60    /// Returns all symbols starting with the given prefix.
61    #[must_use]
62    pub fn search_prefix(&self, prefix: &str) -> Vec<String> {
63        self.trie.search_prefix(prefix)
64    }
65
66    /// Fuzzy search symbols.
67    ///
68    /// Returns symbols matching any of the tokenized query words, sorted by relevance.
69    #[must_use]
70    pub fn search_fuzzy(&self, query: &str) -> Vec<String> {
71        let tokens = Self::tokenize(query);
72        let mut results = HashMap::new();
73
74        for token in tokens {
75            if let Some(symbols) = self.inverted_index.get(&token) {
76                for symbol in symbols {
77                    *results.entry(symbol.clone()).or_insert(0) += 1;
78                }
79            }
80        }
81
82        // Sort by relevance (number of matching tokens)
83        let mut sorted: Vec<_> = results.into_iter().collect();
84        sorted.sort_by(|(_, a), (_, b)| b.cmp(a));
85
86        sorted.into_iter().map(|(symbol, _)| symbol).collect()
87    }
88
89    fn tokenize(s: &str) -> Vec<String> {
90        // Split on word boundaries and case changes
91        let mut tokens = Vec::new();
92        let mut current = String::new();
93        let mut prev_upper = false;
94
95        for ch in s.chars() {
96            if ch.is_uppercase() && !prev_upper && !current.is_empty() {
97                tokens.push(current.to_lowercase());
98                current = String::new();
99            }
100
101            if ch.is_alphanumeric() {
102                current.push(ch);
103                prev_upper = ch.is_uppercase();
104            } else if !current.is_empty() {
105                tokens.push(current.to_lowercase());
106                current = String::new();
107                prev_upper = false;
108            }
109        }
110
111        if !current.is_empty() {
112            tokens.push(current.to_lowercase());
113        }
114
115        tokens
116    }
117}
118
119impl SymbolTrie {
120    fn new() -> Self {
121        Self { children: HashMap::new(), symbols: Vec::new() }
122    }
123
124    fn insert(&mut self, symbol: &str) {
125        let mut node = self;
126
127        for ch in symbol.chars() {
128            node = node.children.entry(ch).or_insert_with(|| Box::new(SymbolTrie::new()));
129        }
130
131        node.symbols.push(symbol.to_string());
132    }
133
134    fn search_prefix(&self, prefix: &str) -> Vec<String> {
135        let mut node = self;
136
137        for ch in prefix.chars() {
138            match node.children.get(&ch) {
139                Some(child) => node = child,
140                None => return Vec::new(),
141            }
142        }
143
144        // Collect all symbols from this node and descendants
145        let mut results = Vec::new();
146        Self::collect_all(node, &mut results);
147        results
148    }
149
150    fn collect_all(node: &SymbolTrie, results: &mut Vec<String>) {
151        results.extend(node.symbols.clone());
152
153        for child in node.children.values() {
154            Self::collect_all(child, results);
155        }
156    }
157}
158
159#[cfg(test)]
160mod tests {
161    use super::SymbolIndex;
162
163    #[test]
164    fn indexes_symbols_for_prefix_and_fuzzy_search() {
165        let mut index = SymbolIndex::new();
166
167        index.add_symbol("calculate_total".to_string());
168        index.add_symbol("calculateAverage".to_string());
169        index.add_symbol("get_user_name".to_string());
170
171        let prefix_results = index.search_prefix("calc");
172        assert_eq!(prefix_results.len(), 2);
173        assert!(prefix_results.contains(&"calculate_total".to_string()));
174        assert!(prefix_results.contains(&"calculateAverage".to_string()));
175
176        let fuzzy_results = index.search_fuzzy("user name");
177        assert!(fuzzy_results.contains(&"get_user_name".to_string()));
178    }
179}