Skip to main content

agentic_codebase/index/
symbol_index.rs

1//! Index by symbol name for fast lookup.
2//!
3//! Provides O(1) exact name lookup, case-insensitive search, prefix search
4//! via binary search on sorted names, and substring search via linear scan.
5
6use std::collections::HashMap;
7
8use crate::graph::CodeGraph;
9
10/// Index that maps symbol names to code unit IDs for O(1) lookup.
11#[derive(Debug, Clone, Default)]
12pub struct SymbolIndex {
13    /// Exact name -> list of unit IDs.
14    exact: HashMap<String, Vec<u64>>,
15    /// Lowercase name -> list of unit IDs (for case-insensitive search).
16    lowercase: HashMap<String, Vec<u64>>,
17    /// Sorted (lowercase name, unit ID) pairs for prefix search (binary search).
18    sorted_names: Vec<(String, u64)>,
19}
20
21impl SymbolIndex {
22    /// Build a `SymbolIndex` from all code units in the given graph.
23    ///
24    /// Iterates over every unit once, populating the exact, lowercase, and
25    /// sorted name collections.
26    pub fn build(graph: &CodeGraph) -> Self {
27        let mut exact: HashMap<String, Vec<u64>> = HashMap::new();
28        let mut lowercase: HashMap<String, Vec<u64>> = HashMap::new();
29        let mut sorted_names: Vec<(String, u64)> = Vec::with_capacity(graph.unit_count());
30
31        for unit in graph.units() {
32            exact.entry(unit.name.clone()).or_default().push(unit.id);
33
34            let lower = unit.name.to_lowercase();
35            lowercase.entry(lower.clone()).or_default().push(unit.id);
36
37            sorted_names.push((lower, unit.id));
38        }
39
40        sorted_names.sort_by(|a, b| a.0.cmp(&b.0));
41
42        Self {
43            exact,
44            lowercase,
45            sorted_names,
46        }
47    }
48
49    /// Look up unit IDs by exact symbol name (case-sensitive).
50    ///
51    /// Returns an empty slice if no units match.
52    pub fn lookup_exact(&self, name: &str) -> &[u64] {
53        self.exact.get(name).map(|v| v.as_slice()).unwrap_or(&[])
54    }
55
56    /// Look up unit IDs whose lowercase name starts with the given prefix
57    /// (case-insensitive).
58    ///
59    /// Uses binary search on sorted names for efficient prefix matching.
60    pub fn lookup_prefix(&self, prefix: &str) -> Vec<u64> {
61        let prefix_lower = prefix.to_lowercase();
62        if prefix_lower.is_empty() {
63            return self.sorted_names.iter().map(|(_, id)| *id).collect();
64        }
65
66        // Find the first entry >= prefix using binary search.
67        let start = self
68            .sorted_names
69            .partition_point(|(name, _)| name.as_str() < prefix_lower.as_str());
70
71        let mut results = Vec::new();
72        for (name, id) in &self.sorted_names[start..] {
73            if name.starts_with(&prefix_lower) {
74                results.push(*id);
75            } else {
76                break;
77            }
78        }
79        results
80    }
81
82    /// Look up unit IDs whose lowercase name contains the given substring
83    /// (case-insensitive).
84    ///
85    /// This is a linear scan over the lowercase map; use `lookup_prefix` or
86    /// `lookup_exact` when possible.
87    pub fn lookup_contains(&self, substring: &str) -> Vec<u64> {
88        let sub_lower = substring.to_lowercase();
89        let mut results = Vec::new();
90        for (name, ids) in &self.lowercase {
91            if name.contains(&sub_lower) {
92                results.extend(ids.iter().copied());
93            }
94        }
95        results
96    }
97
98    /// Returns the number of distinct exact symbol names in the index.
99    pub fn len(&self) -> usize {
100        self.exact.len()
101    }
102
103    /// Returns `true` if the index contains no entries.
104    pub fn is_empty(&self) -> bool {
105        self.exact.is_empty()
106    }
107}
108
109#[cfg(test)]
110mod tests {
111    use super::*;
112    use crate::graph::CodeGraph;
113    use crate::types::{CodeUnit, CodeUnitType, Language, Span};
114    use std::path::PathBuf;
115
116    fn make_unit(name: &str) -> CodeUnit {
117        CodeUnit::new(
118            CodeUnitType::Function,
119            Language::Rust,
120            name.to_string(),
121            format!("mod::{name}"),
122            PathBuf::from("src/lib.rs"),
123            Span::new(1, 0, 10, 0),
124        )
125    }
126
127    #[test]
128    fn test_empty_index() {
129        let graph = CodeGraph::default();
130        let index = SymbolIndex::build(&graph);
131        assert!(index.is_empty());
132        assert_eq!(index.len(), 0);
133        assert_eq!(index.lookup_exact("foo"), &[] as &[u64]);
134    }
135
136    #[test]
137    fn test_exact_lookup() {
138        let mut graph = CodeGraph::default();
139        graph.add_unit(make_unit("process_payment"));
140        graph.add_unit(make_unit("validate_input"));
141
142        let index = SymbolIndex::build(&graph);
143        assert_eq!(index.len(), 2);
144        assert_eq!(index.lookup_exact("process_payment"), &[0]);
145        assert_eq!(index.lookup_exact("validate_input"), &[1]);
146        assert_eq!(index.lookup_exact("nonexistent"), &[] as &[u64]);
147    }
148
149    #[test]
150    fn test_prefix_lookup() {
151        let mut graph = CodeGraph::default();
152        graph.add_unit(make_unit("process_payment"));
153        graph.add_unit(make_unit("process_refund"));
154        graph.add_unit(make_unit("validate_input"));
155
156        let index = SymbolIndex::build(&graph);
157        let mut results = index.lookup_prefix("process");
158        results.sort();
159        assert_eq!(results, vec![0, 1]);
160    }
161
162    #[test]
163    fn test_contains_lookup() {
164        let mut graph = CodeGraph::default();
165        graph.add_unit(make_unit("process_payment"));
166        graph.add_unit(make_unit("validate_payment"));
167        graph.add_unit(make_unit("send_email"));
168
169        let index = SymbolIndex::build(&graph);
170        let mut results = index.lookup_contains("payment");
171        results.sort();
172        assert_eq!(results, vec![0, 1]);
173    }
174
175    #[test]
176    fn test_case_insensitive_prefix() {
177        let mut graph = CodeGraph::default();
178        graph.add_unit(make_unit("ProcessPayment"));
179        graph.add_unit(make_unit("processRefund"));
180
181        let index = SymbolIndex::build(&graph);
182        let mut results = index.lookup_prefix("PROCESS");
183        results.sort();
184        assert_eq!(results, vec![0, 1]);
185    }
186}