greppy/trace/
index.rs

1//! Semantic Index - The core in-memory data structure
2//!
3//! SemanticIndex holds all symbols, tokens, references, scopes, and edges
4//! with fast lookup structures for instant queries.
5//!
6//! @module trace/index
7
8use std::collections::HashMap;
9use std::path::PathBuf;
10
11use compact_str::CompactString;
12use smallvec::SmallVec;
13
14use super::types::{Edge, RefKind, Reference, Scope, Symbol, SymbolKind, Token};
15
16// =============================================================================
17// STRING TABLE
18// =============================================================================
19
20/// Interned string storage for memory efficiency
21///
22/// All symbol and token names are stored once in this table,
23/// and structures reference them by offset.
24#[derive(Debug, Clone, Default)]
25pub struct StringTable {
26    /// Raw bytes of all interned strings (null-terminated)
27    data: Vec<u8>,
28    /// Map from string to offset for deduplication
29    lookup: HashMap<CompactString, u32>,
30}
31
32impl StringTable {
33    /// Create a new empty string table
34    pub fn new() -> Self {
35        Self::default()
36    }
37
38    /// Create with pre-allocated capacity
39    pub fn with_capacity(capacity: usize) -> Self {
40        Self {
41            data: Vec::with_capacity(capacity),
42            lookup: HashMap::with_capacity(capacity / 16), // Assume ~16 bytes per string
43        }
44    }
45
46    /// Intern a string, returning its offset
47    ///
48    /// If the string already exists, returns the existing offset.
49    pub fn intern(&mut self, s: &str) -> u32 {
50        // Check if already interned
51        let compact = CompactString::new(s);
52        if let Some(&offset) = self.lookup.get(&compact) {
53            return offset;
54        }
55
56        // Add new string
57        let offset = self.data.len() as u32;
58        self.data.extend_from_slice(s.as_bytes());
59        self.data.push(0); // Null terminator
60        self.lookup.insert(compact, offset);
61        offset
62    }
63
64    /// Get a string by offset
65    ///
66    /// # Safety
67    /// The offset must be valid and point to a null-terminated string
68    pub fn get(&self, offset: u32) -> Option<&str> {
69        let start = offset as usize;
70        if start >= self.data.len() {
71            return None;
72        }
73
74        // Find the null terminator
75        let end = self.data[start..]
76            .iter()
77            .position(|&b| b == 0)
78            .map(|pos| start + pos)?;
79
80        std::str::from_utf8(&self.data[start..end]).ok()
81    }
82
83    /// Get the raw bytes
84    pub fn as_bytes(&self) -> &[u8] {
85        &self.data
86    }
87
88    /// Load from raw bytes (for mmap)
89    pub fn from_bytes(data: Vec<u8>) -> Self {
90        // Rebuild lookup table
91        let mut lookup = HashMap::new();
92        let mut offset = 0usize;
93        while offset < data.len() {
94            // Find null terminator
95            let end = data[offset..]
96                .iter()
97                .position(|&b| b == 0)
98                .map(|pos| offset + pos);
99
100            if let Some(end) = end {
101                if let Ok(s) = std::str::from_utf8(&data[offset..end]) {
102                    lookup.insert(CompactString::new(s), offset as u32);
103                }
104                offset = end + 1;
105            } else {
106                break;
107            }
108        }
109
110        Self { data, lookup }
111    }
112
113    /// Number of interned strings
114    pub fn len(&self) -> usize {
115        self.lookup.len()
116    }
117
118    /// Check if empty
119    pub fn is_empty(&self) -> bool {
120        self.lookup.is_empty()
121    }
122
123    /// Total bytes used
124    pub fn byte_size(&self) -> usize {
125        self.data.len()
126    }
127}
128
129// =============================================================================
130// SEMANTIC INDEX
131// =============================================================================
132
133/// The main semantic index containing all code structure information
134///
135/// This structure is designed for:
136/// - Fast lookup by name (HashMap)
137/// - Fast traversal (adjacency lists with SmallVec)
138/// - Memory efficiency (interned strings, compact types)
139/// - Mmap compatibility (repr(C) data types)
140#[derive(Debug)]
141pub struct SemanticIndex {
142    // -------------------------------------------------------------------------
143    // Primary Data (mmap'd)
144    // -------------------------------------------------------------------------
145    /// All symbol definitions
146    pub symbols: Vec<Symbol>,
147
148    /// All token occurrences
149    pub tokens: Vec<Token>,
150
151    /// All symbol references
152    pub references: Vec<Reference>,
153
154    /// All scopes
155    pub scopes: Vec<Scope>,
156
157    /// All call graph edges
158    pub edges: Vec<Edge>,
159
160    // -------------------------------------------------------------------------
161    // Fast Lookups (built at load time)
162    // -------------------------------------------------------------------------
163    /// Symbol lookup by name -> list of symbol IDs with that name
164    /// SmallVec<[u32; 4]> because most names have 1-4 definitions
165    pub symbol_by_name: HashMap<CompactString, SmallVec<[u32; 4]>>,
166
167    /// Token lookup by name -> list of token IDs with that name
168    pub token_by_name: HashMap<CompactString, Vec<u32>>,
169
170    /// Incoming edges per symbol (who calls this symbol)
171    /// SmallVec<[u32; 8]> because most functions have <8 callers
172    pub incoming_edges: Vec<SmallVec<[u32; 8]>>,
173
174    /// Outgoing edges per symbol (what does this symbol call)
175    /// SmallVec<[u32; 8]> because most functions call <8 others
176    pub outgoing_edges: Vec<SmallVec<[u32; 8]>>,
177
178    /// References to each symbol (index by symbol_id)
179    pub refs_to_symbol: Vec<Vec<u32>>,
180
181    // -------------------------------------------------------------------------
182    // Metadata
183    // -------------------------------------------------------------------------
184    /// List of indexed files (index = file_id)
185    pub files: Vec<PathBuf>,
186
187    /// Interned string storage
188    pub strings: StringTable,
189
190    /// Entry point symbol IDs (for fast traversal starting points)
191    pub entry_points: Vec<u32>,
192}
193
194impl SemanticIndex {
195    /// Create a new empty semantic index
196    pub fn new() -> Self {
197        Self {
198            symbols: Vec::new(),
199            tokens: Vec::new(),
200            references: Vec::new(),
201            scopes: Vec::new(),
202            edges: Vec::new(),
203            symbol_by_name: HashMap::new(),
204            token_by_name: HashMap::new(),
205            incoming_edges: Vec::new(),
206            outgoing_edges: Vec::new(),
207            refs_to_symbol: Vec::new(),
208            files: Vec::new(),
209            strings: StringTable::new(),
210            entry_points: Vec::new(),
211        }
212    }
213
214    /// Create with estimated capacity for better allocation
215    pub fn with_capacity(
216        symbols: usize,
217        tokens: usize,
218        references: usize,
219        scopes: usize,
220        edges: usize,
221        files: usize,
222    ) -> Self {
223        Self {
224            symbols: Vec::with_capacity(symbols),
225            tokens: Vec::with_capacity(tokens),
226            references: Vec::with_capacity(references),
227            scopes: Vec::with_capacity(scopes),
228            edges: Vec::with_capacity(edges),
229            symbol_by_name: HashMap::with_capacity(symbols),
230            token_by_name: HashMap::with_capacity(tokens / 4), // Many tokens share names
231            incoming_edges: Vec::with_capacity(symbols),
232            outgoing_edges: Vec::with_capacity(symbols),
233            refs_to_symbol: Vec::with_capacity(symbols),
234            files: Vec::with_capacity(files),
235            strings: StringTable::with_capacity(symbols * 20), // ~20 bytes per symbol name
236            entry_points: Vec::with_capacity(files),           // ~1 entry point per file
237        }
238    }
239
240    // -------------------------------------------------------------------------
241    // Building Methods
242    // -------------------------------------------------------------------------
243
244    /// Add a file to the index, returning its file_id
245    pub fn add_file(&mut self, path: PathBuf) -> u16 {
246        let id = self.files.len() as u16;
247        self.files.push(path);
248        id
249    }
250
251    /// Add a symbol to the index
252    pub fn add_symbol(&mut self, symbol: Symbol, name: &str) {
253        let id = symbol.id as usize;
254
255        // Store the symbol
256        if id >= self.symbols.len() {
257            self.symbols.resize(
258                id + 1,
259                Symbol::new(0, 0, 0, SymbolKind::Unknown, Default::default(), 0, 0),
260            );
261        }
262        self.symbols[id] = symbol;
263
264        // Update name lookup
265        let compact_name = CompactString::new(name);
266        self.symbol_by_name
267            .entry(compact_name)
268            .or_insert_with(SmallVec::new)
269            .push(symbol.id);
270
271        // Track entry points
272        if symbol.is_entry_point() {
273            self.entry_points.push(symbol.id);
274        }
275
276        // Ensure adjacency lists are sized
277        while self.incoming_edges.len() <= id {
278            self.incoming_edges.push(SmallVec::new());
279        }
280        while self.outgoing_edges.len() <= id {
281            self.outgoing_edges.push(SmallVec::new());
282        }
283        while self.refs_to_symbol.len() <= id {
284            self.refs_to_symbol.push(Vec::new());
285        }
286    }
287
288    /// Add a token to the index
289    pub fn add_token(&mut self, token: Token, name: &str) {
290        let id = token.id as usize;
291
292        // Store the token
293        if id >= self.tokens.len() {
294            self.tokens.resize(
295                id + 1,
296                Token::new(0, 0, 0, 0, 0, super::types::TokenKind::Unknown, 0),
297            );
298        }
299        self.tokens[id] = token;
300
301        // Update name lookup
302        let compact_name = CompactString::new(name);
303        self.token_by_name
304            .entry(compact_name)
305            .or_insert_with(Vec::new)
306            .push(token.id);
307    }
308
309    /// Add a reference to the index
310    pub fn add_reference(&mut self, reference: Reference) {
311        self.references.push(reference);
312
313        // Update refs_to_symbol lookup
314        let sym_id = reference.symbol_id as usize;
315        if sym_id < self.refs_to_symbol.len() {
316            self.refs_to_symbol[sym_id].push(reference.token_id);
317        }
318    }
319
320    /// Add a scope to the index
321    pub fn add_scope(&mut self, scope: Scope) {
322        let id = scope.id as usize;
323        if id >= self.scopes.len() {
324            self.scopes.resize(id + 1, Scope::file_scope(0, 0, 0));
325        }
326        self.scopes[id] = scope;
327    }
328
329    /// Add an edge to the index
330    pub fn add_edge(&mut self, edge: Edge) {
331        self.edges.push(edge);
332
333        // Update adjacency lists
334        let from = edge.from_symbol as usize;
335        let to = edge.to_symbol as usize;
336
337        if from < self.outgoing_edges.len() {
338            self.outgoing_edges[from].push(edge.to_symbol);
339        }
340        if to < self.incoming_edges.len() {
341            self.incoming_edges[to].push(edge.from_symbol);
342        }
343    }
344
345    /// Rebuild all lookup structures from raw data
346    ///
347    /// Call this after loading from storage to populate HashMaps and adjacency lists.
348    pub fn rebuild_lookups(&mut self) {
349        // Clear existing lookups
350        self.symbol_by_name.clear();
351        self.token_by_name.clear();
352        self.incoming_edges.clear();
353        self.outgoing_edges.clear();
354        self.refs_to_symbol.clear();
355        self.entry_points.clear();
356
357        // Resize adjacency lists
358        self.incoming_edges
359            .resize(self.symbols.len(), SmallVec::new());
360        self.outgoing_edges
361            .resize(self.symbols.len(), SmallVec::new());
362        self.refs_to_symbol.resize(self.symbols.len(), Vec::new());
363
364        // Rebuild symbol_by_name and entry_points
365        for symbol in &self.symbols {
366            if let Some(name) = self.strings.get(symbol.name_offset) {
367                self.symbol_by_name
368                    .entry(CompactString::new(name))
369                    .or_insert_with(SmallVec::new)
370                    .push(symbol.id);
371            }
372            if symbol.is_entry_point() {
373                self.entry_points.push(symbol.id);
374            }
375        }
376
377        // Rebuild token_by_name
378        for token in &self.tokens {
379            if let Some(name) = self.strings.get(token.name_offset) {
380                self.token_by_name
381                    .entry(CompactString::new(name))
382                    .or_insert_with(Vec::new)
383                    .push(token.id);
384            }
385        }
386
387        // Rebuild adjacency lists from edges
388        for edge in &self.edges {
389            let from = edge.from_symbol as usize;
390            let to = edge.to_symbol as usize;
391            if from < self.outgoing_edges.len() {
392                self.outgoing_edges[from].push(edge.to_symbol);
393            }
394            if to < self.incoming_edges.len() {
395                self.incoming_edges[to].push(edge.from_symbol);
396            }
397        }
398
399        // Rebuild refs_to_symbol
400        for reference in &self.references {
401            let sym_id = reference.symbol_id as usize;
402            if sym_id < self.refs_to_symbol.len() {
403                self.refs_to_symbol[sym_id].push(reference.token_id);
404            }
405        }
406    }
407
408    // -------------------------------------------------------------------------
409    // Query Methods
410    // -------------------------------------------------------------------------
411
412    /// Find symbols by exact name
413    pub fn symbols_by_name(&self, name: &str) -> Option<&SmallVec<[u32; 4]>> {
414        self.symbol_by_name.get(&CompactString::new(name))
415    }
416
417    /// Find symbols matching a name pattern (substring match)
418    pub fn symbols_matching(&self, pattern: &str) -> Vec<u32> {
419        let pattern_lower = pattern.to_lowercase();
420        self.symbol_by_name
421            .iter()
422            .filter(|(name, _)| name.to_lowercase().contains(&pattern_lower))
423            .flat_map(|(_, ids)| ids.iter().copied())
424            .collect()
425    }
426
427    /// Find tokens by exact name
428    pub fn tokens_by_name(&self, name: &str) -> Option<&Vec<u32>> {
429        self.token_by_name.get(&CompactString::new(name))
430    }
431
432    /// Find tokens matching a name pattern (substring match)
433    pub fn tokens_matching(&self, pattern: &str) -> Vec<u32> {
434        let pattern_lower = pattern.to_lowercase();
435        self.token_by_name
436            .iter()
437            .filter(|(name, _)| name.to_lowercase().contains(&pattern_lower))
438            .flat_map(|(_, ids)| ids.iter().copied())
439            .collect()
440    }
441
442    /// Get a symbol by ID
443    #[inline]
444    pub fn symbol(&self, id: u32) -> Option<&Symbol> {
445        self.symbols.get(id as usize)
446    }
447
448    /// Get a token by ID
449    #[inline]
450    pub fn token(&self, id: u32) -> Option<&Token> {
451        self.tokens.get(id as usize)
452    }
453
454    /// Get a scope by ID
455    #[inline]
456    pub fn scope(&self, id: u32) -> Option<&Scope> {
457        self.scopes.get(id as usize)
458    }
459
460    /// Get the name of a symbol
461    pub fn symbol_name(&self, symbol: &Symbol) -> Option<&str> {
462        self.strings.get(symbol.name_offset)
463    }
464
465    /// Get the name of a token
466    pub fn token_name(&self, token: &Token) -> Option<&str> {
467        self.strings.get(token.name_offset)
468    }
469
470    /// Get the file path for a file_id
471    pub fn file_path(&self, file_id: u16) -> Option<&PathBuf> {
472        self.files.get(file_id as usize)
473    }
474
475    /// Get symbols that call a given symbol (incoming edges)
476    pub fn callers(&self, symbol_id: u32) -> &[u32] {
477        self.incoming_edges
478            .get(symbol_id as usize)
479            .map(|v| v.as_slice())
480            .unwrap_or(&[])
481    }
482
483    /// Get symbols that a given symbol calls (outgoing edges)
484    pub fn callees(&self, symbol_id: u32) -> &[u32] {
485        self.outgoing_edges
486            .get(symbol_id as usize)
487            .map(|v| v.as_slice())
488            .unwrap_or(&[])
489    }
490
491    /// Get all references to a symbol
492    pub fn references_to(&self, symbol_id: u32) -> impl Iterator<Item = &Reference> + '_ {
493        self.refs_to_symbol
494            .get(symbol_id as usize)
495            .into_iter()
496            .flat_map(move |token_ids| {
497                let sid = symbol_id;
498                token_ids.iter().filter_map(move |&tid| {
499                    self.references
500                        .iter()
501                        .find(|r| r.token_id == tid && r.symbol_id == sid)
502                })
503            })
504    }
505
506    /// Get references of a specific kind to a symbol
507    pub fn references_of_kind(
508        &self,
509        symbol_id: u32,
510        kind: RefKind,
511    ) -> impl Iterator<Item = &Reference> {
512        self.references_to(symbol_id)
513            .filter(move |r| r.ref_kind() == kind)
514    }
515
516    /// Get call references to a symbol
517    pub fn call_references(&self, symbol_id: u32) -> impl Iterator<Item = &Reference> {
518        self.references_of_kind(symbol_id, RefKind::Call)
519    }
520
521    /// Get all entry point symbols
522    pub fn entry_point_symbols(&self) -> impl Iterator<Item = &Symbol> {
523        self.entry_points.iter().filter_map(|&id| self.symbol(id))
524    }
525
526    /// Find symbols in a specific file
527    pub fn symbols_in_file(&self, file_id: u16) -> impl Iterator<Item = &Symbol> {
528        self.symbols.iter().filter(move |s| s.file_id == file_id)
529    }
530
531    /// Find tokens in a specific file
532    pub fn tokens_in_file(&self, file_id: u16) -> impl Iterator<Item = &Token> {
533        self.tokens.iter().filter(move |t| t.file_id == file_id)
534    }
535
536    // -------------------------------------------------------------------------
537    // Statistics
538    // -------------------------------------------------------------------------
539
540    /// Get index statistics
541    pub fn stats(&self) -> IndexStats {
542        IndexStats {
543            symbols: self.symbols.len(),
544            tokens: self.tokens.len(),
545            references: self.references.len(),
546            scopes: self.scopes.len(),
547            edges: self.edges.len(),
548            files: self.files.len(),
549            entry_points: self.entry_points.len(),
550            string_bytes: self.strings.byte_size(),
551            unique_names: self.strings.len(),
552        }
553    }
554
555    // -------------------------------------------------------------------------
556    // Incremental Update Methods
557    // -------------------------------------------------------------------------
558
559    /// Find file_id for a given path, if it exists in the index
560    pub fn file_id_for_path(&self, path: &std::path::Path) -> Option<u16> {
561        self.files.iter().position(|p| p == path).map(|i| i as u16)
562    }
563
564    /// Remove all data associated with a specific file.
565    ///
566    /// This removes symbols, tokens, references, scopes, and edges for the file,
567    /// and cleans up the lookup structures accordingly.
568    ///
569    /// Returns the number of symbols removed.
570    pub fn remove_file_data(&mut self, file_id: u16) -> usize {
571        // Collect symbol IDs to remove
572        let symbols_to_remove: Vec<u32> = self
573            .symbols
574            .iter()
575            .filter(|s| s.file_id == file_id)
576            .map(|s| s.id)
577            .collect();
578
579        if symbols_to_remove.is_empty() {
580            return 0;
581        }
582
583        let removed_count = symbols_to_remove.len();
584
585        // Create a set for fast lookup
586        let symbol_set: std::collections::HashSet<u32> =
587            symbols_to_remove.iter().copied().collect();
588
589        // Remove symbols from symbol_by_name lookup
590        for symbol_id in &symbols_to_remove {
591            if let Some(symbol) = self.symbols.get(*symbol_id as usize) {
592                if let Some(name) = self.strings.get(symbol.name_offset) {
593                    let compact_name = CompactString::new(name);
594                    if let Some(ids) = self.symbol_by_name.get_mut(&compact_name) {
595                        ids.retain(|id| *id != *symbol_id);
596                        if ids.is_empty() {
597                            self.symbol_by_name.remove(&compact_name);
598                        }
599                    }
600                }
601            }
602        }
603
604        // Remove from entry_points
605        self.entry_points.retain(|&id| !symbol_set.contains(&id));
606
607        // Clear adjacency lists for removed symbols and remove edges pointing to/from them
608        for &symbol_id in &symbols_to_remove {
609            let idx = symbol_id as usize;
610            if idx < self.incoming_edges.len() {
611                self.incoming_edges[idx].clear();
612            }
613            if idx < self.outgoing_edges.len() {
614                self.outgoing_edges[idx].clear();
615            }
616            if idx < self.refs_to_symbol.len() {
617                self.refs_to_symbol[idx].clear();
618            }
619        }
620
621        // Remove edges involving these symbols
622        self.edges
623            .retain(|e| !symbol_set.contains(&e.from_symbol) && !symbol_set.contains(&e.to_symbol));
624
625        // Rebuild adjacency lists from remaining edges (simpler than surgical removal)
626        for adj in &mut self.incoming_edges {
627            adj.retain(|id| !symbol_set.contains(id));
628        }
629        for adj in &mut self.outgoing_edges {
630            adj.retain(|id| !symbol_set.contains(id));
631        }
632
633        // Collect token IDs to remove
634        let tokens_to_remove: Vec<u32> = self
635            .tokens
636            .iter()
637            .filter(|t| t.file_id == file_id)
638            .map(|t| t.id)
639            .collect();
640
641        let token_set: std::collections::HashSet<u32> = tokens_to_remove.iter().copied().collect();
642
643        // Remove tokens from token_by_name lookup
644        for token_id in &tokens_to_remove {
645            if let Some(token) = self.tokens.get(*token_id as usize) {
646                if let Some(name) = self.strings.get(token.name_offset) {
647                    let compact_name = CompactString::new(name);
648                    if let Some(ids) = self.token_by_name.get_mut(&compact_name) {
649                        ids.retain(|&id| id != *token_id);
650                        if ids.is_empty() {
651                            self.token_by_name.remove(&compact_name);
652                        }
653                    }
654                }
655            }
656        }
657
658        // Remove references involving removed tokens or symbols
659        self.references
660            .retain(|r| !token_set.contains(&r.token_id) && !symbol_set.contains(&r.symbol_id));
661
662        // Update refs_to_symbol to remove references to removed tokens
663        for refs in &mut self.refs_to_symbol {
664            refs.retain(|&tid| !token_set.contains(&tid));
665        }
666
667        // Remove scopes for this file
668        // Note: We keep scope slots to preserve IDs, just mark them as invalid
669        for scope in &mut self.scopes {
670            if scope.file_id == file_id {
671                // Reset to an empty/invalid scope
672                scope.kind = super::types::ScopeKind::Unknown as u8;
673                scope.start_line = 0;
674                scope.end_line = 0;
675            }
676        }
677
678        // Note: We don't remove the file from self.files to preserve file_id mappings
679        // The file slot remains but with potentially no associated data
680
681        removed_count
682    }
683
684    /// Get the next available symbol ID (for incremental additions)
685    pub fn next_symbol_id(&self) -> u32 {
686        self.symbols.len() as u32
687    }
688
689    /// Get the next available token ID (for incremental additions)
690    pub fn next_token_id(&self) -> u32 {
691        self.tokens.len() as u32
692    }
693
694    /// Get the next available scope ID (for incremental additions)
695    pub fn next_scope_id(&self) -> u32 {
696        self.scopes.len() as u32
697    }
698
699    /// Check if a file path is already indexed
700    pub fn has_file(&self, path: &std::path::Path) -> bool {
701        self.file_id_for_path(path).is_some()
702    }
703}
704
705impl Default for SemanticIndex {
706    fn default() -> Self {
707        Self::new()
708    }
709}
710
711// =============================================================================
712// INDEX STATISTICS
713// =============================================================================
714
715/// Statistics about the semantic index
716#[derive(Debug, Clone, Copy)]
717pub struct IndexStats {
718    pub symbols: usize,
719    pub tokens: usize,
720    pub references: usize,
721    pub scopes: usize,
722    pub edges: usize,
723    pub files: usize,
724    pub entry_points: usize,
725    pub string_bytes: usize,
726    pub unique_names: usize,
727}
728
729impl std::fmt::Display for IndexStats {
730    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
731        writeln!(f, "Semantic Index Statistics:")?;
732        writeln!(f, "  Symbols:      {:>8}", self.symbols)?;
733        writeln!(f, "  Tokens:       {:>8}", self.tokens)?;
734        writeln!(f, "  References:   {:>8}", self.references)?;
735        writeln!(f, "  Scopes:       {:>8}", self.scopes)?;
736        writeln!(f, "  Edges:        {:>8}", self.edges)?;
737        writeln!(f, "  Files:        {:>8}", self.files)?;
738        writeln!(f, "  Entry Points: {:>8}", self.entry_points)?;
739        writeln!(f, "  String Bytes: {:>8}", self.string_bytes)?;
740        writeln!(f, "  Unique Names: {:>8}", self.unique_names)?;
741        Ok(())
742    }
743}
744
745// =============================================================================
746// TESTS
747// =============================================================================
748
749#[cfg(test)]
750mod tests {
751    use super::*;
752    use crate::trace::types::{SymbolFlags, TokenKind};
753
754    #[test]
755    fn test_string_table_interning() {
756        let mut table = StringTable::new();
757
758        let offset1 = table.intern("foo");
759        let offset2 = table.intern("bar");
760        let offset3 = table.intern("foo"); // Duplicate
761
762        assert_eq!(offset1, offset3); // Same string = same offset
763        assert_ne!(offset1, offset2); // Different strings = different offsets
764
765        assert_eq!(table.get(offset1), Some("foo"));
766        assert_eq!(table.get(offset2), Some("bar"));
767        assert_eq!(table.len(), 2);
768    }
769
770    #[test]
771    fn test_semantic_index_basic() {
772        let mut index = SemanticIndex::new();
773
774        // Add a file
775        let file_id = index.add_file(PathBuf::from("test.ts"));
776        assert_eq!(file_id, 0);
777
778        // Add a symbol
779        let name_offset = index.strings.intern("myFunction");
780        let symbol = Symbol::new(
781            0,
782            name_offset,
783            file_id,
784            SymbolKind::Function,
785            SymbolFlags::IS_ENTRY_POINT | SymbolFlags::IS_EXPORTED,
786            10,
787            20,
788        );
789        index.add_symbol(symbol, "myFunction");
790
791        // Verify lookups
792        assert_eq!(index.symbols.len(), 1);
793        assert!(index.symbols_by_name("myFunction").is_some());
794        assert_eq!(index.entry_points.len(), 1);
795    }
796
797    #[test]
798    fn test_call_graph() {
799        let mut index = SemanticIndex::new();
800        let file_id = index.add_file(PathBuf::from("test.ts"));
801
802        // Add two symbols
803        let name1 = index.strings.intern("caller");
804        let name2 = index.strings.intern("callee");
805
806        let sym1 = Symbol::new(
807            0,
808            name1,
809            file_id,
810            SymbolKind::Function,
811            SymbolFlags::empty(),
812            1,
813            10,
814        );
815        let sym2 = Symbol::new(
816            1,
817            name2,
818            file_id,
819            SymbolKind::Function,
820            SymbolFlags::empty(),
821            15,
822            25,
823        );
824
825        index.add_symbol(sym1, "caller");
826        index.add_symbol(sym2, "callee");
827
828        // Add edge: caller -> callee
829        index.add_edge(Edge::new(0, 1, 5));
830
831        // Verify graph
832        assert_eq!(index.callers(1), &[0]);
833        assert_eq!(index.callees(0), &[1]);
834    }
835
836    #[test]
837    fn test_references() {
838        let mut index = SemanticIndex::new();
839        let file_id = index.add_file(PathBuf::from("test.ts"));
840
841        // Add a symbol
842        let name = index.strings.intern("myVar");
843        let symbol = Symbol::new(
844            0,
845            name,
846            file_id,
847            SymbolKind::Variable,
848            SymbolFlags::empty(),
849            1,
850            1,
851        );
852        index.add_symbol(symbol, "myVar");
853
854        // Add a token and reference
855        let token = Token::new(0, name, file_id, 5, 10, TokenKind::Identifier, 0);
856        index.add_token(token, "myVar");
857        index.add_reference(Reference::new(0, 0, RefKind::Read));
858
859        // Verify
860        let refs: Vec<_> = index.references_to(0).collect();
861        assert_eq!(refs.len(), 1);
862        assert!(refs[0].is_read());
863    }
864
865    #[test]
866    fn test_file_id_for_path() {
867        let mut index = SemanticIndex::new();
868
869        let file1_id = index.add_file(PathBuf::from("src/main.ts"));
870        let file2_id = index.add_file(PathBuf::from("src/utils.ts"));
871
872        assert_eq!(
873            index.file_id_for_path(std::path::Path::new("src/main.ts")),
874            Some(file1_id)
875        );
876        assert_eq!(
877            index.file_id_for_path(std::path::Path::new("src/utils.ts")),
878            Some(file2_id)
879        );
880        assert_eq!(
881            index.file_id_for_path(std::path::Path::new("src/other.ts")),
882            None
883        );
884    }
885
886    #[test]
887    fn test_remove_file_data() {
888        let mut index = SemanticIndex::new();
889
890        // Add two files with symbols
891        let file1_id = index.add_file(PathBuf::from("file1.ts"));
892        let file2_id = index.add_file(PathBuf::from("file2.ts"));
893
894        let name1 = index.strings.intern("func1");
895        let name2 = index.strings.intern("func2");
896
897        let sym1 = Symbol::new(
898            0,
899            name1,
900            file1_id,
901            SymbolKind::Function,
902            SymbolFlags::IS_ENTRY_POINT,
903            1,
904            10,
905        );
906        let sym2 = Symbol::new(
907            1,
908            name2,
909            file2_id,
910            SymbolKind::Function,
911            SymbolFlags::empty(),
912            1,
913            10,
914        );
915
916        index.add_symbol(sym1, "func1");
917        index.add_symbol(sym2, "func2");
918
919        // Add an edge between them
920        index.add_edge(Edge::new(0, 1, 5));
921
922        // Verify initial state
923        assert!(index.symbols_by_name("func1").is_some());
924        assert!(index.symbols_by_name("func2").is_some());
925        assert_eq!(index.entry_points.len(), 1);
926        assert_eq!(index.edges.len(), 1);
927
928        // Remove file1's data
929        let removed = index.remove_file_data(file1_id);
930        assert_eq!(removed, 1, "Should remove 1 symbol");
931
932        // func1 should be gone from lookup
933        assert!(
934            index
935                .symbols_by_name("func1")
936                .map(|s| s.is_empty())
937                .unwrap_or(true),
938            "func1 should be removed from lookup"
939        );
940
941        // func2 should still be there
942        assert!(
943            index
944                .symbols_by_name("func2")
945                .map(|s| !s.is_empty())
946                .unwrap_or(false),
947            "func2 should still be in lookup"
948        );
949
950        // Entry points should be empty (func1 was the only entry point)
951        assert!(
952            index.entry_points.is_empty(),
953            "Entry points should be empty"
954        );
955
956        // Edges involving func1 should be removed
957        assert!(index.edges.is_empty(), "Edges should be removed");
958    }
959
960    #[test]
961    fn test_next_id_methods() {
962        let mut index = SemanticIndex::new();
963        let file_id = index.add_file(PathBuf::from("test.ts"));
964
965        // Initially all next IDs should be 0
966        assert_eq!(index.next_symbol_id(), 0);
967        assert_eq!(index.next_token_id(), 0);
968        assert_eq!(index.next_scope_id(), 0);
969
970        // Add a symbol
971        let name = index.strings.intern("test");
972        let symbol = Symbol::new(
973            0,
974            name,
975            file_id,
976            SymbolKind::Function,
977            SymbolFlags::empty(),
978            1,
979            10,
980        );
981        index.add_symbol(symbol, "test");
982
983        // Next symbol ID should be 1
984        assert_eq!(index.next_symbol_id(), 1);
985
986        // Add a token
987        let token = Token::new(0, name, file_id, 1, 0, TokenKind::Identifier, 0);
988        index.add_token(token, "test");
989
990        // Next token ID should be 1
991        assert_eq!(index.next_token_id(), 1);
992    }
993
994    #[test]
995    fn test_has_file() {
996        let mut index = SemanticIndex::new();
997
998        assert!(!index.has_file(std::path::Path::new("test.ts")));
999
1000        index.add_file(PathBuf::from("test.ts"));
1001
1002        assert!(index.has_file(std::path::Path::new("test.ts")));
1003        assert!(!index.has_file(std::path::Path::new("other.ts")));
1004    }
1005}