greppy/trace/
builder.rs

1//! Semantic Index Builder
2//!
3//! Builds a SemanticIndex from extracted file data.
4//! This module bridges the extraction layer with the index layer.
5//!
6//! @module trace/builder
7
8use std::collections::HashMap;
9use std::path::{Path, PathBuf};
10
11use rayon::prelude::*;
12use tracing::{debug, info};
13
14use super::extract::{extract_file, ExtractedCall, ExtractedData, ExtractedSymbol};
15use super::index::SemanticIndex;
16use super::storage::{save_index, trace_index_path};
17use super::types::{
18    Edge, RefKind, Reference, Scope, ScopeKind, Symbol, SymbolFlags, SymbolKind, Token, TokenKind,
19};
20use crate::core::error::Result;
21
22// =============================================================================
23// BUILDER
24// =============================================================================
25
/// Builder for constructing a SemanticIndex from source files.
///
/// Holds the index under construction together with the ID counters and the
/// name lookup table that are needed while files are being added.
pub struct SemanticIndexBuilder {
    /// The index being built
    index: SemanticIndex,
    /// Map from symbol name to the IDs of every symbol registered under that
    /// name (not scoped per file); used for call and construction resolution
    symbol_lookup: HashMap<String, Vec<u32>>,
    /// Next symbol ID
    next_symbol_id: u32,
    /// Next token ID
    next_token_id: u32,
    /// Next scope ID
    next_scope_id: u32,
    /// Project root for relative paths
    project_root: PathBuf,
}
41
42impl SemanticIndexBuilder {
43    /// Create a new builder
44    pub fn new(project_root: impl AsRef<Path>) -> Self {
45        Self {
46            index: SemanticIndex::new(),
47            symbol_lookup: HashMap::new(),
48            next_symbol_id: 0,
49            next_token_id: 0,
50            next_scope_id: 0,
51            project_root: project_root.as_ref().to_path_buf(),
52        }
53    }
54
55    /// Create with estimated capacity
56    pub fn with_capacity(project_root: impl AsRef<Path>, estimated_files: usize) -> Self {
57        // Estimate ~50 symbols, ~200 tokens, ~100 refs per file
58        let symbols = estimated_files * 50;
59        let tokens = estimated_files * 200;
60        let refs = estimated_files * 100;
61        let scopes = estimated_files * 20;
62        let edges = estimated_files * 30;
63
64        Self {
65            index: SemanticIndex::with_capacity(
66                symbols,
67                tokens,
68                refs,
69                scopes,
70                edges,
71                estimated_files,
72            ),
73            symbol_lookup: HashMap::with_capacity(symbols),
74            next_symbol_id: 0,
75            next_token_id: 0,
76            next_scope_id: 0,
77            project_root: project_root.as_ref().to_path_buf(),
78        }
79    }
80
81    /// Add a file to the index
82    pub fn add_file(&mut self, path: &Path, content: &str) {
83        // Get relative path for storage
84        let rel_path = path
85            .strip_prefix(&self.project_root)
86            .unwrap_or(path)
87            .to_path_buf();
88
89        // Add file to index
90        let file_id = self.index.add_file(rel_path.clone());
91
92        // Extract data from file
93        let data = extract_file(path, content, None);
94
95        if data.is_empty() {
96            return;
97        }
98
99        debug!(
100            file = %rel_path.display(),
101            symbols = data.symbols.len(),
102            calls = data.calls.len(),
103            tokens = data.tokens.len(),
104            method = data.extraction_method.as_str(),
105            "Extracted"
106        );
107
108        // Add symbols with file path context for entry point detection
109        for sym in &data.symbols {
110            self.add_symbol_with_path(file_id, sym, Some(&rel_path));
111        }
112
113        // Add tokens
114        for tok in &data.tokens {
115            self.add_token(file_id, tok);
116        }
117
118        // Add scopes
119        for (idx, scope) in data.scopes.iter().enumerate() {
120            self.add_scope(file_id, scope, idx);
121        }
122
123        // Store calls for later edge resolution
124        // We need to resolve calls after all symbols are added
125        for call in &data.calls {
126            self.add_call_token(file_id, call);
127        }
128
129        // Add construction references
130        for ref_item in &data.references {
131            self.add_construction_reference(file_id, ref_item);
132        }
133    }
134
135    /// Add a construction reference to the index
136    fn add_construction_reference(
137        &mut self,
138        file_id: u16,
139        extracted: &super::extract::ExtractedRef,
140    ) {
141        // Only process construction references
142        if extracted.kind != super::extract::RefKind::Construction {
143            return;
144        }
145
146        let id = self.next_token_id;
147        self.next_token_id += 1;
148
149        let name_offset = self.index.strings.intern(&extracted.name);
150
151        // Create a token for the construction site
152        let token = Token::new(
153            id,
154            name_offset,
155            file_id,
156            extracted.line,
157            extracted.column,
158            TokenKind::Type,
159            0,
160        );
161
162        self.index.add_token(token, &extracted.name);
163
164        // Try to find the symbol being constructed and add a reference
165        if let Some(target_ids) = self.symbol_lookup.get(&extracted.name) {
166            for &target_id in target_ids {
167                self.index
168                    .add_reference(Reference::new(id, target_id, RefKind::Construction));
169            }
170        }
171    }
172
173    /// Add a symbol with file path context for entry point detection
174    fn add_symbol_with_path(
175        &mut self,
176        file_id: u16,
177        extracted: &ExtractedSymbol,
178        file_path: Option<&Path>,
179    ) {
180        let id = self.next_symbol_id;
181        self.next_symbol_id += 1;
182
183        // Intern the name
184        let name_offset = self.index.strings.intern(&extracted.name);
185
186        // Convert kind
187        let kind = match extracted.kind {
188            super::extract::SymbolKind::Function => SymbolKind::Function,
189            super::extract::SymbolKind::Method => SymbolKind::Method,
190            super::extract::SymbolKind::Class => SymbolKind::Class,
191            super::extract::SymbolKind::Struct => SymbolKind::Struct,
192            super::extract::SymbolKind::Enum => SymbolKind::Enum,
193            super::extract::SymbolKind::Interface => SymbolKind::Interface,
194            super::extract::SymbolKind::TypeAlias => SymbolKind::TypeAlias,
195            super::extract::SymbolKind::Constant => SymbolKind::Constant,
196            super::extract::SymbolKind::Variable => SymbolKind::Variable,
197            super::extract::SymbolKind::Module => SymbolKind::Module,
198            // Trait maps to Interface (closest semantic match)
199            super::extract::SymbolKind::Trait => SymbolKind::Interface,
200            // Impl blocks are not tracked as standalone symbols
201            super::extract::SymbolKind::Impl => SymbolKind::Unknown,
202        };
203
204        // Build flags
205        let mut flags = SymbolFlags::empty();
206        if extracted.is_exported {
207            flags |= SymbolFlags::IS_EXPORTED;
208        }
209        if extracted.is_async {
210            flags |= SymbolFlags::IS_ASYNC;
211        }
212
213        // Detect entry points using multiple heuristics
214        let is_entry_point =
215            self.detect_entry_point(&extracted.name, kind, extracted.is_exported, file_path);
216        if is_entry_point {
217            flags |= SymbolFlags::IS_ENTRY_POINT;
218        }
219
220        let symbol = Symbol::new(
221            id,
222            name_offset,
223            file_id,
224            kind,
225            flags,
226            extracted.start_line,
227            extracted.end_line,
228        );
229
230        self.index.add_symbol(symbol, &extracted.name);
231
232        // Add to lookup for call resolution
233        self.symbol_lookup
234            .entry(extracted.name.clone())
235            .or_default()
236            .push(id);
237    }
238
239    /// Detect if a symbol is an entry point based on various heuristics
240    fn detect_entry_point(
241        &self,
242        name: &str,
243        kind: SymbolKind,
244        is_exported: bool,
245        file_path: Option<&Path>,
246    ) -> bool {
247        // Only functions and methods can be entry points
248        if !matches!(kind, SymbolKind::Function | SymbolKind::Method) {
249            return false;
250        }
251
252        // main() is always an entry point
253        if name == "main" {
254            return true;
255        }
256
257        // Exported functions/methods are entry points
258        if is_exported {
259            return true;
260        }
261
262        // Check file path patterns for entry points
263        if let Some(path) = file_path {
264            let path_str = path.to_string_lossy().to_lowercase();
265            let file_name = path
266                .file_name()
267                .map(|n| n.to_string_lossy().to_lowercase())
268                .unwrap_or_default();
269
270            // Rust: public items in lib.rs are entry points
271            if file_name == "lib.rs" {
272                return true;
273            }
274
275            // Test files: test functions are entry points
276            if path_str.contains("test")
277                || path_str.contains("spec")
278                || file_name.starts_with("test_")
279                || file_name.ends_with("_test.rs")
280            {
281                return true;
282            }
283
284            // Benchmark files
285            if path_str.contains("bench") {
286                return true;
287            }
288
289            // TypeScript/JavaScript: index files and handlers
290            if file_name == "index.ts"
291                || file_name == "index.js"
292                || file_name == "index.tsx"
293                || file_name == "index.jsx"
294            {
295                return true;
296            }
297
298            // Check for common handler patterns
299            if matches!(
300                name,
301                "handler" | "default" | "GET" | "POST" | "PUT" | "DELETE" | "PATCH"
302            ) {
303                return true;
304            }
305        }
306
307        // Python: test_ prefixed functions are entry points
308        if name.starts_with("test_") {
309            return true;
310        }
311
312        false
313    }
314
315    /// Add a token to the index
316    fn add_token(&mut self, file_id: u16, extracted: &super::extract::ExtractedToken) {
317        let id = self.next_token_id;
318        self.next_token_id += 1;
319
320        let name_offset = self.index.strings.intern(&extracted.name);
321
322        let kind = match extracted.kind {
323            super::extract::TokenKind::Identifier => TokenKind::Identifier,
324            // Keywords, Operators, Literals, Comments map to Unknown (no semantic index equivalent)
325            super::extract::TokenKind::Keyword => TokenKind::Unknown,
326            super::extract::TokenKind::Operator => TokenKind::Unknown,
327            super::extract::TokenKind::Literal => TokenKind::Unknown,
328            super::extract::TokenKind::Comment => TokenKind::Unknown,
329            super::extract::TokenKind::Unknown => TokenKind::Unknown,
330        };
331
332        let token = Token::new(
333            id,
334            name_offset,
335            file_id,
336            extracted.line,
337            extracted.column,
338            kind,
339            0, // scope_id - would need scope resolution
340        );
341
342        self.index.add_token(token, &extracted.name);
343    }
344
345    /// Add a call as a token (for later edge resolution)
346    fn add_call_token(&mut self, file_id: u16, call: &ExtractedCall) {
347        let id = self.next_token_id;
348        self.next_token_id += 1;
349
350        let name_offset = self.index.strings.intern(&call.callee_name);
351
352        let token = Token::new(
353            id,
354            name_offset,
355            file_id,
356            call.line,
357            call.column,
358            TokenKind::Call,
359            0,
360        );
361
362        self.index.add_token(token, &call.callee_name);
363    }
364
365    /// Add a scope to the index
366    fn add_scope(&mut self, file_id: u16, extracted: &super::extract::ExtractedScope, _idx: usize) {
367        let id = self.next_scope_id;
368        self.next_scope_id += 1;
369
370        let kind = match extracted.kind {
371            super::extract::ScopeKind::File => ScopeKind::File,
372            super::extract::ScopeKind::Module => ScopeKind::Module,
373            super::extract::ScopeKind::Class => ScopeKind::Class,
374            super::extract::ScopeKind::Function => ScopeKind::Function,
375            super::extract::ScopeKind::Block => ScopeKind::Block,
376            // Loop and Conditional map to Block (generic block scope)
377            super::extract::ScopeKind::Loop => ScopeKind::Block,
378            super::extract::ScopeKind::Conditional => ScopeKind::Block,
379        };
380
381        let parent_id = extracted.parent_index.map(|i| i as u32).unwrap_or(u32::MAX);
382        let name_offset = extracted
383            .name
384            .as_ref()
385            .map(|n| self.index.strings.intern(n))
386            .unwrap_or(0);
387
388        let scope = Scope::new(
389            id,
390            kind,
391            file_id,
392            parent_id,
393            extracted.start_line,
394            extracted.end_line,
395            name_offset,
396        );
397
398        self.index.add_scope(scope);
399    }
400
401    /// Resolve call edges after all files are processed
402    pub fn resolve_edges(&mut self) {
403        // For each call token, try to find the target symbol
404        let call_tokens: Vec<_> = self
405            .index
406            .tokens
407            .iter()
408            .filter(|t| t.token_kind() == TokenKind::Call)
409            .cloned()
410            .collect();
411
412        for token in call_tokens {
413            let callee_name = match self.index.strings.get(token.name_offset) {
414                Some(name) => name.to_string(),
415                None => continue,
416            };
417
418            // Find symbols with this name
419            if let Some(target_ids) = self.symbol_lookup.get(&callee_name) {
420                // Find the containing symbol for this call
421                let caller_id = self.find_containing_symbol(token.file_id, token.line);
422
423                if let Some(caller_id) = caller_id {
424                    // Add edges to all matching symbols (could be overloaded)
425                    for &target_id in target_ids {
426                        // Don't add self-edges
427                        if caller_id != target_id {
428                            self.index
429                                .add_edge(Edge::new(caller_id, target_id, token.line));
430                        }
431                    }
432
433                    // Add reference
434                    for &target_id in target_ids {
435                        self.index.add_reference(Reference::new(
436                            token.id,
437                            target_id,
438                            RefKind::Call,
439                        ));
440                    }
441                }
442            }
443        }
444
445        info!(
446            edges = self.index.edges.len(),
447            references = self.index.references.len(),
448            "Resolved call edges"
449        );
450    }
451
452    /// Find the symbol containing a given line in a file
453    fn find_containing_symbol(&self, file_id: u16, line: u32) -> Option<u32> {
454        // Find the smallest symbol that contains this line
455        let mut best: Option<(u32, u32)> = None; // (symbol_id, size)
456
457        for symbol in &self.index.symbols {
458            if symbol.file_id == file_id && symbol.start_line <= line && symbol.end_line >= line {
459                let size = symbol.end_line - symbol.start_line;
460                match best {
461                    None => best = Some((symbol.id, size)),
462                    Some((_, best_size)) if size < best_size => best = Some((symbol.id, size)),
463                    _ => {}
464                }
465            }
466        }
467
468        best.map(|(id, _)| id)
469    }
470
    /// Build the final index.
    ///
    /// Consumes the builder, runs `resolve_edges` over everything added so
    /// far, and returns the finished index.
    pub fn build(mut self) -> SemanticIndex {
        self.resolve_edges();
        self.index
    }
476
477    /// Get statistics about the build
478    pub fn stats(&self) -> BuildStats {
479        BuildStats {
480            files: self.index.files.len(),
481            symbols: self.index.symbols.len(),
482            tokens: self.index.tokens.len(),
483            scopes: self.index.scopes.len(),
484            edges: self.index.edges.len(),
485            references: self.index.references.len(),
486        }
487    }
488}
489
/// Counts describing the contents of an index at some point of the build.
#[derive(Debug, Clone, Copy)]
pub struct BuildStats {
    /// Number of files registered
    pub files: usize,
    /// Number of symbols
    pub symbols: usize,
    /// Number of tokens
    pub tokens: usize,
    /// Number of scopes
    pub scopes: usize,
    /// Number of call edges
    pub edges: usize,
    /// Number of references
    pub references: usize,
}

/// One-line summary; scopes and references are intentionally left out.
impl std::fmt::Display for BuildStats {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            files,
            symbols,
            tokens,
            edges,
            ..
        } = *self;

        f.write_fmt(format_args!(
            "{} files, {} symbols, {} tokens, {} edges",
            files, symbols, tokens, edges
        ))
    }
}
510
511// =============================================================================
512// PARALLEL BUILDER
513// =============================================================================
514
515/// Build a SemanticIndex from a list of files in parallel
516pub fn build_index_parallel(project_root: &Path, files: &[(PathBuf, String)]) -> SemanticIndex {
517    info!(files = files.len(), "Building semantic index");
518
519    // Extract all files in parallel
520    let extractions: Vec<(PathBuf, ExtractedData)> = files
521        .par_iter()
522        .map(|(path, content)| {
523            let data = extract_file(path, content, None);
524            (path.clone(), data)
525        })
526        .collect();
527
528    // Build index sequentially (index is not thread-safe)
529    let mut builder = SemanticIndexBuilder::with_capacity(project_root, files.len());
530
531    for (path, data) in &extractions {
532        if !data.is_empty() {
533            // Re-add using the builder's method which handles all the details
534            let rel_path = path
535                .strip_prefix(project_root)
536                .unwrap_or(path)
537                .to_path_buf();
538            let file_id = builder.index.add_file(rel_path.clone());
539
540            // Add symbols with file path for entry point detection
541            for sym in &data.symbols {
542                builder.add_symbol_with_path(file_id, sym, Some(&rel_path));
543            }
544
545            // Add tokens
546            for tok in &data.tokens {
547                builder.add_token(file_id, tok);
548            }
549
550            // Add scopes
551            for (idx, scope) in data.scopes.iter().enumerate() {
552                builder.add_scope(file_id, scope, idx);
553            }
554
555            // Add calls
556            for call in &data.calls {
557                builder.add_call_token(file_id, call);
558            }
559
560            // Add construction references
561            for ref_item in &data.references {
562                builder.add_construction_reference(file_id, ref_item);
563            }
564        }
565    }
566
567    let stats = builder.stats();
568    info!(%stats, "Extraction complete, resolving edges");
569
570    builder.build()
571}
572
573/// Build and save a semantic index for a project
574pub fn build_and_save_index(
575    project_root: &Path,
576    files: &[(PathBuf, String)],
577) -> Result<BuildStats> {
578    let index = build_index_parallel(project_root, files);
579    let stats = index.stats();
580
581    let path = trace_index_path(project_root);
582
583    // Ensure directory exists
584    if let Some(parent) = path.parent() {
585        std::fs::create_dir_all(parent)?;
586    }
587
588    save_index(&index, &path)?;
589
590    info!(
591        path = %path.display(),
592        symbols = stats.symbols,
593        tokens = stats.tokens,
594        edges = stats.edges,
595        "Saved semantic index"
596    );
597
598    Ok(BuildStats {
599        files: stats.files,
600        symbols: stats.symbols,
601        tokens: stats.tokens,
602        scopes: stats.scopes,
603        edges: stats.edges,
604        references: stats.references,
605    })
606}
607
608// =============================================================================
609// INCREMENTAL UPDATE
610// =============================================================================
611
/// Outcome of updating a single file in an existing index.
#[derive(Debug, Clone, Copy)]
pub struct IncrementalUpdateResult {
    /// Number of symbols removed (from old version)
    pub symbols_removed: usize,
    /// Number of symbols added (from new version)
    pub symbols_added: usize,
    /// Number of edges after update
    pub edges_count: usize,
    /// Time taken in milliseconds
    pub elapsed_ms: f64,
}

/// One-line summary with the elapsed time rounded to one decimal place.
impl std::fmt::Display for IncrementalUpdateResult {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            symbols_removed,
            symbols_added,
            edges_count,
            elapsed_ms,
        } = *self;

        f.write_fmt(format_args!(
            "removed {} symbols, added {} symbols, {} edges ({:.1}ms)",
            symbols_removed, symbols_added, edges_count, elapsed_ms
        ))
    }
}
634
635/// Update a single file in an existing SemanticIndex.
636///
637/// This performs an incremental update:
638/// 1. Remove all data for the old version of the file (if it exists)
639/// 2. Re-extract the file
640/// 3. Add the new symbols, tokens, references, scopes
641/// 4. Re-resolve edges for the new symbols
642///
643/// This is much faster than rebuilding the entire index (~20-50ms per file).
644///
645/// # Arguments
646/// * `index` - The existing SemanticIndex to update
647/// * `project_root` - Project root path for computing relative paths
648/// * `path` - Path to the file to update
649/// * `content` - New content of the file
650///
651/// # Returns
652/// Statistics about the update operation
653pub fn update_file_incremental(
654    index: &mut SemanticIndex,
655    project_root: &Path,
656    path: &Path,
657    content: &str,
658) -> IncrementalUpdateResult {
659    let start = std::time::Instant::now();
660
661    // Get relative path
662    let rel_path = path
663        .strip_prefix(project_root)
664        .unwrap_or(path)
665        .to_path_buf();
666
667    // Check if file already exists in index
668    let file_id = if let Some(existing_id) = index.file_id_for_path(&rel_path) {
669        // Remove old data
670        let removed = index.remove_file_data(existing_id);
671        debug!(
672            file = %rel_path.display(),
673            removed_symbols = removed,
674            "Removed old file data"
675        );
676        existing_id
677    } else {
678        // New file - add it
679        index.add_file(rel_path.clone())
680    };
681
682    // Extract data from file
683    let data = extract_file(path, content, None);
684
685    if data.is_empty() {
686        return IncrementalUpdateResult {
687            symbols_removed: 0,
688            symbols_added: 0,
689            edges_count: index.edges.len(),
690            elapsed_ms: start.elapsed().as_secs_f64() * 1000.0,
691        };
692    }
693
694    debug!(
695        file = %rel_path.display(),
696        symbols = data.symbols.len(),
697        calls = data.calls.len(),
698        tokens = data.tokens.len(),
699        method = data.extraction_method.as_str(),
700        "Extracted for incremental update"
701    );
702
703    // Track IDs for the new symbols
704    let mut new_symbol_ids: Vec<u32> = Vec::with_capacity(data.symbols.len());
705    let mut symbol_lookup: HashMap<String, Vec<u32>> = HashMap::new();
706
707    // Add symbols with entry point detection
708    for sym in &data.symbols {
709        let id = index.next_symbol_id();
710        let name_offset = index.strings.intern(&sym.name);
711
712        let kind = match sym.kind {
713            super::extract::SymbolKind::Function => SymbolKind::Function,
714            super::extract::SymbolKind::Method => SymbolKind::Method,
715            super::extract::SymbolKind::Class => SymbolKind::Class,
716            super::extract::SymbolKind::Struct => SymbolKind::Struct,
717            super::extract::SymbolKind::Enum => SymbolKind::Enum,
718            super::extract::SymbolKind::Interface => SymbolKind::Interface,
719            super::extract::SymbolKind::TypeAlias => SymbolKind::TypeAlias,
720            super::extract::SymbolKind::Constant => SymbolKind::Constant,
721            super::extract::SymbolKind::Variable => SymbolKind::Variable,
722            super::extract::SymbolKind::Module => SymbolKind::Module,
723            // Trait maps to Interface (closest semantic match)
724            super::extract::SymbolKind::Trait => SymbolKind::Interface,
725            // Impl blocks are not tracked as standalone symbols
726            super::extract::SymbolKind::Impl => SymbolKind::Unknown,
727        };
728
729        let mut flags = SymbolFlags::empty();
730        if sym.is_exported {
731            flags |= SymbolFlags::IS_EXPORTED;
732        }
733        if sym.is_async {
734            flags |= SymbolFlags::IS_ASYNC;
735        }
736
737        // Detect entry points using the same logic as the builder
738        let is_entry_point =
739            detect_entry_point_standalone(&sym.name, kind, sym.is_exported, Some(&rel_path));
740        if is_entry_point {
741            flags |= SymbolFlags::IS_ENTRY_POINT;
742        }
743
744        let symbol = Symbol::new(
745            id,
746            name_offset,
747            file_id,
748            kind,
749            flags,
750            sym.start_line,
751            sym.end_line,
752        );
753
754        index.add_symbol(symbol, &sym.name);
755        new_symbol_ids.push(id);
756        symbol_lookup.entry(sym.name.clone()).or_default().push(id);
757    }
758
759    // Add tokens
760    for tok in &data.tokens {
761        let id = index.next_token_id();
762        let name_offset = index.strings.intern(&tok.name);
763
764        let kind = match tok.kind {
765            super::extract::TokenKind::Identifier => TokenKind::Identifier,
766            // Keywords, Operators, Literals, Comments map to Unknown (no semantic index equivalent)
767            super::extract::TokenKind::Keyword => TokenKind::Unknown,
768            super::extract::TokenKind::Operator => TokenKind::Unknown,
769            super::extract::TokenKind::Literal => TokenKind::Unknown,
770            super::extract::TokenKind::Comment => TokenKind::Unknown,
771            super::extract::TokenKind::Unknown => TokenKind::Unknown,
772        };
773
774        let token = Token::new(id, name_offset, file_id, tok.line, tok.column, kind, 0);
775        index.add_token(token, &tok.name);
776    }
777
778    // Add scopes
779    for scope in data.scopes.iter() {
780        let id = index.next_scope_id();
781        let kind = match scope.kind {
782            super::extract::ScopeKind::File => ScopeKind::File,
783            super::extract::ScopeKind::Module => ScopeKind::Module,
784            super::extract::ScopeKind::Class => ScopeKind::Class,
785            super::extract::ScopeKind::Function => ScopeKind::Function,
786            super::extract::ScopeKind::Block => ScopeKind::Block,
787            // Loop and Conditional map to Block (generic block scope)
788            super::extract::ScopeKind::Loop => ScopeKind::Block,
789            super::extract::ScopeKind::Conditional => ScopeKind::Block,
790        };
791
792        let parent_id = scope.parent_index.map(|i| i as u32).unwrap_or(u32::MAX);
793        let name_offset = scope
794            .name
795            .as_ref()
796            .map(|n| index.strings.intern(n))
797            .unwrap_or(0);
798
799        let scope_obj = Scope::new(
800            id,
801            kind,
802            file_id,
803            parent_id,
804            scope.start_line,
805            scope.end_line,
806            name_offset,
807        );
808        index.add_scope(scope_obj);
809    }
810
811    // Add call tokens and resolve edges
812    for call in &data.calls {
813        let token_id = index.next_token_id();
814        let name_offset = index.strings.intern(&call.callee_name);
815
816        let token = Token::new(
817            token_id,
818            name_offset,
819            file_id,
820            call.line,
821            call.column,
822            TokenKind::Call,
823            0,
824        );
825        index.add_token(token, &call.callee_name);
826
827        // Find the caller (containing symbol in this file)
828        let caller_id = find_containing_symbol_in_file(index, file_id, call.line);
829
830        // Find target symbols (could be in this file or other files)
831        let target_ids: Vec<u32> = if let Some(ids) = symbol_lookup.get(&call.callee_name) {
832            // Target is in the same file we're updating
833            ids.clone()
834        } else if let Some(ids) = index.symbols_by_name(&call.callee_name) {
835            // Target is in another file
836            ids.to_vec()
837        } else {
838            Vec::new()
839        };
840
841        if let Some(caller_id) = caller_id {
842            for &target_id in &target_ids {
843                if caller_id != target_id {
844                    index.add_edge(Edge::new(caller_id, target_id, call.line));
845                }
846            }
847
848            for &target_id in &target_ids {
849                index.add_reference(Reference::new(token_id, target_id, RefKind::Call));
850            }
851        }
852    }
853
854    // Add construction references
855    for ref_item in &data.references {
856        if ref_item.kind != super::extract::RefKind::Construction {
857            continue;
858        }
859
860        let token_id = index.next_token_id();
861        let name_offset = index.strings.intern(&ref_item.name);
862
863        let token = Token::new(
864            token_id,
865            name_offset,
866            file_id,
867            ref_item.line,
868            ref_item.column,
869            TokenKind::Type,
870            0,
871        );
872        index.add_token(token, &ref_item.name);
873
874        // Find target symbols
875        let target_ids: Vec<u32> = if let Some(ids) = symbol_lookup.get(&ref_item.name) {
876            ids.clone()
877        } else if let Some(ids) = index.symbols_by_name(&ref_item.name) {
878            ids.to_vec()
879        } else {
880            Vec::new()
881        };
882
883        for &target_id in &target_ids {
884            index.add_reference(Reference::new(token_id, target_id, RefKind::Construction));
885        }
886    }
887
888    let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0;
889
890    debug!(
891        file = %rel_path.display(),
892        symbols_added = new_symbol_ids.len(),
893        elapsed_ms = elapsed_ms,
894        "Incremental update complete"
895    );
896
897    IncrementalUpdateResult {
898        symbols_removed: 0, // TODO: track this properly
899        symbols_added: new_symbol_ids.len(),
900        edges_count: index.edges.len(),
901        elapsed_ms,
902    }
903}
904
905/// Remove a file from the index (e.g., when file is deleted)
906///
907/// # Arguments
908/// * `index` - The SemanticIndex to update
909/// * `project_root` - Project root path
910/// * `path` - Path to the deleted file
911///
912/// # Returns
913/// Number of symbols removed, or 0 if file wasn't in index
914pub fn remove_file_from_index(
915    index: &mut SemanticIndex,
916    project_root: &Path,
917    path: &Path,
918) -> usize {
919    let rel_path = path
920        .strip_prefix(project_root)
921        .unwrap_or(path)
922        .to_path_buf();
923
924    if let Some(file_id) = index.file_id_for_path(&rel_path) {
925        let removed = index.remove_file_data(file_id);
926        debug!(
927            file = %rel_path.display(),
928            removed_symbols = removed,
929            "Removed file from index"
930        );
931        removed
932    } else {
933        debug!(file = %rel_path.display(), "File not in index, nothing to remove");
934        0
935    }
936}
937
938/// Standalone entry point detection for incremental updates
939fn detect_entry_point_standalone(
940    name: &str,
941    kind: SymbolKind,
942    is_exported: bool,
943    file_path: Option<&Path>,
944) -> bool {
945    // Only functions and methods can be entry points
946    if !matches!(kind, SymbolKind::Function | SymbolKind::Method) {
947        return false;
948    }
949
950    // main() is always an entry point
951    if name == "main" {
952        return true;
953    }
954
955    // Exported functions/methods are entry points
956    if is_exported {
957        return true;
958    }
959
960    // Check file path patterns for entry points
961    if let Some(path) = file_path {
962        let path_str = path.to_string_lossy().to_lowercase();
963        let file_name = path
964            .file_name()
965            .map(|n| n.to_string_lossy().to_lowercase())
966            .unwrap_or_default();
967
968        // Rust: public items in lib.rs are entry points
969        if file_name == "lib.rs" {
970            return true;
971        }
972
973        // Test files: test functions are entry points
974        if path_str.contains("test")
975            || path_str.contains("spec")
976            || file_name.starts_with("test_")
977            || file_name.ends_with("_test.rs")
978        {
979            return true;
980        }
981
982        // Benchmark files
983        if path_str.contains("bench") {
984            return true;
985        }
986
987        // TypeScript/JavaScript: index files and handlers
988        if file_name == "index.ts"
989            || file_name == "index.js"
990            || file_name == "index.tsx"
991            || file_name == "index.jsx"
992        {
993            return true;
994        }
995
996        // Check for common handler patterns
997        if matches!(
998            name,
999            "handler" | "default" | "GET" | "POST" | "PUT" | "DELETE" | "PATCH"
1000        ) {
1001            return true;
1002        }
1003    }
1004
1005    // Python: test_ prefixed functions are entry points
1006    if name.starts_with("test_") {
1007        return true;
1008    }
1009
1010    false
1011}
1012
1013/// Find the symbol containing a given line in a specific file
1014fn find_containing_symbol_in_file(index: &SemanticIndex, file_id: u16, line: u32) -> Option<u32> {
1015    let mut best: Option<(u32, u32)> = None;
1016
1017    for symbol in &index.symbols {
1018        if symbol.file_id == file_id && symbol.start_line <= line && symbol.end_line >= line {
1019            let size = symbol.end_line - symbol.start_line;
1020            match best {
1021                None => best = Some((symbol.id, size)),
1022                Some((_, best_size)) if size < best_size => best = Some((symbol.id, size)),
1023                _ => {}
1024            }
1025        }
1026    }
1027
1028    best.map(|(id, _)| id)
1029}
1030
1031// =============================================================================
1032// TESTS
1033// =============================================================================
1034
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Build an index rooted at `root` from `(file name, source)` pairs,
    /// adding the files in the given order.
    fn index_from_sources(root: &Path, sources: &[(&str, &str)]) -> SemanticIndex {
        let mut builder = SemanticIndexBuilder::new(root);
        for &(file_name, source) in sources {
            let path = root.join(file_name);
            builder.add_file(&path, source);
        }
        builder.build()
    }

    /// Every function declared in a single file is reachable by name.
    #[test]
    fn test_builder_basic() {
        let dir = tempdir().unwrap();

        let code = r#"
function greet(name: string): string {
    return `Hello, ${name}!`;
}

function main() {
    greet("World");
}
"#;

        let index = index_from_sources(dir.path(), &[("test.ts", code)]);

        assert!(!index.symbols.is_empty(), "Should have symbols");
        assert!(
            index.symbols_by_name("greet").is_some(),
            "Should find greet"
        );
        assert!(index.symbols_by_name("main").is_some(), "Should find main");
    }

    /// A call inside one function produces a call edge to the callee.
    #[test]
    fn test_builder_call_resolution() {
        let dir = tempdir().unwrap();

        let code = r#"
function helper() {
    return 42;
}

function main() {
    const x = helper();
    return x;
}
"#;

        let index = index_from_sources(dir.path(), &[("test.ts", code)]);

        // Both endpoints of the expected edge must exist
        let caller_ids = index.symbols_by_name("main").unwrap();
        let callee_ids = index.symbols_by_name("helper").unwrap();

        assert!(!caller_ids.is_empty());
        assert!(!callee_ids.is_empty());

        // main should call helper
        let callees = index.callees(caller_ids[0]);
        assert!(callees.contains(&callee_ids[0]), "main should call helper");
    }

    /// Incrementally indexing a previously unseen file adds its symbols.
    #[test]
    fn test_incremental_update_add_file() {
        let dir = tempdir().unwrap();
        let project_root = dir.path();

        // Start from an index that knows a single file
        let mut index = index_from_sources(
            project_root,
            &[("file1.ts", "function foo() { return 1; }")],
        );

        let symbols_before = index.symbols.len();
        assert!(index.symbols_by_name("foo").is_some());

        // Feed a second file through the incremental path
        let second_file = project_root.join("file2.ts");
        let second_code = "function bar() { return 2; }";
        let result = update_file_incremental(&mut index, project_root, &second_file, second_code);

        assert!(result.symbols_added >= 1, "Should add at least one symbol");
        assert!(index.symbols_by_name("bar").is_some(), "Should find bar");
        assert!(
            index.symbols.len() > symbols_before,
            "Should have more symbols"
        );
    }

    /// Re-indexing an existing file with new content exposes the new symbols.
    #[test]
    fn test_incremental_update_modify_file() {
        let dir = tempdir().unwrap();
        let project_root = dir.path();

        let mut index = index_from_sources(
            project_root,
            &[("file1.ts", "function foo() { return 1; }")],
        );

        assert!(index.symbols_by_name("foo").is_some());
        assert!(index.symbols_by_name("baz").is_none());

        // Same path, different content
        let edited_file = project_root.join("file1.ts");
        let edited_code = "function baz() { return 2; }";
        let result = update_file_incremental(&mut index, project_root, &edited_file, edited_code);

        assert!(result.symbols_added >= 1);
        assert!(index.symbols_by_name("baz").is_some(), "Should find baz");
        // Note: foo may still be in index since we don't remove stale symbols from Vec
        // but the lookup should only find baz for new queries
    }

    /// Removing one file clears its names from lookup and leaves other files alone.
    #[test]
    fn test_remove_file_from_index() {
        let dir = tempdir().unwrap();
        let project_root = dir.path();

        // Two files, one function each
        let mut index = index_from_sources(
            project_root,
            &[
                ("file1.ts", "function foo() {}"),
                ("file2.ts", "function bar() {}"),
            ],
        );

        assert!(index.symbols_by_name("foo").is_some());
        assert!(index.symbols_by_name("bar").is_some());

        // Remove file1
        let deleted_file = project_root.join("file1.ts");
        let removed = remove_file_from_index(&mut index, project_root, &deleted_file);
        assert!(removed >= 1, "Should remove at least one symbol");

        // foo should be gone from lookup
        let foo_gone = index
            .symbols_by_name("foo")
            .map(|ids| ids.is_empty())
            .unwrap_or(true);
        assert!(foo_gone, "foo should be removed from lookup");

        // bar should still be there
        let bar_present = index
            .symbols_by_name("bar")
            .map(|ids| !ids.is_empty())
            .unwrap_or(false);
        assert!(bar_present, "bar should still be in index");
    }

    /// A single-file incremental update on a small index stays within the time budget.
    #[test]
    fn test_incremental_update_performance() {
        let dir = tempdir().unwrap();
        let project_root = dir.path();

        // Build a small index through the capacity-hinted constructor
        let mut builder = SemanticIndexBuilder::with_capacity(project_root, 10);
        for i in 0..10 {
            let path = project_root.join(format!("file{}.ts", i));
            let code = format!("function func{}() {{ return {}; }}", i, i);
            builder.add_file(&path, &code);
        }
        let mut index = builder.build();

        // Update a single file and check it's fast
        let update_path = project_root.join("file0.ts");
        let new_code = "function updatedFunc() { return 42; }";
        let result = update_file_incremental(&mut index, project_root, &update_path, new_code);

        // Should complete in under 100ms (target is 20-50ms)
        assert!(
            result.elapsed_ms < 100.0,
            "Incremental update should be fast (<100ms), was {}ms",
            result.elapsed_ms
        );
    }
}