Skip to main content

infiniloom_engine/index/
types.rs

1//! Core data structures for the Git context index.
2//!
3//! This module defines the types used to build and query a pre-computed
4//! index of symbols, files, and their relationships for fast diff context.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::ops::Range;
9
10/// Type-safe symbol ID to prevent mixing with other integer types.
11/// Use `SymbolId::new()` to create and `id.0` or `id.as_u32()` to access.
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
13#[repr(transparent)]
14pub struct SymbolId(pub u32);
15
16impl SymbolId {
17    /// Create a new SymbolId
18    #[inline]
19    pub const fn new(id: u32) -> Self {
20        Self(id)
21    }
22
23    /// Get the underlying u32 value
24    #[inline]
25    pub const fn as_u32(self) -> u32 {
26        self.0
27    }
28}
29
30impl From<u32> for SymbolId {
31    #[inline]
32    fn from(id: u32) -> Self {
33        Self(id)
34    }
35}
36
37impl From<SymbolId> for u32 {
38    #[inline]
39    fn from(id: SymbolId) -> Self {
40        id.0
41    }
42}
43
44impl std::fmt::Display for SymbolId {
45    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46        write!(f, "sym#{}", self.0)
47    }
48}
49
50/// Type-safe file ID to prevent mixing with other integer types.
51/// Use `FileId::new()` to create and `id.0` or `id.as_u32()` to access.
52#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
53#[repr(transparent)]
54pub struct FileId(pub u32);
55
56impl FileId {
57    /// Create a new FileId
58    #[inline]
59    pub const fn new(id: u32) -> Self {
60        Self(id)
61    }
62
63    /// Get the underlying u32 value
64    #[inline]
65    pub const fn as_u32(self) -> u32 {
66        self.0
67    }
68}
69
70impl From<u32> for FileId {
71    #[inline]
72    fn from(id: u32) -> Self {
73        Self(id)
74    }
75}
76
77impl From<FileId> for u32 {
78    #[inline]
79    fn from(id: FileId) -> Self {
80        id.0
81    }
82}
83
84impl std::fmt::Display for FileId {
85    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
86        write!(f, "file#{}", self.0)
87    }
88}
89
90/// A symbol in the index with unique ID for graph operations.
91#[derive(Debug, Clone, Serialize, Deserialize)]
92pub struct IndexSymbol {
93    /// Unique symbol ID within this index
94    pub id: SymbolId,
95    /// Symbol name
96    pub name: String,
97    /// Symbol kind
98    pub kind: IndexSymbolKind,
99    /// File ID containing this symbol
100    pub file_id: FileId,
101    /// Source span (line/column positions)
102    pub span: Span,
103    /// Full signature for functions/methods
104    pub signature: Option<String>,
105    /// Parent symbol ID (for methods inside classes)
106    pub parent: Option<SymbolId>,
107    /// Visibility modifier
108    pub visibility: Visibility,
109    /// Documentation string
110    pub docstring: Option<String>,
111}
112
113/// Symbol kind for the index (extended from core SymbolKind)
114#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
115pub enum IndexSymbolKind {
116    Function,
117    Method,
118    Class,
119    Struct,
120    Interface,
121    Trait,
122    Enum,
123    Constant,
124    Variable,
125    Module,
126    Import,
127    Export,
128    TypeAlias,
129    Macro,
130}
131
132impl IndexSymbolKind {
133    pub fn name(&self) -> &'static str {
134        match self {
135            Self::Function => "function",
136            Self::Method => "method",
137            Self::Class => "class",
138            Self::Struct => "struct",
139            Self::Interface => "interface",
140            Self::Trait => "trait",
141            Self::Enum => "enum",
142            Self::Constant => "constant",
143            Self::Variable => "variable",
144            Self::Module => "module",
145            Self::Import => "import",
146            Self::Export => "export",
147            Self::TypeAlias => "type",
148            Self::Macro => "macro",
149        }
150    }
151
152    /// Check if this symbol kind defines a scope (can contain other symbols)
153    pub fn is_scope(&self) -> bool {
154        matches!(
155            self,
156            Self::Class | Self::Struct | Self::Interface | Self::Trait | Self::Module | Self::Enum
157        )
158    }
159}
160
161/// Visibility modifier for symbols
162#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
163pub enum Visibility {
164    #[default]
165    Public,
166    Private,
167    Protected,
168    Internal,
169}
170
171/// Source code span (start and end positions)
172#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
173pub struct Span {
174    pub start_line: u32,
175    pub start_col: u16,
176    pub end_line: u32,
177    pub end_col: u16,
178}
179
180impl Span {
181    pub fn new(start_line: u32, start_col: u16, end_line: u32, end_col: u16) -> Self {
182        Self { start_line, start_col, end_line, end_col }
183    }
184
185    /// Check if a line falls within this span
186    pub fn contains_line(&self, line: u32) -> bool {
187        line >= self.start_line && line <= self.end_line
188    }
189
190    /// Number of lines in this span
191    pub fn line_count(&self) -> u32 {
192        if self.end_line >= self.start_line {
193            self.end_line - self.start_line + 1
194        } else {
195            1
196        }
197    }
198}
199
200/// A file entry in the index
201#[derive(Debug, Clone, Serialize, Deserialize)]
202pub struct FileEntry {
203    /// Unique file ID
204    pub id: FileId,
205    /// Relative path from repository root
206    pub path: String,
207    /// Detected language
208    pub language: Language,
209    /// BLAKE3 content hash for change detection
210    pub content_hash: [u8; 32],
211    /// Index range into the symbols vector (raw u32 for Range compatibility)
212    pub symbols: Range<u32>,
213    /// Import statements in this file
214    pub imports: Vec<Import>,
215    /// Number of lines
216    pub lines: u32,
217    /// Pre-computed token count (Claude model)
218    pub tokens: u32,
219}
220
221/// Detected programming language
222#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
223pub enum Language {
224    Rust,
225    Python,
226    JavaScript,
227    TypeScript,
228    Go,
229    Java,
230    C,
231    Cpp,
232    CSharp,
233    Ruby,
234    Bash,
235    Php,
236    Kotlin,
237    Swift,
238    Scala,
239    Haskell,
240    Elixir,
241    Clojure,
242    OCaml,
243    Lua,
244    R,
245    Zig,
246    Dart,
247    Hcl,
248    FSharp,
249    #[default]
250    Unknown,
251}
252
253impl Language {
254    pub fn from_extension(ext: &str) -> Self {
255        match ext.to_lowercase().as_str() {
256            "rs" => Self::Rust,
257            "py" | "pyi" | "pyw" => Self::Python,
258            "js" | "mjs" | "cjs" => Self::JavaScript,
259            "ts" | "mts" | "cts" => Self::TypeScript,
260            "tsx" | "jsx" => Self::TypeScript,
261            "go" => Self::Go,
262            "java" => Self::Java,
263            "c" | "h" => Self::C,
264            "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Self::Cpp,
265            "cs" => Self::CSharp,
266            "rb" => Self::Ruby,
267            "sh" | "bash" | "zsh" => Self::Bash,
268            "php" | "php3" | "php4" | "php5" | "phtml" => Self::Php,
269            "kt" | "kts" => Self::Kotlin,
270            "swift" => Self::Swift,
271            "scala" | "sc" => Self::Scala,
272            "hs" | "lhs" => Self::Haskell,
273            "ex" | "exs" => Self::Elixir,
274            "clj" | "cljs" | "cljc" | "edn" => Self::Clojure,
275            "ml" | "mli" => Self::OCaml,
276            "lua" => Self::Lua,
277            "r" | "rmd" => Self::R,
278            "zig" | "zon" => Self::Zig,
279            "dart" => Self::Dart,
280            "tf" | "hcl" | "tfvars" => Self::Hcl,
281            "fs" | "fsi" | "fsx" | "fsscript" => Self::FSharp,
282            _ => Self::Unknown,
283        }
284    }
285
286    pub fn name(&self) -> &'static str {
287        match self {
288            Self::Rust => "rust",
289            Self::Python => "python",
290            Self::JavaScript => "javascript",
291            Self::TypeScript => "typescript",
292            Self::Go => "go",
293            Self::Java => "java",
294            Self::C => "c",
295            Self::Cpp => "cpp",
296            Self::CSharp => "csharp",
297            Self::Ruby => "ruby",
298            Self::Bash => "bash",
299            Self::Php => "php",
300            Self::Kotlin => "kotlin",
301            Self::Swift => "swift",
302            Self::Scala => "scala",
303            Self::Haskell => "haskell",
304            Self::Elixir => "elixir",
305            Self::Clojure => "clojure",
306            Self::OCaml => "ocaml",
307            Self::Lua => "lua",
308            Self::R => "r",
309            Self::Zig => "zig",
310            Self::Dart => "dart",
311            Self::Hcl => "hcl",
312            Self::FSharp => "fsharp",
313            Self::Unknown => "unknown",
314        }
315    }
316}
317
318/// An import statement
319#[derive(Debug, Clone, Serialize, Deserialize)]
320pub struct Import {
321    /// Source path or module name (e.g., "src/utils" or "lodash")
322    pub source: String,
323    /// Resolved file ID if it's an internal import
324    pub resolved_file: Option<u32>,
325    /// Specific symbols imported (empty for wildcard imports)
326    pub symbols: Vec<String>,
327    /// Source span
328    pub span: Span,
329    /// Whether this is an external dependency
330    pub is_external: bool,
331}
332
333/// Main symbol index structure
334#[derive(Debug, Clone, Serialize, Deserialize)]
335pub struct SymbolIndex {
336    /// Index version (for compatibility checking)
337    pub version: u32,
338    /// Repository name
339    pub repo_name: String,
340    /// Git commit hash when index was built
341    pub commit_hash: Option<String>,
342    /// Timestamp of index creation
343    pub created_at: u64,
344    /// All files in the repository
345    pub files: Vec<FileEntry>,
346    /// All symbols across all files
347    pub symbols: Vec<IndexSymbol>,
348
349    // Lookup tables (built on load, not serialized)
350    #[serde(skip)]
351    pub file_by_path: HashMap<String, u32>,
352    #[serde(skip)]
353    pub symbols_by_name: HashMap<String, Vec<u32>>,
354}
355
356impl Default for SymbolIndex {
357    fn default() -> Self {
358        Self::new()
359    }
360}
361
362impl SymbolIndex {
363    pub const CURRENT_VERSION: u32 = 2;
364
365    pub fn new() -> Self {
366        Self {
367            version: Self::CURRENT_VERSION,
368            repo_name: String::new(),
369            commit_hash: None,
370            created_at: 0,
371            files: Vec::new(),
372            symbols: Vec::new(),
373            file_by_path: HashMap::new(),
374            symbols_by_name: HashMap::new(),
375        }
376    }
377
378    /// Rebuild lookup tables after deserialization
379    pub fn rebuild_lookups(&mut self) {
380        self.file_by_path.clear();
381        self.symbols_by_name.clear();
382
383        for file in &self.files {
384            self.file_by_path
385                .insert(file.path.clone(), file.id.as_u32());
386        }
387
388        for symbol in &self.symbols {
389            self.symbols_by_name
390                .entry(symbol.name.clone())
391                .or_default()
392                .push(symbol.id.as_u32());
393        }
394    }
395
396    /// Get file by path
397    pub fn get_file(&self, path: &str) -> Option<&FileEntry> {
398        self.file_by_path
399            .get(path)
400            .and_then(|&id| self.files.get(id as usize))
401    }
402
403    /// Get file by ID
404    pub fn get_file_by_id(&self, id: u32) -> Option<&FileEntry> {
405        self.files.get(id as usize)
406    }
407
408    /// Get symbol by ID
409    pub fn get_symbol(&self, id: u32) -> Option<&IndexSymbol> {
410        self.symbols.get(id as usize)
411    }
412
413    /// Get all symbols in a file
414    pub fn get_file_symbols(&self, file_id: FileId) -> &[IndexSymbol] {
415        if let Some(file) = self.get_file_by_id(file_id.as_u32()) {
416            &self.symbols[file.symbols.start as usize..file.symbols.end as usize]
417        } else {
418            &[]
419        }
420    }
421
422    /// Find symbols by name
423    pub fn find_symbols(&self, name: &str) -> Vec<&IndexSymbol> {
424        self.symbols_by_name
425            .get(name)
426            .map(|ids| ids.iter().filter_map(|&id| self.get_symbol(id)).collect())
427            .unwrap_or_default()
428    }
429
430    /// Find symbol containing a specific line in a file
431    pub fn find_symbol_at_line(&self, file_id: FileId, line: u32) -> Option<&IndexSymbol> {
432        self.get_file_symbols(file_id)
433            .iter()
434            .filter(|s| s.span.contains_line(line))
435            // Return the innermost (smallest) symbol containing the line
436            .min_by_key(|s| s.span.line_count())
437    }
438}
439
440/// Dependency graph for impact analysis
441///
442/// Uses both edge lists (for serialization) and adjacency maps (for O(1) queries).
443/// The adjacency maps are rebuilt after deserialization via `rebuild_adjacency_maps()`.
444#[derive(Debug, Clone, Default, Serialize, Deserialize)]
445pub struct DepGraph {
446    // Forward edges: X depends on Y
447    /// File imports: (file_id, imported_file_id)
448    pub file_imports: Vec<(u32, u32)>,
449    /// Symbol references: (symbol_id, referenced_symbol_id)
450    pub symbol_refs: Vec<(u32, u32)>,
451
452    // Reverse edges: Y is depended on by X
453    /// File imported by: (file_id, importing_file_id)
454    pub file_imported_by: Vec<(u32, u32)>,
455    /// Symbol referenced by: (symbol_id, referencing_symbol_id)
456    pub symbol_ref_by: Vec<(u32, u32)>,
457
458    // Call graph
459    /// Function calls: (caller_symbol_id, callee_symbol_id)
460    pub calls: Vec<(u32, u32)>,
461    /// Called by: (callee_symbol_id, caller_symbol_id)
462    pub called_by: Vec<(u32, u32)>,
463
464    // Pre-computed metrics
465    /// PageRank importance score per file
466    pub file_pagerank: Vec<f32>,
467    /// PageRank importance score per symbol
468    pub symbol_pagerank: Vec<f32>,
469
470    // ===== Adjacency maps for O(1) lookups (not serialized, rebuilt on load) =====
471    /// file_id -> list of files it imports
472    #[serde(skip)]
473    pub imports_adj: HashMap<u32, Vec<u32>>,
474    /// file_id -> list of files that import it
475    #[serde(skip)]
476    pub imported_by_adj: HashMap<u32, Vec<u32>>,
477    /// symbol_id -> list of symbols it references
478    #[serde(skip)]
479    pub refs_adj: HashMap<u32, Vec<u32>>,
480    /// symbol_id -> list of symbols that reference it
481    #[serde(skip)]
482    pub ref_by_adj: HashMap<u32, Vec<u32>>,
483    /// caller_id -> list of callees
484    #[serde(skip)]
485    pub callees_adj: HashMap<u32, Vec<u32>>,
486    /// callee_id -> list of callers
487    #[serde(skip)]
488    pub callers_adj: HashMap<u32, Vec<u32>>,
489}
490
491impl DepGraph {
492    pub fn new() -> Self {
493        Self::default()
494    }
495
496    /// Rebuild adjacency maps from edge lists.
497    /// Call this after deserializing a DepGraph.
498    pub fn rebuild_adjacency_maps(&mut self) {
499        self.imports_adj.clear();
500        self.imported_by_adj.clear();
501        self.refs_adj.clear();
502        self.ref_by_adj.clear();
503        self.callees_adj.clear();
504        self.callers_adj.clear();
505
506        // Rebuild file import adjacency
507        for &(from, to) in &self.file_imports {
508            self.imports_adj.entry(from).or_default().push(to);
509        }
510        for &(file, importer) in &self.file_imported_by {
511            self.imported_by_adj.entry(file).or_default().push(importer);
512        }
513
514        // Rebuild symbol reference adjacency
515        for &(from, to) in &self.symbol_refs {
516            self.refs_adj.entry(from).or_default().push(to);
517        }
518        for &(symbol, referencer) in &self.symbol_ref_by {
519            self.ref_by_adj.entry(symbol).or_default().push(referencer);
520        }
521
522        // Rebuild call graph adjacency
523        for &(caller, callee) in &self.calls {
524            self.callees_adj.entry(caller).or_default().push(callee);
525        }
526        for &(callee, caller) in &self.called_by {
527            self.callers_adj.entry(callee).or_default().push(caller);
528        }
529    }
530
531    /// Add a file import edge
532    pub fn add_file_import(&mut self, from_file: u32, to_file: u32) {
533        self.file_imports.push((from_file, to_file));
534        self.file_imported_by.push((to_file, from_file));
535        // Update adjacency maps
536        self.imports_adj.entry(from_file).or_default().push(to_file);
537        self.imported_by_adj
538            .entry(to_file)
539            .or_default()
540            .push(from_file);
541    }
542
543    /// Add a symbol reference edge
544    pub fn add_symbol_ref(&mut self, from_symbol: u32, to_symbol: u32) {
545        self.symbol_refs.push((from_symbol, to_symbol));
546        self.symbol_ref_by.push((to_symbol, from_symbol));
547        // Update adjacency maps
548        self.refs_adj
549            .entry(from_symbol)
550            .or_default()
551            .push(to_symbol);
552        self.ref_by_adj
553            .entry(to_symbol)
554            .or_default()
555            .push(from_symbol);
556    }
557
558    /// Add a function call edge
559    pub fn add_call(&mut self, caller: u32, callee: u32) {
560        self.calls.push((caller, callee));
561        self.called_by.push((callee, caller));
562        // Update adjacency maps
563        self.callees_adj.entry(caller).or_default().push(callee);
564        self.callers_adj.entry(callee).or_default().push(caller);
565    }
566
567    /// Get files that import a given file (O(1) lookup)
568    pub fn get_importers(&self, file_id: u32) -> Vec<u32> {
569        self.imported_by_adj
570            .get(&file_id)
571            .cloned()
572            .unwrap_or_default()
573    }
574
575    /// Get files that a given file imports (O(1) lookup)
576    pub fn get_imports(&self, file_id: u32) -> Vec<u32> {
577        self.imports_adj.get(&file_id).cloned().unwrap_or_default()
578    }
579
580    /// Get symbols that reference a given symbol (O(1) lookup)
581    pub fn get_referencers(&self, symbol_id: u32) -> Vec<u32> {
582        self.ref_by_adj.get(&symbol_id).cloned().unwrap_or_default()
583    }
584
585    /// Get callers of a function (O(1) lookup)
586    pub fn get_callers(&self, symbol_id: u32) -> Vec<u32> {
587        self.callers_adj
588            .get(&symbol_id)
589            .cloned()
590            .unwrap_or_default()
591    }
592
593    /// Get callees of a function (O(1) lookup)
594    pub fn get_callees(&self, symbol_id: u32) -> Vec<u32> {
595        self.callees_adj
596            .get(&symbol_id)
597            .cloned()
598            .unwrap_or_default()
599    }
600
601    /// Check if adjacency maps are populated (used to detect if rebuild is needed)
602    pub fn needs_rebuild(&self) -> bool {
603        // If we have edges but no adjacency data, rebuild is needed
604        (!self.file_imports.is_empty() && self.imports_adj.is_empty())
605            || (!self.calls.is_empty() && self.callees_adj.is_empty())
606    }
607}
608
609/// A reference to a symbol (for tracking usages)
610#[derive(Debug, Clone, Serialize, Deserialize)]
611pub struct Reference {
612    /// Referenced symbol ID
613    pub symbol_id: u32,
614    /// File containing the reference
615    pub file_id: u32,
616    /// Location of the reference
617    pub span: Span,
618    /// Kind of reference
619    pub kind: RefKind,
620}
621
622/// Kind of reference
623#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
624pub enum RefKind {
625    /// Function/method call
626    Call,
627    /// Variable read
628    Read,
629    /// Variable write
630    Write,
631    /// Import statement
632    Import,
633    /// Type annotation
634    TypeRef,
635    /// Class inheritance
636    Inheritance,
637}
638
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a parentless, undocumented function symbol located in file 0,
    /// with the signature `fn <name>()`.
    fn function_symbol(id: u32, name: &str, span: Span, visibility: Visibility) -> IndexSymbol {
        IndexSymbol {
            id: SymbolId::new(id),
            name: name.to_owned(),
            kind: IndexSymbolKind::Function,
            file_id: FileId::new(0),
            span,
            signature: Some(format!("fn {}()", name)),
            parent: None,
            visibility,
            docstring: None,
        }
    }

    #[test]
    fn test_span_contains_line() {
        let span = Span::new(10, 0, 20, 0);

        // Both boundaries and the interior are inside the span.
        for inside in [10, 15, 20] {
            assert!(span.contains_line(inside));
        }
        // The lines just outside either boundary are not.
        for outside in [9, 21] {
            assert!(!span.contains_line(outside));
        }
    }

    #[test]
    fn test_language_from_extension() {
        let expectations = [
            ("rs", Language::Rust),
            ("py", Language::Python),
            ("ts", Language::TypeScript),
            ("xyz", Language::Unknown),
        ];
        for (ext, expected) in expectations {
            assert_eq!(Language::from_extension(ext), expected);
        }
    }

    #[test]
    fn test_symbol_index_lookups() {
        let file_zero = FileId::new(0);

        let mut index = SymbolIndex::new();
        index.files.push(FileEntry {
            id: file_zero,
            path: "src/main.rs".to_owned(),
            language: Language::Rust,
            content_hash: [0; 32],
            symbols: 0..2,
            imports: vec![],
            lines: 100,
            tokens: 500,
        });
        index.symbols.push(function_symbol(
            0,
            "main",
            Span::new(1, 0, 10, 0),
            Visibility::Public,
        ));
        index.symbols.push(function_symbol(
            1,
            "helper",
            Span::new(15, 0, 25, 0),
            Visibility::Private,
        ));

        index.rebuild_lookups();

        // Path lookup
        assert!(index.get_file("src/main.rs").is_some());
        assert!(index.get_file("nonexistent.rs").is_none());

        // Name lookup
        let main_symbols = index.find_symbols("main");
        assert_eq!(main_symbols.len(), 1);
        assert_eq!(main_symbols[0].name, "main");

        // Line lookup: line 5 is inside `main`, line 20 inside `helper`.
        let at_line_5 = index.find_symbol_at_line(file_zero, 5);
        assert!(at_line_5.is_some());
        assert_eq!(at_line_5.unwrap().name, "main");

        let at_line_20 = index.find_symbol_at_line(file_zero, 20);
        assert!(at_line_20.is_some());
        assert_eq!(at_line_20.unwrap().name, "helper");
    }

    #[test]
    fn test_dep_graph() {
        let mut graph = DepGraph::new();
        for (from, to) in [(0, 1), (0, 2), (1, 2)] {
            graph.add_file_import(from, to);
        }

        assert_eq!(graph.get_imports(0), vec![1, 2]);
        assert_eq!(graph.get_importers(2), vec![0, 1]);

        for callee in [20, 21] {
            graph.add_call(10, callee);
        }

        assert_eq!(graph.get_callees(10), vec![20, 21]);
        assert_eq!(graph.get_callers(20), vec![10]);
    }
}