Skip to main content

infiniloom_engine/index/
types.rs

1//! Core data structures for the Git context index.
2//!
3//! This module defines the types used to build and query a pre-computed
4//! index of symbols, files, and their relationships for fast diff context.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::ops::Range;
9
10/// Type-safe symbol ID to prevent mixing with other integer types.
11/// Use `SymbolId::new()` to create and `id.0` or `id.as_u32()` to access.
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
13#[repr(transparent)]
14pub struct SymbolId(pub u32);
15
16impl SymbolId {
17    /// Create a new SymbolId
18    #[inline]
19    pub const fn new(id: u32) -> Self {
20        Self(id)
21    }
22
23    /// Get the underlying u32 value
24    #[inline]
25    pub const fn as_u32(self) -> u32 {
26        self.0
27    }
28}
29
30impl From<u32> for SymbolId {
31    #[inline]
32    fn from(id: u32) -> Self {
33        Self(id)
34    }
35}
36
37impl From<SymbolId> for u32 {
38    #[inline]
39    fn from(id: SymbolId) -> Self {
40        id.0
41    }
42}
43
44impl std::fmt::Display for SymbolId {
45    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46        write!(f, "sym#{}", self.0)
47    }
48}
49
50/// Type-safe file ID to prevent mixing with other integer types.
51/// Use `FileId::new()` to create and `id.0` or `id.as_u32()` to access.
52#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
53#[repr(transparent)]
54pub struct FileId(pub u32);
55
56impl FileId {
57    /// Create a new FileId
58    #[inline]
59    pub const fn new(id: u32) -> Self {
60        Self(id)
61    }
62
63    /// Get the underlying u32 value
64    #[inline]
65    pub const fn as_u32(self) -> u32 {
66        self.0
67    }
68}
69
70impl From<u32> for FileId {
71    #[inline]
72    fn from(id: u32) -> Self {
73        Self(id)
74    }
75}
76
77impl From<FileId> for u32 {
78    #[inline]
79    fn from(id: FileId) -> Self {
80        id.0
81    }
82}
83
84impl std::fmt::Display for FileId {
85    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
86        write!(f, "file#{}", self.0)
87    }
88}
89
90/// A symbol in the index with unique ID for graph operations.
91#[derive(Debug, Clone, Serialize, Deserialize)]
92pub struct IndexSymbol {
93    /// Unique symbol ID within this index
94    pub id: SymbolId,
95    /// Symbol name
96    pub name: String,
97    /// Symbol kind
98    pub kind: IndexSymbolKind,
99    /// File ID containing this symbol
100    pub file_id: FileId,
101    /// Source span (line/column positions)
102    pub span: Span,
103    /// Full signature for functions/methods
104    pub signature: Option<String>,
105    /// Parent symbol ID (for methods inside classes)
106    pub parent: Option<SymbolId>,
107    /// Visibility modifier
108    pub visibility: Visibility,
109    /// Documentation string
110    pub docstring: Option<String>,
111}
112
113/// Symbol kind for the index (extended from core SymbolKind)
114#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
115pub enum IndexSymbolKind {
116    Function,
117    Method,
118    Class,
119    Struct,
120    Interface,
121    Trait,
122    Enum,
123    Constant,
124    Variable,
125    Module,
126    Import,
127    Export,
128    TypeAlias,
129    Macro,
130}
131
132impl IndexSymbolKind {
133    pub fn name(&self) -> &'static str {
134        match self {
135            Self::Function => "function",
136            Self::Method => "method",
137            Self::Class => "class",
138            Self::Struct => "struct",
139            Self::Interface => "interface",
140            Self::Trait => "trait",
141            Self::Enum => "enum",
142            Self::Constant => "constant",
143            Self::Variable => "variable",
144            Self::Module => "module",
145            Self::Import => "import",
146            Self::Export => "export",
147            Self::TypeAlias => "type",
148            Self::Macro => "macro",
149        }
150    }
151
152    /// Check if this symbol kind defines a scope (can contain other symbols)
153    pub fn is_scope(&self) -> bool {
154        matches!(
155            self,
156            Self::Class | Self::Struct | Self::Interface | Self::Trait | Self::Module | Self::Enum
157        )
158    }
159}
160
161/// Visibility modifier for symbols
162#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
163pub enum Visibility {
164    #[default]
165    Public,
166    Private,
167    Protected,
168    Internal,
169}
170
171/// Source code span (start and end positions)
172#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
173pub struct Span {
174    pub start_line: u32,
175    pub start_col: u16,
176    pub end_line: u32,
177    pub end_col: u16,
178}
179
180impl Span {
181    pub fn new(start_line: u32, start_col: u16, end_line: u32, end_col: u16) -> Self {
182        Self { start_line, start_col, end_line, end_col }
183    }
184
185    /// Check if a line falls within this span
186    pub fn contains_line(&self, line: u32) -> bool {
187        line >= self.start_line && line <= self.end_line
188    }
189
190    /// Number of lines in this span
191    pub fn line_count(&self) -> u32 {
192        if self.end_line >= self.start_line {
193            self.end_line - self.start_line + 1
194        } else {
195            1
196        }
197    }
198}
199
200/// A file entry in the index
201#[derive(Debug, Clone, Serialize, Deserialize)]
202pub struct FileEntry {
203    /// Unique file ID
204    pub id: FileId,
205    /// Relative path from repository root
206    pub path: String,
207    /// Detected language
208    pub language: Language,
209    /// BLAKE3 content hash for change detection
210    pub content_hash: [u8; 32],
211    /// Index range into the symbols vector (raw u32 for Range compatibility)
212    pub symbols: Range<u32>,
213    /// Import statements in this file
214    pub imports: Vec<Import>,
215    /// Number of lines
216    pub lines: u32,
217    /// Pre-computed token count (Claude model)
218    pub tokens: u32,
219}
220
221/// Detected programming language
222#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
223pub enum Language {
224    Rust,
225    Python,
226    JavaScript,
227    TypeScript,
228    Go,
229    Java,
230    C,
231    Cpp,
232    CSharp,
233    Ruby,
234    Bash,
235    Php,
236    Kotlin,
237    Swift,
238    Scala,
239    Haskell,
240    Elixir,
241    Clojure,
242    OCaml,
243    Lua,
244    R,
245    #[default]
246    Unknown,
247}
248
249impl Language {
250    pub fn from_extension(ext: &str) -> Self {
251        match ext.to_lowercase().as_str() {
252            "rs" => Self::Rust,
253            "py" | "pyi" | "pyw" => Self::Python,
254            "js" | "mjs" | "cjs" => Self::JavaScript,
255            "ts" | "mts" | "cts" => Self::TypeScript,
256            "tsx" | "jsx" => Self::TypeScript,
257            "go" => Self::Go,
258            "java" => Self::Java,
259            "c" | "h" => Self::C,
260            "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Self::Cpp,
261            "cs" => Self::CSharp,
262            "rb" => Self::Ruby,
263            "sh" | "bash" | "zsh" => Self::Bash,
264            "php" | "php3" | "php4" | "php5" | "phtml" => Self::Php,
265            "kt" | "kts" => Self::Kotlin,
266            "swift" => Self::Swift,
267            "scala" | "sc" => Self::Scala,
268            "hs" | "lhs" => Self::Haskell,
269            "ex" | "exs" => Self::Elixir,
270            "clj" | "cljs" | "cljc" | "edn" => Self::Clojure,
271            "ml" | "mli" => Self::OCaml,
272            "lua" => Self::Lua,
273            "r" | "rmd" => Self::R,
274            _ => Self::Unknown,
275        }
276    }
277
278    pub fn name(&self) -> &'static str {
279        match self {
280            Self::Rust => "rust",
281            Self::Python => "python",
282            Self::JavaScript => "javascript",
283            Self::TypeScript => "typescript",
284            Self::Go => "go",
285            Self::Java => "java",
286            Self::C => "c",
287            Self::Cpp => "cpp",
288            Self::CSharp => "csharp",
289            Self::Ruby => "ruby",
290            Self::Bash => "bash",
291            Self::Php => "php",
292            Self::Kotlin => "kotlin",
293            Self::Swift => "swift",
294            Self::Scala => "scala",
295            Self::Haskell => "haskell",
296            Self::Elixir => "elixir",
297            Self::Clojure => "clojure",
298            Self::OCaml => "ocaml",
299            Self::Lua => "lua",
300            Self::R => "r",
301            Self::Unknown => "unknown",
302        }
303    }
304}
305
306/// An import statement
307#[derive(Debug, Clone, Serialize, Deserialize)]
308pub struct Import {
309    /// Source path or module name (e.g., "src/utils" or "lodash")
310    pub source: String,
311    /// Resolved file ID if it's an internal import
312    pub resolved_file: Option<u32>,
313    /// Specific symbols imported (empty for wildcard imports)
314    pub symbols: Vec<String>,
315    /// Source span
316    pub span: Span,
317    /// Whether this is an external dependency
318    pub is_external: bool,
319}
320
321/// Main symbol index structure
322#[derive(Debug, Clone, Serialize, Deserialize)]
323pub struct SymbolIndex {
324    /// Index version (for compatibility checking)
325    pub version: u32,
326    /// Repository name
327    pub repo_name: String,
328    /// Git commit hash when index was built
329    pub commit_hash: Option<String>,
330    /// Timestamp of index creation
331    pub created_at: u64,
332    /// All files in the repository
333    pub files: Vec<FileEntry>,
334    /// All symbols across all files
335    pub symbols: Vec<IndexSymbol>,
336
337    // Lookup tables (built on load, not serialized)
338    #[serde(skip)]
339    pub file_by_path: HashMap<String, u32>,
340    #[serde(skip)]
341    pub symbols_by_name: HashMap<String, Vec<u32>>,
342}
343
344impl Default for SymbolIndex {
345    fn default() -> Self {
346        Self::new()
347    }
348}
349
350impl SymbolIndex {
351    pub const CURRENT_VERSION: u32 = 1;
352
353    pub fn new() -> Self {
354        Self {
355            version: Self::CURRENT_VERSION,
356            repo_name: String::new(),
357            commit_hash: None,
358            created_at: 0,
359            files: Vec::new(),
360            symbols: Vec::new(),
361            file_by_path: HashMap::new(),
362            symbols_by_name: HashMap::new(),
363        }
364    }
365
366    /// Rebuild lookup tables after deserialization
367    pub fn rebuild_lookups(&mut self) {
368        self.file_by_path.clear();
369        self.symbols_by_name.clear();
370
371        for file in &self.files {
372            self.file_by_path
373                .insert(file.path.clone(), file.id.as_u32());
374        }
375
376        for symbol in &self.symbols {
377            self.symbols_by_name
378                .entry(symbol.name.clone())
379                .or_default()
380                .push(symbol.id.as_u32());
381        }
382    }
383
384    /// Get file by path
385    pub fn get_file(&self, path: &str) -> Option<&FileEntry> {
386        self.file_by_path
387            .get(path)
388            .and_then(|&id| self.files.get(id as usize))
389    }
390
391    /// Get file by ID
392    pub fn get_file_by_id(&self, id: u32) -> Option<&FileEntry> {
393        self.files.get(id as usize)
394    }
395
396    /// Get symbol by ID
397    pub fn get_symbol(&self, id: u32) -> Option<&IndexSymbol> {
398        self.symbols.get(id as usize)
399    }
400
401    /// Get all symbols in a file
402    pub fn get_file_symbols(&self, file_id: FileId) -> &[IndexSymbol] {
403        if let Some(file) = self.get_file_by_id(file_id.as_u32()) {
404            &self.symbols[file.symbols.start as usize..file.symbols.end as usize]
405        } else {
406            &[]
407        }
408    }
409
410    /// Find symbols by name
411    pub fn find_symbols(&self, name: &str) -> Vec<&IndexSymbol> {
412        self.symbols_by_name
413            .get(name)
414            .map(|ids| ids.iter().filter_map(|&id| self.get_symbol(id)).collect())
415            .unwrap_or_default()
416    }
417
418    /// Find symbol containing a specific line in a file
419    pub fn find_symbol_at_line(&self, file_id: FileId, line: u32) -> Option<&IndexSymbol> {
420        self.get_file_symbols(file_id)
421            .iter()
422            .filter(|s| s.span.contains_line(line))
423            // Return the innermost (smallest) symbol containing the line
424            .min_by_key(|s| s.span.line_count())
425    }
426}
427
428/// Dependency graph for impact analysis
429///
430/// Uses both edge lists (for serialization) and adjacency maps (for O(1) queries).
431/// The adjacency maps are rebuilt after deserialization via `rebuild_adjacency_maps()`.
432#[derive(Debug, Clone, Default, Serialize, Deserialize)]
433pub struct DepGraph {
434    // Forward edges: X depends on Y
435    /// File imports: (file_id, imported_file_id)
436    pub file_imports: Vec<(u32, u32)>,
437    /// Symbol references: (symbol_id, referenced_symbol_id)
438    pub symbol_refs: Vec<(u32, u32)>,
439
440    // Reverse edges: Y is depended on by X
441    /// File imported by: (file_id, importing_file_id)
442    pub file_imported_by: Vec<(u32, u32)>,
443    /// Symbol referenced by: (symbol_id, referencing_symbol_id)
444    pub symbol_ref_by: Vec<(u32, u32)>,
445
446    // Call graph
447    /// Function calls: (caller_symbol_id, callee_symbol_id)
448    pub calls: Vec<(u32, u32)>,
449    /// Called by: (callee_symbol_id, caller_symbol_id)
450    pub called_by: Vec<(u32, u32)>,
451
452    // Pre-computed metrics
453    /// PageRank importance score per file
454    pub file_pagerank: Vec<f32>,
455    /// PageRank importance score per symbol
456    pub symbol_pagerank: Vec<f32>,
457
458    // ===== Adjacency maps for O(1) lookups (not serialized, rebuilt on load) =====
459    /// file_id -> list of files it imports
460    #[serde(skip)]
461    pub imports_adj: HashMap<u32, Vec<u32>>,
462    /// file_id -> list of files that import it
463    #[serde(skip)]
464    pub imported_by_adj: HashMap<u32, Vec<u32>>,
465    /// symbol_id -> list of symbols it references
466    #[serde(skip)]
467    pub refs_adj: HashMap<u32, Vec<u32>>,
468    /// symbol_id -> list of symbols that reference it
469    #[serde(skip)]
470    pub ref_by_adj: HashMap<u32, Vec<u32>>,
471    /// caller_id -> list of callees
472    #[serde(skip)]
473    pub callees_adj: HashMap<u32, Vec<u32>>,
474    /// callee_id -> list of callers
475    #[serde(skip)]
476    pub callers_adj: HashMap<u32, Vec<u32>>,
477}
478
479impl DepGraph {
480    pub fn new() -> Self {
481        Self::default()
482    }
483
484    /// Rebuild adjacency maps from edge lists.
485    /// Call this after deserializing a DepGraph.
486    pub fn rebuild_adjacency_maps(&mut self) {
487        self.imports_adj.clear();
488        self.imported_by_adj.clear();
489        self.refs_adj.clear();
490        self.ref_by_adj.clear();
491        self.callees_adj.clear();
492        self.callers_adj.clear();
493
494        // Rebuild file import adjacency
495        for &(from, to) in &self.file_imports {
496            self.imports_adj.entry(from).or_default().push(to);
497        }
498        for &(file, importer) in &self.file_imported_by {
499            self.imported_by_adj.entry(file).or_default().push(importer);
500        }
501
502        // Rebuild symbol reference adjacency
503        for &(from, to) in &self.symbol_refs {
504            self.refs_adj.entry(from).or_default().push(to);
505        }
506        for &(symbol, referencer) in &self.symbol_ref_by {
507            self.ref_by_adj.entry(symbol).or_default().push(referencer);
508        }
509
510        // Rebuild call graph adjacency
511        for &(caller, callee) in &self.calls {
512            self.callees_adj.entry(caller).or_default().push(callee);
513        }
514        for &(callee, caller) in &self.called_by {
515            self.callers_adj.entry(callee).or_default().push(caller);
516        }
517    }
518
519    /// Add a file import edge
520    pub fn add_file_import(&mut self, from_file: u32, to_file: u32) {
521        self.file_imports.push((from_file, to_file));
522        self.file_imported_by.push((to_file, from_file));
523        // Update adjacency maps
524        self.imports_adj.entry(from_file).or_default().push(to_file);
525        self.imported_by_adj
526            .entry(to_file)
527            .or_default()
528            .push(from_file);
529    }
530
531    /// Add a symbol reference edge
532    pub fn add_symbol_ref(&mut self, from_symbol: u32, to_symbol: u32) {
533        self.symbol_refs.push((from_symbol, to_symbol));
534        self.symbol_ref_by.push((to_symbol, from_symbol));
535        // Update adjacency maps
536        self.refs_adj
537            .entry(from_symbol)
538            .or_default()
539            .push(to_symbol);
540        self.ref_by_adj
541            .entry(to_symbol)
542            .or_default()
543            .push(from_symbol);
544    }
545
546    /// Add a function call edge
547    pub fn add_call(&mut self, caller: u32, callee: u32) {
548        self.calls.push((caller, callee));
549        self.called_by.push((callee, caller));
550        // Update adjacency maps
551        self.callees_adj.entry(caller).or_default().push(callee);
552        self.callers_adj.entry(callee).or_default().push(caller);
553    }
554
555    /// Get files that import a given file (O(1) lookup)
556    pub fn get_importers(&self, file_id: u32) -> Vec<u32> {
557        self.imported_by_adj
558            .get(&file_id)
559            .cloned()
560            .unwrap_or_default()
561    }
562
563    /// Get files that a given file imports (O(1) lookup)
564    pub fn get_imports(&self, file_id: u32) -> Vec<u32> {
565        self.imports_adj.get(&file_id).cloned().unwrap_or_default()
566    }
567
568    /// Get symbols that reference a given symbol (O(1) lookup)
569    pub fn get_referencers(&self, symbol_id: u32) -> Vec<u32> {
570        self.ref_by_adj.get(&symbol_id).cloned().unwrap_or_default()
571    }
572
573    /// Get callers of a function (O(1) lookup)
574    pub fn get_callers(&self, symbol_id: u32) -> Vec<u32> {
575        self.callers_adj
576            .get(&symbol_id)
577            .cloned()
578            .unwrap_or_default()
579    }
580
581    /// Get callees of a function (O(1) lookup)
582    pub fn get_callees(&self, symbol_id: u32) -> Vec<u32> {
583        self.callees_adj
584            .get(&symbol_id)
585            .cloned()
586            .unwrap_or_default()
587    }
588
589    /// Check if adjacency maps are populated (used to detect if rebuild is needed)
590    pub fn needs_rebuild(&self) -> bool {
591        // If we have edges but no adjacency data, rebuild is needed
592        (!self.file_imports.is_empty() && self.imports_adj.is_empty())
593            || (!self.calls.is_empty() && self.callees_adj.is_empty())
594    }
595}
596
597/// A reference to a symbol (for tracking usages)
598#[derive(Debug, Clone, Serialize, Deserialize)]
599pub struct Reference {
600    /// Referenced symbol ID
601    pub symbol_id: u32,
602    /// File containing the reference
603    pub file_id: u32,
604    /// Location of the reference
605    pub span: Span,
606    /// Kind of reference
607    pub kind: RefKind,
608}
609
610/// Kind of reference
611#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
612pub enum RefKind {
613    /// Function/method call
614    Call,
615    /// Variable read
616    Read,
617    /// Variable write
618    Write,
619    /// Import statement
620    Import,
621    /// Type annotation
622    TypeRef,
623    /// Class inheritance
624    Inheritance,
625}
626
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a function symbol living in file 0 with the signature `fn <name>()`.
    fn function_symbol(id: u32, name: &str, span: Span, visibility: Visibility) -> IndexSymbol {
        IndexSymbol {
            id: SymbolId::new(id),
            name: name.to_owned(),
            kind: IndexSymbolKind::Function,
            file_id: FileId::new(0),
            span,
            signature: Some(format!("fn {}()", name)),
            parent: None,
            visibility,
            docstring: None,
        }
    }

    /// Both end lines of a span are inclusive; neighbours are excluded.
    #[test]
    fn test_span_contains_line() {
        let span = Span::new(10, 0, 20, 0);
        for inside in [10, 15, 20] {
            assert!(span.contains_line(inside));
        }
        for outside in [9, 21] {
            assert!(!span.contains_line(outside));
        }
    }

    /// Known extensions map to their language; unknown ones to `Unknown`.
    #[test]
    fn test_language_from_extension() {
        let expectations = [
            ("rs", Language::Rust),
            ("py", Language::Python),
            ("ts", Language::TypeScript),
            ("xyz", Language::Unknown),
        ];
        for (ext, language) in expectations {
            assert_eq!(Language::from_extension(ext), language);
        }
    }

    /// Path, name and line lookups work once the lookup tables are rebuilt.
    #[test]
    fn test_symbol_index_lookups() {
        let file = FileId::new(0);
        let mut index = SymbolIndex::new();
        index.files.push(FileEntry {
            id: file,
            path: "src/main.rs".to_owned(),
            language: Language::Rust,
            content_hash: [0; 32],
            symbols: 0..2,
            imports: vec![],
            lines: 100,
            tokens: 500,
        });
        index.symbols.push(function_symbol(
            0,
            "main",
            Span::new(1, 0, 10, 0),
            Visibility::Public,
        ));
        index.symbols.push(function_symbol(
            1,
            "helper",
            Span::new(15, 0, 25, 0),
            Visibility::Private,
        ));

        index.rebuild_lookups();

        assert!(index.get_file("src/main.rs").is_some());
        assert!(index.get_file("nonexistent.rs").is_none());

        let main_symbols = index.find_symbols("main");
        assert_eq!(main_symbols.len(), 1);
        assert_eq!(main_symbols[0].name, "main");

        // Each probe line falls inside exactly one of the two functions.
        for (line, expected) in [(5, "main"), (20, "helper")] {
            let symbol = index.find_symbol_at_line(file, line);
            assert!(symbol.is_some());
            assert_eq!(symbol.unwrap().name, expected);
        }
    }

    /// Edges added through the `add_*` methods are visible in both directions.
    #[test]
    fn test_dep_graph() {
        let mut graph = DepGraph::new();
        for (from, to) in [(0, 1), (0, 2), (1, 2)] {
            graph.add_file_import(from, to);
        }

        assert_eq!(graph.get_imports(0), vec![1, 2]);
        assert_eq!(graph.get_importers(2), vec![0, 1]);

        for callee in [20, 21] {
            graph.add_call(10, callee);
        }

        assert_eq!(graph.get_callees(10), vec![20, 21]);
        assert_eq!(graph.get_callers(20), vec![10]);
    }
}