infiniloom_engine/index/
types.rs

1//! Core data structures for the Git context index.
2//!
3//! This module defines the types used to build and query a pre-computed
4//! index of symbols, files, and their relationships for fast diff context.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::ops::Range;
9
/// Type-safe symbol ID to prevent mixing with other integer types.
///
/// A newtype over `u32`; `#[repr(transparent)]` gives it the same layout as
/// the integer it wraps. The derived `Default` is `SymbolId(0)`.
/// Use `SymbolId::new()` to create and `id.0` or `id.as_u32()` to access.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[repr(transparent)]
pub struct SymbolId(pub u32);
15
16impl SymbolId {
17    /// Create a new SymbolId
18    #[inline]
19    pub const fn new(id: u32) -> Self {
20        Self(id)
21    }
22
23    /// Get the underlying u32 value
24    #[inline]
25    pub const fn as_u32(self) -> u32 {
26        self.0
27    }
28}
29
30impl From<u32> for SymbolId {
31    #[inline]
32    fn from(id: u32) -> Self {
33        Self(id)
34    }
35}
36
37impl From<SymbolId> for u32 {
38    #[inline]
39    fn from(id: SymbolId) -> Self {
40        id.0
41    }
42}
43
44impl std::fmt::Display for SymbolId {
45    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46        write!(f, "sym#{}", self.0)
47    }
48}
49
/// Type-safe file ID to prevent mixing with other integer types.
///
/// A newtype over `u32`; `#[repr(transparent)]` gives it the same layout as
/// the integer it wraps. The derived `Default` is `FileId(0)`.
/// Use `FileId::new()` to create and `id.0` or `id.as_u32()` to access.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[repr(transparent)]
pub struct FileId(pub u32);
55
56impl FileId {
57    /// Create a new FileId
58    #[inline]
59    pub const fn new(id: u32) -> Self {
60        Self(id)
61    }
62
63    /// Get the underlying u32 value
64    #[inline]
65    pub const fn as_u32(self) -> u32 {
66        self.0
67    }
68}
69
70impl From<u32> for FileId {
71    #[inline]
72    fn from(id: u32) -> Self {
73        Self(id)
74    }
75}
76
77impl From<FileId> for u32 {
78    #[inline]
79    fn from(id: FileId) -> Self {
80        id.0
81    }
82}
83
84impl std::fmt::Display for FileId {
85    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
86        write!(f, "file#{}", self.0)
87    }
88}
89
/// A symbol in the index with unique ID for graph operations.
///
/// A symbol points at its containing file through `file_id` and, when it is
/// nested, at its enclosing symbol through `parent`.
// NOTE(review): this type is serialized; field order may matter for
// positional formats — confirm before reordering fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexSymbol {
    /// Unique symbol ID within this index
    pub id: SymbolId,
    /// Symbol name
    pub name: String,
    /// Symbol kind
    pub kind: IndexSymbolKind,
    /// File ID containing this symbol
    pub file_id: FileId,
    /// Source span (line/column positions)
    pub span: Span,
    /// Full signature for functions/methods (`None` when not recorded)
    pub signature: Option<String>,
    /// Parent symbol ID (for methods inside classes); `None` when there is no parent
    pub parent: Option<SymbolId>,
    /// Visibility modifier
    pub visibility: Visibility,
    /// Documentation string (`None` when not recorded)
    pub docstring: Option<String>,
}
112
/// Symbol kind for the index (extended from core SymbolKind)
///
/// All variants are field-less, so the type is `Copy` and usable as a hash key.
// NOTE(review): this enum is serialized; variant order may matter for
// index-based formats — confirm before reordering or inserting variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum IndexSymbolKind {
    Function,
    Method,
    Class,
    Struct,
    Interface,
    Trait,
    Enum,
    Constant,
    Variable,
    Module,
    Import,
    Export,
    TypeAlias,
    Macro,
}
131
132impl IndexSymbolKind {
133    pub fn name(&self) -> &'static str {
134        match self {
135            Self::Function => "function",
136            Self::Method => "method",
137            Self::Class => "class",
138            Self::Struct => "struct",
139            Self::Interface => "interface",
140            Self::Trait => "trait",
141            Self::Enum => "enum",
142            Self::Constant => "constant",
143            Self::Variable => "variable",
144            Self::Module => "module",
145            Self::Import => "import",
146            Self::Export => "export",
147            Self::TypeAlias => "type",
148            Self::Macro => "macro",
149        }
150    }
151
152    /// Check if this symbol kind defines a scope (can contain other symbols)
153    pub fn is_scope(&self) -> bool {
154        matches!(
155            self,
156            Self::Class | Self::Struct | Self::Interface | Self::Trait | Self::Module | Self::Enum
157        )
158    }
159}
160
/// Visibility modifier for symbols
///
/// `Public` is the `Default` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum Visibility {
    #[default]
    Public,
    Private,
    Protected,
    Internal,
}
170
/// Source code span (start and end positions)
///
/// Lines are stored as `u32` and columns as `u16`. The derived `Default` is
/// the all-zero span.
// NOTE(review): whether lines/columns are 0- or 1-based is not established in
// this file — confirm with the code that produces spans.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
pub struct Span {
    /// Line on which the span starts
    pub start_line: u32,
    /// Column on which the span starts
    pub start_col: u16,
    /// Line on which the span ends (treated as inclusive by `contains_line`)
    pub end_line: u32,
    /// Column on which the span ends
    pub end_col: u16,
}
179
180impl Span {
181    pub fn new(start_line: u32, start_col: u16, end_line: u32, end_col: u16) -> Self {
182        Self { start_line, start_col, end_line, end_col }
183    }
184
185    /// Check if a line falls within this span
186    pub fn contains_line(&self, line: u32) -> bool {
187        line >= self.start_line && line <= self.end_line
188    }
189
190    /// Number of lines in this span
191    pub fn line_count(&self) -> u32 {
192        if self.end_line >= self.start_line {
193            self.end_line - self.start_line + 1
194        } else {
195            1
196        }
197    }
198}
199
/// A file entry in the index
///
/// Holds per-file metadata plus the range of this file's symbols inside the
/// index-wide symbols vector.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Unique file ID
    pub id: FileId,
    /// Relative path from repository root
    pub path: String,
    /// Detected language
    pub language: Language,
    /// BLAKE3 content hash for change detection
    pub content_hash: [u8; 32],
    /// Index range into the symbols vector (raw u32 for Range compatibility).
    /// Half-open: `start` is the first symbol, `end` is one past the last.
    pub symbols: Range<u32>,
    /// Import statements in this file
    pub imports: Vec<Import>,
    /// Number of lines
    pub lines: u32,
    /// Pre-computed token count (Claude model)
    pub tokens: u32,
}
220
/// Detected programming language
///
/// `Unknown` is the `Default` value and is also what
/// `Language::from_extension` returns for an unrecognized extension.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
pub enum Language {
    Rust,
    Python,
    JavaScript,
    TypeScript,
    Go,
    Java,
    C,
    Cpp,
    CSharp,
    Ruby,
    Bash,
    Php,
    Kotlin,
    Swift,
    Scala,
    Haskell,
    Elixir,
    Clojure,
    OCaml,
    Lua,
    R,
    #[default]
    Unknown,
}
248
249impl Language {
250    pub fn from_extension(ext: &str) -> Self {
251        match ext.to_lowercase().as_str() {
252            "rs" => Self::Rust,
253            "py" | "pyi" | "pyw" => Self::Python,
254            "js" | "mjs" | "cjs" => Self::JavaScript,
255            "ts" | "mts" | "cts" => Self::TypeScript,
256            "tsx" | "jsx" => Self::TypeScript,
257            "go" => Self::Go,
258            "java" => Self::Java,
259            "c" | "h" => Self::C,
260            "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Self::Cpp,
261            "cs" => Self::CSharp,
262            "rb" => Self::Ruby,
263            "sh" | "bash" | "zsh" => Self::Bash,
264            "php" | "php3" | "php4" | "php5" | "phtml" => Self::Php,
265            "kt" | "kts" => Self::Kotlin,
266            "swift" => Self::Swift,
267            "scala" | "sc" => Self::Scala,
268            "hs" | "lhs" => Self::Haskell,
269            "ex" | "exs" => Self::Elixir,
270            "clj" | "cljs" | "cljc" | "edn" => Self::Clojure,
271            "ml" | "mli" => Self::OCaml,
272            "lua" => Self::Lua,
273            "r" | "rmd" => Self::R,
274            _ => Self::Unknown,
275        }
276    }
277
278    pub fn name(&self) -> &'static str {
279        match self {
280            Self::Rust => "rust",
281            Self::Python => "python",
282            Self::JavaScript => "javascript",
283            Self::TypeScript => "typescript",
284            Self::Go => "go",
285            Self::Java => "java",
286            Self::C => "c",
287            Self::Cpp => "cpp",
288            Self::CSharp => "csharp",
289            Self::Ruby => "ruby",
290            Self::Bash => "bash",
291            Self::Php => "php",
292            Self::Kotlin => "kotlin",
293            Self::Swift => "swift",
294            Self::Scala => "scala",
295            Self::Haskell => "haskell",
296            Self::Elixir => "elixir",
297            Self::Clojure => "clojure",
298            Self::OCaml => "ocaml",
299            Self::Lua => "lua",
300            Self::R => "r",
301            Self::Unknown => "unknown",
302        }
303    }
304}
305
/// An import statement
///
/// Records what a file imports, where the statement sits, and whether the
/// target is inside the repository or an external dependency.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Import {
    /// Source path or module name (e.g., "src/utils" or "lodash")
    pub source: String,
    /// Resolved file ID if it's an internal import.
    /// Stored as a raw `u32` rather than a `FileId`.
    pub resolved_file: Option<u32>,
    /// Specific symbols imported (empty for wildcard imports)
    pub symbols: Vec<String>,
    /// Source span
    pub span: Span,
    /// Whether this is an external dependency
    pub is_external: bool,
}
320
/// Main symbol index structure
///
/// The serialized part is the metadata plus the `files` and `symbols`
/// vectors. The two lookup maps are marked `#[serde(skip)]`, so they are not
/// written out and must be repopulated with `rebuild_lookups` after loading.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolIndex {
    /// Index version (for compatibility checking)
    pub version: u32,
    /// Repository name
    pub repo_name: String,
    /// Git commit hash when index was built
    pub commit_hash: Option<String>,
    /// Timestamp of index creation
    pub created_at: u64,
    /// All files in the repository
    pub files: Vec<FileEntry>,
    /// All symbols across all files
    pub symbols: Vec<IndexSymbol>,

    // Lookup tables (built on load, not serialized)
    /// File path -> raw file ID, as filled in by `rebuild_lookups`
    #[serde(skip)]
    pub file_by_path: HashMap<String, u32>,
    /// Symbol name -> raw IDs of every symbol with that name, as filled in
    /// by `rebuild_lookups`
    #[serde(skip)]
    pub symbols_by_name: HashMap<String, Vec<u32>>,
}
343
344impl Default for SymbolIndex {
345    fn default() -> Self {
346        Self::new()
347    }
348}
349
350impl SymbolIndex {
351    pub const CURRENT_VERSION: u32 = 1;
352
353    pub fn new() -> Self {
354        Self {
355            version: Self::CURRENT_VERSION,
356            repo_name: String::new(),
357            commit_hash: None,
358            created_at: 0,
359            files: Vec::new(),
360            symbols: Vec::new(),
361            file_by_path: HashMap::new(),
362            symbols_by_name: HashMap::new(),
363        }
364    }
365
366    /// Rebuild lookup tables after deserialization
367    pub fn rebuild_lookups(&mut self) {
368        self.file_by_path.clear();
369        self.symbols_by_name.clear();
370
371        for file in &self.files {
372            self.file_by_path
373                .insert(file.path.clone(), file.id.as_u32());
374        }
375
376        for symbol in &self.symbols {
377            self.symbols_by_name
378                .entry(symbol.name.clone())
379                .or_default()
380                .push(symbol.id.as_u32());
381        }
382    }
383
384    /// Get file by path
385    pub fn get_file(&self, path: &str) -> Option<&FileEntry> {
386        self.file_by_path
387            .get(path)
388            .and_then(|&id| self.files.get(id as usize))
389    }
390
391    /// Get file by ID
392    pub fn get_file_by_id(&self, id: u32) -> Option<&FileEntry> {
393        self.files.get(id as usize)
394    }
395
396    /// Get symbol by ID
397    pub fn get_symbol(&self, id: u32) -> Option<&IndexSymbol> {
398        self.symbols.get(id as usize)
399    }
400
401    /// Get all symbols in a file
402    pub fn get_file_symbols(&self, file_id: FileId) -> &[IndexSymbol] {
403        if let Some(file) = self.get_file_by_id(file_id.as_u32()) {
404            &self.symbols[file.symbols.start as usize..file.symbols.end as usize]
405        } else {
406            &[]
407        }
408    }
409
410    /// Find symbols by name
411    pub fn find_symbols(&self, name: &str) -> Vec<&IndexSymbol> {
412        self.symbols_by_name
413            .get(name)
414            .map(|ids| ids.iter().filter_map(|&id| self.get_symbol(id)).collect())
415            .unwrap_or_default()
416    }
417
418    /// Find symbol containing a specific line in a file
419    pub fn find_symbol_at_line(&self, file_id: FileId, line: u32) -> Option<&IndexSymbol> {
420        self.get_file_symbols(file_id)
421            .iter()
422            .filter(|s| s.span.contains_line(line))
423            // Return the innermost (smallest) symbol containing the line
424            .min_by_key(|s| s.span.line_count())
425    }
426}
427
/// Dependency graph for impact analysis
///
/// Edges are stored as flat lists of `(from, to)` pairs of raw `u32` IDs.
/// Every forward list has a reverse list holding the same edges with the
/// pair swapped; the `add_*` methods on `DepGraph` keep the two in step.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DepGraph {
    // Forward edges: X depends on Y
    /// File imports: (file_id, imported_file_id)
    pub file_imports: Vec<(u32, u32)>,
    /// Symbol references: (symbol_id, referenced_symbol_id)
    pub symbol_refs: Vec<(u32, u32)>,

    // Reverse edges: Y is depended on by X
    /// File imported by: (file_id, importing_file_id)
    pub file_imported_by: Vec<(u32, u32)>,
    /// Symbol referenced by: (symbol_id, referencing_symbol_id)
    pub symbol_ref_by: Vec<(u32, u32)>,

    // Call graph
    /// Function calls: (caller_symbol_id, callee_symbol_id)
    pub calls: Vec<(u32, u32)>,
    /// Called by: (callee_symbol_id, caller_symbol_id)
    pub called_by: Vec<(u32, u32)>,

    // Pre-computed metrics
    /// PageRank importance score per file
    // NOTE(review): presumably indexed by file ID — confirm against the code that fills it.
    pub file_pagerank: Vec<f32>,
    /// PageRank importance score per symbol
    // NOTE(review): presumably indexed by symbol ID — confirm against the code that fills it.
    pub symbol_pagerank: Vec<f32>,
}
455
456impl DepGraph {
457    pub fn new() -> Self {
458        Self::default()
459    }
460
461    /// Add a file import edge
462    pub fn add_file_import(&mut self, from_file: u32, to_file: u32) {
463        self.file_imports.push((from_file, to_file));
464        self.file_imported_by.push((to_file, from_file));
465    }
466
467    /// Add a symbol reference edge
468    pub fn add_symbol_ref(&mut self, from_symbol: u32, to_symbol: u32) {
469        self.symbol_refs.push((from_symbol, to_symbol));
470        self.symbol_ref_by.push((to_symbol, from_symbol));
471    }
472
473    /// Add a function call edge
474    pub fn add_call(&mut self, caller: u32, callee: u32) {
475        self.calls.push((caller, callee));
476        self.called_by.push((callee, caller));
477    }
478
479    /// Get files that import a given file
480    pub fn get_importers(&self, file_id: u32) -> Vec<u32> {
481        self.file_imported_by
482            .iter()
483            .filter_map(|&(f, importer)| if f == file_id { Some(importer) } else { None })
484            .collect()
485    }
486
487    /// Get files that a given file imports
488    pub fn get_imports(&self, file_id: u32) -> Vec<u32> {
489        self.file_imports
490            .iter()
491            .filter_map(|&(f, imported)| if f == file_id { Some(imported) } else { None })
492            .collect()
493    }
494
495    /// Get symbols that reference a given symbol
496    pub fn get_referencers(&self, symbol_id: u32) -> Vec<u32> {
497        self.symbol_ref_by
498            .iter()
499            .filter_map(|&(s, referencer)| {
500                if s == symbol_id {
501                    Some(referencer)
502                } else {
503                    None
504                }
505            })
506            .collect()
507    }
508
509    /// Get callers of a function
510    pub fn get_callers(&self, symbol_id: u32) -> Vec<u32> {
511        self.called_by
512            .iter()
513            .filter_map(|&(callee, caller)| {
514                if callee == symbol_id {
515                    Some(caller)
516                } else {
517                    None
518                }
519            })
520            .collect()
521    }
522
523    /// Get callees of a function
524    pub fn get_callees(&self, symbol_id: u32) -> Vec<u32> {
525        self.calls
526            .iter()
527            .filter_map(|&(caller, callee)| {
528                if caller == symbol_id {
529                    Some(callee)
530                } else {
531                    None
532                }
533            })
534            .collect()
535    }
536}
537
/// A reference to a symbol (for tracking usages)
///
/// IDs are stored as raw `u32` values rather than `SymbolId` / `FileId`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Reference {
    /// Referenced symbol ID
    pub symbol_id: u32,
    /// File containing the reference
    pub file_id: u32,
    /// Location of the reference
    pub span: Span,
    /// Kind of reference
    pub kind: RefKind,
}
550
/// Kind of reference
///
/// Describes how a `Reference` uses the symbol it points at.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RefKind {
    /// Function/method call
    Call,
    /// Variable read
    Read,
    /// Variable write
    Write,
    /// Import statement
    Import,
    /// Type annotation
    TypeRef,
    /// Class inheritance
    Inheritance,
}
567
// Unit tests for spans, language detection, index lookups and the dependency graph.
#[cfg(test)]
mod tests {
    use super::*;

    /// `contains_line` is inclusive at both ends and false just outside them.
    #[test]
    fn test_span_contains_line() {
        let span = Span::new(10, 0, 20, 0);
        assert!(span.contains_line(10));
        assert!(span.contains_line(15));
        assert!(span.contains_line(20));
        assert!(!span.contains_line(9));
        assert!(!span.contains_line(21));
    }

    /// Known extensions map to their language; anything else is `Unknown`.
    #[test]
    fn test_language_from_extension() {
        assert_eq!(Language::from_extension("rs"), Language::Rust);
        assert_eq!(Language::from_extension("py"), Language::Python);
        assert_eq!(Language::from_extension("ts"), Language::TypeScript);
        assert_eq!(Language::from_extension("xyz"), Language::Unknown);
    }

    /// Builds a one-file, two-symbol index and exercises path, name and
    /// line lookups after `rebuild_lookups`.
    #[test]
    fn test_symbol_index_lookups() {
        let mut index = SymbolIndex::new();
        // File 0 owns symbols 0 and 1 (half-open range 0..2).
        index.files.push(FileEntry {
            id: FileId::new(0),
            path: "src/main.rs".to_owned(),
            language: Language::Rust,
            content_hash: [0; 32],
            symbols: 0..2,
            imports: vec![],
            lines: 100,
            tokens: 500,
        });
        index.symbols.push(IndexSymbol {
            id: SymbolId::new(0),
            name: "main".to_owned(),
            kind: IndexSymbolKind::Function,
            file_id: FileId::new(0),
            span: Span::new(1, 0, 10, 0),
            signature: Some("fn main()".to_owned()),
            parent: None,
            visibility: Visibility::Public,
            docstring: None,
        });
        index.symbols.push(IndexSymbol {
            id: SymbolId::new(1),
            name: "helper".to_owned(),
            kind: IndexSymbolKind::Function,
            file_id: FileId::new(0),
            span: Span::new(15, 0, 25, 0),
            signature: Some("fn helper()".to_owned()),
            parent: None,
            visibility: Visibility::Private,
            docstring: None,
        });

        // The lookup tables are only populated by this call.
        index.rebuild_lookups();

        assert!(index.get_file("src/main.rs").is_some());
        assert!(index.get_file("nonexistent.rs").is_none());

        let main_symbols = index.find_symbols("main");
        assert_eq!(main_symbols.len(), 1);
        assert_eq!(main_symbols[0].name, "main");

        // Line 5 falls inside `main` (lines 1-10).
        let symbol = index.find_symbol_at_line(FileId::new(0), 5);
        assert!(symbol.is_some());
        assert_eq!(symbol.unwrap().name, "main");

        // Line 20 falls inside `helper` (lines 15-25).
        let symbol = index.find_symbol_at_line(FileId::new(0), 20);
        assert!(symbol.is_some());
        assert_eq!(symbol.unwrap().name, "helper");
    }

    /// Forward and reverse queries return edges in insertion order.
    #[test]
    fn test_dep_graph() {
        let mut graph = DepGraph::new();
        graph.add_file_import(0, 1);
        graph.add_file_import(0, 2);
        graph.add_file_import(1, 2);

        assert_eq!(graph.get_imports(0), vec![1, 2]);
        assert_eq!(graph.get_importers(2), vec![0, 1]);

        graph.add_call(10, 20);
        graph.add_call(10, 21);

        assert_eq!(graph.get_callees(10), vec![20, 21]);
        assert_eq!(graph.get_callers(20), vec![10]);
    }
}