Skip to main content

magellan/graph/
schema.rs

1//! Graph schema definitions for Magellan
2//!
3//! Defines node and edge payloads for sqlitegraph persistence.
4
5use serde::{Deserialize, Serialize};
6
7use anyhow::Result;
8
9/// File node payload stored in sqlitegraph
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct FileNode {
12    pub path: String,
13    pub hash: String,
14    /// Unix timestamp (seconds since epoch) when this file was last indexed
15    pub last_indexed_at: i64,
16    /// Unix timestamp (seconds since epoch) of filesystem mtime when indexed
17    pub last_modified: i64,
18}
19
20/// Symbol node payload stored in sqlitegraph
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct SymbolNode {
23    /// Stable symbol ID derived from (language, fqn, span_id)
24    ///
25    /// Generated via SHA-256 hash of language:fqn:span_id.
26    /// This ID is deterministic and stable across runs for the same symbol.
27    #[serde(default)]
28    pub symbol_id: Option<String>,
29
30    /// Fully-qualified name for this symbol
31    /// Format varies by language (crate::module::Name for Rust, package.Class.Name for Java)
32    /// This is the primary key for symbol lookup, preventing name collisions
33    #[serde(default)]
34    pub fqn: Option<String>,
35
36    /// Canonical fully-qualified name with file path for unambiguous identity
37    /// Format: crate_name::file_path::kind symbol_name
38    /// Example: my_crate::src/lib.rs::Function my_function
39    #[serde(default)]
40    pub canonical_fqn: Option<String>,
41
42    /// Display fully-qualified name for human-readable output
43    /// Shortened form without file path when possible
44    /// Example: my_crate::my_function
45    #[serde(default)]
46    pub display_fqn: Option<String>,
47
48    /// Simple symbol name (display name)
49    /// For user-facing output only. Not used as a unique identifier.
50    pub name: Option<String>,
51    pub kind: String,
52    #[serde(default)]
53    pub kind_normalized: Option<String>,
54    pub byte_start: usize,
55    pub byte_end: usize,
56    pub start_line: usize,
57    pub start_col: usize,
58    pub end_line: usize,
59    pub end_col: usize,
60}
61
62/// Reference node payload stored in sqlitegraph
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct ReferenceNode {
65    pub file: String,
66    pub byte_start: u64,
67    pub byte_end: u64,
68    pub start_line: u64,
69    pub start_col: u64,
70    pub end_line: u64,
71    pub end_col: u64,
72}
73
74/// Cross-file reference entry for efficient lookup
75///
76/// Stored separately from ReferenceNode to enable efficient cross-file
77/// reference queries without traversing the entire graph.
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct CrossFileRef {
80    /// Symbol ID of the referencing symbol (source)
81    pub from_symbol_id: String,
82    /// Symbol ID of the referenced symbol (target)
83    pub to_symbol_id: String,
84    /// File path where the reference occurs
85    pub file_path: String,
86    /// Line number where the reference occurs (1-indexed)
87    pub line_number: usize,
88    /// Byte start position
89    pub byte_start: usize,
90    /// Byte end position
91    pub byte_end: usize,
92}
93
94/// Call node payload stored in sqlitegraph
95#[derive(Debug, Clone, Serialize, Deserialize)]
96pub struct CallNode {
97    pub file: String,
98    pub caller: String,
99    pub callee: String,
100    /// Stable symbol ID of the caller
101    #[serde(default)]
102    pub caller_symbol_id: Option<String>,
103    /// Stable symbol ID of the callee
104    #[serde(default)]
105    pub callee_symbol_id: Option<String>,
106    pub byte_start: u64,
107    pub byte_end: u64,
108    pub start_line: u64,
109    pub start_col: u64,
110    pub end_line: u64,
111    pub end_col: u64,
112}
113
114/// A single edge endpoint row for orphan detection.
115#[derive(Debug, Clone, Copy, PartialEq, Eq)]
116pub struct EdgeEndpoints {
117    pub from_id: i64,
118    pub to_id: i64,
119}
120
121/// Control Flow Basic Block node payload stored in database
122///
123/// Represents a single basic block in a function's control flow graph.
124/// Basic blocks are sequences of statements with a single entry point
125/// and single exit point (via terminator).
126#[derive(Debug, Clone, Serialize, Deserialize)]
127pub struct CfgBlock {
128    /// Function symbol ID this block belongs to
129    pub function_id: i64,
130
131    /// Block kind (entry, conditional, loop, match, return, etc.)
132    pub kind: String,
133
134    /// Terminator kind (how control exits this block)
135    pub terminator: String,
136
137    /// Byte offset where block starts
138    pub byte_start: u64,
139
140    /// Byte offset where block ends
141    pub byte_end: u64,
142
143    /// Line where block starts (1-indexed)
144    pub start_line: u64,
145
146    /// Column where block starts (0-indexed)
147    pub start_col: u64,
148
149    /// Line where block ends (1-indexed)
150    pub end_line: u64,
151
152    /// Column where block ends (0-indexed)
153    pub end_col: u64,
154}
155
156/// Control Flow Edge payload stored in graph_edges table
157///
158/// Represents edges between basic blocks in a function's CFG.
159/// Uses edge_type = "CFG_BLOCK" for graph_edges entries.
160#[derive(Debug, Clone, Serialize, Deserialize)]
161pub struct CfgEdge {
162    /// Source block ID
163    pub from_block_id: i64,
164
165    /// Target block ID
166    pub to_block_id: i64,
167
168    /// Edge kind (unconditional, conditional_true, conditional_false, etc.)
169    pub kind: String,
170}
171
172/// Import node payload stored in sqlitegraph
173///
174/// Represents an import/use/from statement in source code.
175/// Stores metadata about what was imported and where.
176#[derive(Debug, Clone, Serialize, Deserialize)]
177pub struct ImportNode {
178    /// File path containing this import
179    pub file: String,
180    /// Kind of import statement (use_crate, use_super, use_self, etc.)
181    pub import_kind: String,
182    /// Full import path as components (e.g., ["crate", "foo", "bar"] for crate::foo::bar)
183    pub import_path: Vec<String>,
184    /// Specific names imported (e.g., ["HashMap", "HashSet"] for use std::collections::* with specific items)
185    pub imported_names: Vec<String>,
186    /// Whether this is a glob import (e.g., use foo::* or from foo import *)
187    pub is_glob: bool,
188    /// Byte offset where import starts
189    pub byte_start: u64,
190    /// Byte offset where import ends
191    pub byte_end: u64,
192    /// Line where import starts (1-indexed)
193    pub start_line: u64,
194    /// Column where import starts (0-indexed)
195    pub start_col: u64,
196    /// Line where import ends (1-indexed)
197    pub end_line: u64,
198    /// Column where import ends (0-indexed)
199    pub end_col: u64,
200}
201
202/// Module path cache for efficient module resolution
203///
204/// Maps module paths (e.g., "crate::foo::bar") to file IDs.
205/// Used by ModuleResolver to convert relative paths to concrete file IDs.
206#[derive(Debug, Clone, Default)]
207pub struct ModulePathCache {
208    /// Cache mapping module path -> file_id
209    cache: std::collections::HashMap<String, i64>,
210}
211
212impl ModulePathCache {
213    /// Create a new empty cache
214    pub fn new() -> Self {
215        Self::default()
216    }
217
218    /// Insert a module path -> file_id mapping
219    pub fn insert(&mut self, module_path: String, file_id: i64) {
220        self.cache.insert(module_path, file_id);
221    }
222
223    /// Get file_id for a module path
224    pub fn get(&self, module_path: &str) -> Option<i64> {
225        self.cache.get(module_path).copied()
226    }
227
228    /// Get the number of entries in the cache
229    pub fn len(&self) -> usize {
230        self.cache.len()
231    }
232
233    /// Check if cache is empty
234    pub fn is_empty(&self) -> bool {
235        self.cache.is_empty()
236    }
237
238    /// Clear the cache
239    pub fn clear(&mut self) {
240        self.cache.clear();
241    }
242
243    /// Build the cache from indexed files in the database
244    ///
245    /// Scans all File nodes and extracts module declarations
246    /// to build path -> file_id mappings.
247    pub fn build_from_index(backend: &std::sync::Arc<dyn sqlitegraph::GraphBackend>) -> Self {
248        let mut cache = Self::new();
249
250        let snapshot = sqlitegraph::SnapshotId::current();
251        let entity_ids = match backend.entity_ids() {
252            Ok(ids) => ids,
253            Err(_) => return cache,
254        };
255
256        for entity_id in entity_ids {
257            let node = match backend.get_node(snapshot, entity_id) {
258                Ok(n) => n,
259                Err(_) => continue,
260            };
261
262            if node.kind != "File" {
263                continue;
264            }
265
266            // Get file_path from node.file_path or extract from FileNode data
267            let file_path = node.file_path.clone().or_else(|| {
268                // For V3 backend, file_path might not be set directly
269                // Extract from FileNode data
270                serde_json::from_value::<FileNode>(node.data.clone())
271                    .ok()
272                    .map(|file_node| file_node.path)
273            });
274            
275            let file_path_str = file_path.as_deref().unwrap_or("");
276
277            // Build module path from file path
278            // For Rust: src/foo/bar.rs -> crate::foo::bar
279            // For Rust: src/foo/mod.rs -> crate::foo
280            // For Rust: src/lib.rs -> crate
281            if file_path_str.ends_with(".rs") {
282                let module_path = Self::file_path_to_module_path(file_path_str);
283                cache.insert(module_path, entity_id);
284            }
285        }
286
287        cache
288    }
289
290    /// Convert file path to module path (public for use by ModuleResolver)
291    ///
292    /// Examples:
293    /// - "src/lib.rs" -> "crate"
294    /// - "src/main.rs" -> "crate"
295    /// - "src/foo.rs" -> "crate::foo"
296    /// - "src/foo/mod.rs" -> "crate::foo"
297    /// - "src/foo/bar.rs" -> "crate::foo::bar"
298    /// - "src/foo/bar/mod.rs" -> "crate::foo::bar"
299    pub fn file_path_to_module_path(file_path: &str) -> String {
300        // Remove leading "src/" or trailing ".rs"
301        let path = file_path.strip_prefix("src/").unwrap_or(file_path);
302        let path = path.strip_suffix(".rs").unwrap_or(path);
303
304        // Split by "/"
305        let parts: Vec<&str> = path.split('/').collect();
306
307        // Filter out "mod" parts (from mod.rs files) but only if they're standalone
308        // "foo/mod" -> remove "mod" -> "foo"
309        // "foo/bar/mod" -> remove "mod" -> "foo/bar"
310        let module_parts: Vec<&str> = parts
311            .iter()
312            .filter(|&&part| part != "mod").copied()
313            .collect();
314
315        // If we have lib.rs or main.rs, this is the crate root
316        if module_parts.len() == 1 && (module_parts[0] == "lib" || module_parts[0] == "main") {
317            return "crate".to_string();
318        }
319
320        // Build module path with crate:: prefix
321        let module_path = module_parts.join("::");
322        format!("crate::{}", module_path)
323    }
324}
325
326/// Delete edges whose from_id OR to_id is in the provided set of entity IDs.
327///
328/// Determinism: IDs must be pre-sorted by caller.
329pub fn delete_edges_touching_entities(
330    conn: &rusqlite::Connection,
331    entity_ids_sorted: &[i64],
332) -> Result<usize> {
333    use rusqlite::params_from_iter;
334
335    if entity_ids_sorted.is_empty() {
336        return Ok(0);
337    }
338
339    // Build placeholders for IN list.
340    let placeholders = std::iter::repeat_n("?", entity_ids_sorted.len())
341        .collect::<Vec<_>>()
342        .join(", ");
343
344    let sql = format!(
345        "DELETE FROM graph_edges WHERE from_id IN ({}) OR to_id IN ({})",
346        placeholders, placeholders
347    );
348
349    // Params are duplicated (for from_id and to_id IN lists).
350    let params = entity_ids_sorted
351        .iter()
352        .chain(entity_ids_sorted.iter()).copied();
353
354    let affected = conn
355        .execute(&sql, params_from_iter(params))
356        .map_err(|e| anyhow::anyhow!("Failed to delete edges touching entities: {}", e))?;
357
358    Ok(affected)
359}