magellan/graph/schema.rs
1//! Graph schema definitions for Magellan
2//!
3//! Defines node and edge payloads for sqlitegraph persistence.
4
5use serde::{Deserialize, Serialize};
6
7use anyhow::Result;
8
9/// File node payload stored in sqlitegraph
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct FileNode {
12 pub path: String,
13 pub hash: String,
14 /// Unix timestamp (seconds since epoch) when this file was last indexed
15 pub last_indexed_at: i64,
16 /// Unix timestamp (seconds since epoch) of filesystem mtime when indexed
17 pub last_modified: i64,
18}
19
20/// Symbol node payload stored in sqlitegraph
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct SymbolNode {
23 /// Stable symbol ID derived from (language, fqn, span_id)
24 ///
25 /// Generated via SHA-256 hash of language:fqn:span_id.
26 /// This ID is deterministic and stable across runs for the same symbol.
27 #[serde(default)]
28 pub symbol_id: Option<String>,
29
30 /// Fully-qualified name for this symbol
31 /// Format varies by language (crate::module::Name for Rust, package.Class.Name for Java)
32 /// This is the primary key for symbol lookup, preventing name collisions
33 #[serde(default)]
34 pub fqn: Option<String>,
35
36 /// Canonical fully-qualified name with file path for unambiguous identity
37 /// Format: crate_name::file_path::kind symbol_name
38 /// Example: my_crate::src/lib.rs::Function my_function
39 #[serde(default)]
40 pub canonical_fqn: Option<String>,
41
42 /// Display fully-qualified name for human-readable output
43 /// Shortened form without file path when possible
44 /// Example: my_crate::my_function
45 #[serde(default)]
46 pub display_fqn: Option<String>,
47
48 /// Simple symbol name (display name)
49 /// For user-facing output only. Not used as a unique identifier.
50 pub name: Option<String>,
51 pub kind: String,
52 #[serde(default)]
53 pub kind_normalized: Option<String>,
54 pub byte_start: usize,
55 pub byte_end: usize,
56 pub start_line: usize,
57 pub start_col: usize,
58 pub end_line: usize,
59 pub end_col: usize,
60}
61
62/// Reference node payload stored in sqlitegraph
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct ReferenceNode {
65 pub file: String,
66 pub byte_start: u64,
67 pub byte_end: u64,
68 pub start_line: u64,
69 pub start_col: u64,
70 pub end_line: u64,
71 pub end_col: u64,
72}
73
74/// Cross-file reference entry for efficient lookup
75///
76/// Stored separately from ReferenceNode to enable efficient cross-file
77/// reference queries without traversing the entire graph.
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct CrossFileRef {
80 /// Symbol ID of the referencing symbol (source)
81 pub from_symbol_id: String,
82 /// Symbol ID of the referenced symbol (target)
83 pub to_symbol_id: String,
84 /// File path where the reference occurs
85 pub file_path: String,
86 /// Line number where the reference occurs (1-indexed)
87 pub line_number: usize,
88 /// Byte start position
89 pub byte_start: usize,
90 /// Byte end position
91 pub byte_end: usize,
92}
93
94/// Call node payload stored in sqlitegraph
95#[derive(Debug, Clone, Serialize, Deserialize)]
96pub struct CallNode {
97 pub file: String,
98 pub caller: String,
99 pub callee: String,
100 /// Stable symbol ID of the caller
101 #[serde(default)]
102 pub caller_symbol_id: Option<String>,
103 /// Stable symbol ID of the callee
104 #[serde(default)]
105 pub callee_symbol_id: Option<String>,
106 pub byte_start: u64,
107 pub byte_end: u64,
108 pub start_line: u64,
109 pub start_col: u64,
110 pub end_line: u64,
111 pub end_col: u64,
112}
113
114/// A single edge endpoint row for orphan detection.
115#[derive(Debug, Clone, Copy, PartialEq, Eq)]
116pub struct EdgeEndpoints {
117 pub from_id: i64,
118 pub to_id: i64,
119}
120
121/// Control Flow Basic Block node payload stored in database
122///
123/// Represents a single basic block in a function's control flow graph.
124/// Basic blocks are sequences of statements with a single entry point
125/// and single exit point (via terminator).
126#[derive(Debug, Clone, Serialize, Deserialize)]
127pub struct CfgBlock {
128 /// Function symbol ID this block belongs to
129 pub function_id: i64,
130
131 /// Block kind (entry, conditional, loop, match, return, etc.)
132 pub kind: String,
133
134 /// Terminator kind (how control exits this block)
135 pub terminator: String,
136
137 /// Byte offset where block starts
138 pub byte_start: u64,
139
140 /// Byte offset where block ends
141 pub byte_end: u64,
142
143 /// Line where block starts (1-indexed)
144 pub start_line: u64,
145
146 /// Column where block starts (0-indexed)
147 pub start_col: u64,
148
149 /// Line where block ends (1-indexed)
150 pub end_line: u64,
151
152 /// Column where block ends (0-indexed)
153 pub end_col: u64,
154}
155
156/// Control Flow Edge payload stored in graph_edges table
157///
158/// Represents edges between basic blocks in a function's CFG.
159/// Uses edge_type = "CFG_BLOCK" for graph_edges entries.
160#[derive(Debug, Clone, Serialize, Deserialize)]
161pub struct CfgEdge {
162 /// Source block ID
163 pub from_block_id: i64,
164
165 /// Target block ID
166 pub to_block_id: i64,
167
168 /// Edge kind (unconditional, conditional_true, conditional_false, etc.)
169 pub kind: String,
170}
171
172/// Import node payload stored in sqlitegraph
173///
174/// Represents an import/use/from statement in source code.
175/// Stores metadata about what was imported and where.
176#[derive(Debug, Clone, Serialize, Deserialize)]
177pub struct ImportNode {
178 /// File path containing this import
179 pub file: String,
180 /// Kind of import statement (use_crate, use_super, use_self, etc.)
181 pub import_kind: String,
182 /// Full import path as components (e.g., ["crate", "foo", "bar"] for crate::foo::bar)
183 pub import_path: Vec<String>,
184 /// Specific names imported (e.g., ["HashMap", "HashSet"] for use std::collections::* with specific items)
185 pub imported_names: Vec<String>,
186 /// Whether this is a glob import (e.g., use foo::* or from foo import *)
187 pub is_glob: bool,
188 /// Byte offset where import starts
189 pub byte_start: u64,
190 /// Byte offset where import ends
191 pub byte_end: u64,
192 /// Line where import starts (1-indexed)
193 pub start_line: u64,
194 /// Column where import starts (0-indexed)
195 pub start_col: u64,
196 /// Line where import ends (1-indexed)
197 pub end_line: u64,
198 /// Column where import ends (0-indexed)
199 pub end_col: u64,
200}
201
202/// Module path cache for efficient module resolution
203///
204/// Maps module paths (e.g., "crate::foo::bar") to file IDs.
205/// Used by ModuleResolver to convert relative paths to concrete file IDs.
206#[derive(Debug, Clone, Default)]
207pub struct ModulePathCache {
208 /// Cache mapping module path -> file_id
209 cache: std::collections::HashMap<String, i64>,
210}
211
212impl ModulePathCache {
213 /// Create a new empty cache
214 pub fn new() -> Self {
215 Self::default()
216 }
217
218 /// Insert a module path -> file_id mapping
219 pub fn insert(&mut self, module_path: String, file_id: i64) {
220 self.cache.insert(module_path, file_id);
221 }
222
223 /// Get file_id for a module path
224 pub fn get(&self, module_path: &str) -> Option<i64> {
225 self.cache.get(module_path).copied()
226 }
227
228 /// Get the number of entries in the cache
229 pub fn len(&self) -> usize {
230 self.cache.len()
231 }
232
233 /// Check if cache is empty
234 pub fn is_empty(&self) -> bool {
235 self.cache.is_empty()
236 }
237
238 /// Clear the cache
239 pub fn clear(&mut self) {
240 self.cache.clear();
241 }
242
243 /// Build the cache from indexed files in the database
244 ///
245 /// Scans all File nodes and extracts module declarations
246 /// to build path -> file_id mappings.
247 pub fn build_from_index(backend: &std::sync::Arc<dyn sqlitegraph::GraphBackend>) -> Self {
248 let mut cache = Self::new();
249
250 let snapshot = sqlitegraph::SnapshotId::current();
251 let entity_ids = match backend.entity_ids() {
252 Ok(ids) => ids,
253 Err(_) => return cache,
254 };
255
256 for entity_id in entity_ids {
257 let node = match backend.get_node(snapshot, entity_id) {
258 Ok(n) => n,
259 Err(_) => continue,
260 };
261
262 if node.kind != "File" {
263 continue;
264 }
265
266 // Get file_path from node.file_path or extract from FileNode data
267 let file_path = node.file_path.clone().or_else(|| {
268 // For V3 backend, file_path might not be set directly
269 // Extract from FileNode data
270 serde_json::from_value::<FileNode>(node.data.clone())
271 .ok()
272 .map(|file_node| file_node.path)
273 });
274
275 let file_path_str = file_path.as_deref().unwrap_or("");
276
277 // Build module path from file path
278 // For Rust: src/foo/bar.rs -> crate::foo::bar
279 // For Rust: src/foo/mod.rs -> crate::foo
280 // For Rust: src/lib.rs -> crate
281 if file_path_str.ends_with(".rs") {
282 let module_path = Self::file_path_to_module_path(file_path_str);
283 cache.insert(module_path, entity_id);
284 }
285 }
286
287 cache
288 }
289
290 /// Convert file path to module path (public for use by ModuleResolver)
291 ///
292 /// Examples:
293 /// - "src/lib.rs" -> "crate"
294 /// - "src/main.rs" -> "crate"
295 /// - "src/foo.rs" -> "crate::foo"
296 /// - "src/foo/mod.rs" -> "crate::foo"
297 /// - "src/foo/bar.rs" -> "crate::foo::bar"
298 /// - "src/foo/bar/mod.rs" -> "crate::foo::bar"
299 pub fn file_path_to_module_path(file_path: &str) -> String {
300 // Remove leading "src/" or trailing ".rs"
301 let path = file_path.strip_prefix("src/").unwrap_or(file_path);
302 let path = path.strip_suffix(".rs").unwrap_or(path);
303
304 // Split by "/"
305 let parts: Vec<&str> = path.split('/').collect();
306
307 // Filter out "mod" parts (from mod.rs files) but only if they're standalone
308 // "foo/mod" -> remove "mod" -> "foo"
309 // "foo/bar/mod" -> remove "mod" -> "foo/bar"
310 let module_parts: Vec<&str> = parts
311 .iter()
312 .filter(|&&part| part != "mod").copied()
313 .collect();
314
315 // If we have lib.rs or main.rs, this is the crate root
316 if module_parts.len() == 1 && (module_parts[0] == "lib" || module_parts[0] == "main") {
317 return "crate".to_string();
318 }
319
320 // Build module path with crate:: prefix
321 let module_path = module_parts.join("::");
322 format!("crate::{}", module_path)
323 }
324}
325
326/// Delete edges whose from_id OR to_id is in the provided set of entity IDs.
327///
328/// Determinism: IDs must be pre-sorted by caller.
329pub fn delete_edges_touching_entities(
330 conn: &rusqlite::Connection,
331 entity_ids_sorted: &[i64],
332) -> Result<usize> {
333 use rusqlite::params_from_iter;
334
335 if entity_ids_sorted.is_empty() {
336 return Ok(0);
337 }
338
339 // Build placeholders for IN list.
340 let placeholders = std::iter::repeat_n("?", entity_ids_sorted.len())
341 .collect::<Vec<_>>()
342 .join(", ");
343
344 let sql = format!(
345 "DELETE FROM graph_edges WHERE from_id IN ({}) OR to_id IN ({})",
346 placeholders, placeholders
347 );
348
349 // Params are duplicated (for from_id and to_id IN lists).
350 let params = entity_ids_sorted
351 .iter()
352 .chain(entity_ids_sorted.iter()).copied();
353
354 let affected = conn
355 .execute(&sql, params_from_iter(params))
356 .map_err(|e| anyhow::anyhow!("Failed to delete edges touching entities: {}", e))?;
357
358 Ok(affected)
359}