magellan/graph/algorithms.rs
1//! Graph algorithms for code analysis
2//!
3//! This module provides wrapper functions for sqlitegraph's algorithm library,
4//! exposing reachability analysis, dead code detection, cycle detection, call
5//! graph condensation, and impact analysis through Magellan's CodeGraph API.
6//!
7//! # Graph Views
8//!
9//! Magellan stores multiple edge types in the same graph:
10//! - **DEFINES**: File → Symbol (defines/contains relationship)
11//! - **REFERENCES**: Reference → Symbol (what symbol is referenced)
12//! - **CALLS**: Call node → Symbol (call graph edges)
13//! - **CALLER**: Symbol → Call node (reverse call graph edges)
14//!
15//! For call graph algorithms (reachability, dead code detection, SCC), we filter
16//! to **CALLS** edges only to traverse the call graph structure.
17//!
18//! # Clustered Adjacency
19//!
20//! Magellan uses sqlitegraph's clustered adjacency storage for ~10x graph
21//! traversal performance improvement when available.
22//!
23//! # Entity IDs vs Symbol IDs
24//!
25//! sqlitegraph algorithms work with **entity IDs** (i64 database row IDs),
26//! while Magellan's public API uses **stable symbol IDs** (32-char BLAKE3 hashes).
27//!
28//! This module provides translation functions:
29//! - `resolve_symbol_entity()`: Symbol ID → entity ID
30//! - `symbol_by_entity_id()`: entity ID → SymbolInfo
31//!
32//! # Algorithm Functions
33//!
34//! - [`CodeGraph::reachable_symbols()`]: Forward reachability from a symbol
35//! - [`CodeGraph::reverse_reachable_symbols()`]: Reverse reachability (callers)
36//! - [`CodeGraph::dead_symbols()`]: Dead code detection from entry point
37//! - [`CodeGraph::detect_cycles()`]: Find cycles using SCC decomposition
38//! - [`CodeGraph::find_cycles_containing()`]: Find cycles containing a specific symbol
39//! - [`CodeGraph::condense_call_graph()`]: Collapse SCCs to create condensation DAG
40//! - [`CodeGraph::enumerate_paths()`]: Path enumeration between symbols
41//! - [`CodeGraph::backward_slice()`]: Backward program slice (what affects this symbol)
42//! - [`CodeGraph::forward_slice()`]: Forward program slice (what this symbol affects)
43//!
44//! # Example
45//!
46//! \`\`\`no_run
47//! use magellan::CodeGraph;
48//!
49//! let mut graph = CodeGraph::open("codegraph.db")?;
50//!
51//! // Find all functions reachable from main
52//! let reachable = graph.reachable_symbols("main_symbol_id", None)?;
53//!
54//! // Find dead code unreachable from main
55//! let dead = graph.dead_symbols("main_symbol_id")?;
56//!
57//! // Find cycles (mutual recursion)
58//! let cycles = graph.detect_cycles()?;
59//!
60//! // Condense call graph for DAG analysis
61//! let condensed = graph.condense_call_graph()?;
62//! \`\`\`
63
64use ahash::{AHashMap, AHashSet};
65use anyhow::Result;
66use rusqlite::params;
67use sqlitegraph::errors::SqliteGraphError;
68use sqlitegraph::{GraphBackend, SnapshotId};
69use std::collections::{HashMap, HashSet, VecDeque};
70use std::sync::Arc;
71
72use crate::graph::schema::SymbolNode;
73
74use super::CodeGraph;
75
76/// Backend-agnostic reachable_from implementation
77///
78/// Uses `fetch_outgoing` from GraphBackend trait instead of requiring SqliteGraph.
79fn reachable_from(
80 backend: &dyn GraphBackend,
81 start: i64,
82) -> Result<AHashSet<i64>, SqliteGraphError> {
83 let mut visited = AHashSet::new();
84 let mut queue = VecDeque::new();
85
86 visited.insert(start);
87 queue.push_back(start);
88
89 while let Some(node) = queue.pop_front() {
90 for neighbor in backend.fetch_outgoing(node)? {
91 if visited.insert(neighbor) {
92 queue.push_back(neighbor);
93 }
94 }
95 }
96
97 Ok(visited)
98}
99
100/// Backend-agnostic reverse_reachable_from implementation
101///
102/// Uses `fetch_incoming` from GraphBackend trait instead of requiring SqliteGraph.
103fn reverse_reachable_from(
104 backend: &dyn GraphBackend,
105 start: i64,
106) -> Result<AHashSet<i64>, SqliteGraphError> {
107 let mut visited = AHashSet::new();
108 let mut queue = VecDeque::new();
109
110 visited.insert(start);
111 queue.push_back(start);
112
113 while let Some(node) = queue.pop_front() {
114 for neighbor in backend.fetch_incoming(node)? {
115 if visited.insert(neighbor) {
116 queue.push_back(neighbor);
117 }
118 }
119 }
120
121 Ok(visited)
122}
123
124/// Result of SCC collapse operation
125#[derive(Debug, Clone)]
126struct SccCollapseResult {
127 /// Maps each original node ID to its SCC supernode ID
128 _node_to_supernode: AHashMap<i64, i64>,
129 /// Maps each supernode ID to the set of original node IDs in that SCC
130 supernode_members: AHashMap<i64, AHashSet<i64>>,
131 /// Edges between supernodes in the condensed DAG
132 supernode_edges: Vec<(i64, i64)>,
133 /// Total number of SCCs found
134 _num_sccs: usize,
135}
136
137/// Backend-agnostic collapse_sccs implementation
138///
139/// Uses `all_entity_ids` and `fetch_outgoing` from GraphBackend trait.
140fn collapse_sccs(backend: &dyn GraphBackend) -> Result<SccCollapseResult, SqliteGraphError> {
141 // Step 1: Compute SCCs using our backend-agnostic implementation
142 let scc_result = strongly_connected_components(backend)?;
143
144 // Step 2: Handle empty graph
145 if scc_result.components.is_empty() {
146 return Ok(SccCollapseResult {
147 _node_to_supernode: AHashMap::new(),
148 supernode_members: AHashMap::new(),
149 supernode_edges: Vec::new(),
150 _num_sccs: 0,
151 });
152 }
153
154 // Step 3: Build node_to_supernode and supernode_members mappings
155 let mut node_to_supernode: AHashMap<i64, i64> = AHashMap::new();
156 let mut supernode_members: AHashMap<i64, AHashSet<i64>> = AHashMap::new();
157
158 for component in &scc_result.components {
159 let supernode_id = component[0]; // Use first member as supernode ID
160 let mut members = AHashSet::new();
161 for &node in component {
162 node_to_supernode.insert(node, supernode_id);
163 members.insert(node);
164 }
165 supernode_members.insert(supernode_id, members);
166 }
167
168 // Step 4: Build supernode edges (condensed graph)
169 let mut supernode_edges: Vec<(i64, i64)> = Vec::new();
170 let mut seen_edges: HashSet<(i64, i64)> = HashSet::new();
171
172 for (&node, &supernode) in &node_to_supernode {
173 for neighbor in backend.fetch_outgoing(node)? {
174 if let Some(&neighbor_supernode) = node_to_supernode.get(&neighbor) {
175 // Only add edge if it's between different supernodes
176 if supernode != neighbor_supernode {
177 let edge = (supernode, neighbor_supernode);
178 if seen_edges.insert(edge) {
179 supernode_edges.push(edge);
180 }
181 }
182 }
183 }
184 }
185
186 // Sort edges for deterministic output
187 supernode_edges.sort();
188
189 Ok(SccCollapseResult {
190 _node_to_supernode: node_to_supernode,
191 supernode_members,
192 supernode_edges,
193 _num_sccs: scc_result.components.len(),
194 })
195}
196
197/// Path enumeration result for backend-agnostic implementation
198#[derive(Debug, Clone)]
199struct InternalPathEnumerationResult {
200 /// All found paths (each path is a sequence of node IDs)
201 paths: Vec<Vec<i64>>,
202 /// Total number of paths found (before max_paths limit)
203 total_found: usize,
204 /// Number of paths pruned by bounds (max_depth, max_paths)
205 pruned_by_bounds: usize,
206 /// Maximum depth reached during enumeration
207 _max_depth_reached: usize,
208}
209
210/// Configuration for path enumeration
211#[derive(Debug, Clone)]
212struct PathEnumerationConfig {
213 /// Maximum depth to explore
214 max_depth: usize,
215 /// Maximum number of paths to return
216 max_paths: usize,
217 /// Maximum times to revisit a node (prevents infinite loops)
218 revisit_cap: usize,
219 /// Optional set of nodes that terminate path exploration
220 exit_nodes: Option<AHashSet<i64>>,
221 /// Optional set of nodes that represent errors
222 _error_nodes: Option<AHashSet<i64>>,
223}
224
225impl Default for PathEnumerationConfig {
226 fn default() -> Self {
227 Self {
228 max_depth: 100,
229 max_paths: 1000,
230 revisit_cap: 100,
231 exit_nodes: None,
232 _error_nodes: None,
233 }
234 }
235}
236
237/// Backend-agnostic enumerate_paths implementation
238///
239/// Uses `fetch_outgoing` from GraphBackend trait.
240fn enumerate_paths(
241 backend: &dyn GraphBackend,
242 entry: i64,
243 config: &PathEnumerationConfig,
244) -> Result<InternalPathEnumerationResult, SqliteGraphError> {
245 let mut paths = Vec::new();
246 let mut current_path = Vec::new();
247 let mut visit_count: AHashMap<i64, usize> = AHashMap::new();
248 let mut total_found = 0usize;
249 let mut pruned_by_bounds = 0usize;
250 let mut max_depth_reached = 0usize;
251
252 dfs_enumerate(
253 backend,
254 entry,
255 config,
256 &mut current_path,
257 &mut visit_count,
258 &mut paths,
259 &mut total_found,
260 &mut pruned_by_bounds,
261 &mut max_depth_reached,
262 )?;
263
264 Ok(InternalPathEnumerationResult {
265 paths,
266 total_found,
267 pruned_by_bounds,
268 _max_depth_reached: max_depth_reached,
269 })
270}
271
272/// DFS helper for path enumeration
273#[allow(clippy::too_many_arguments)]
274fn dfs_enumerate(
275 backend: &dyn GraphBackend,
276 node: i64,
277 config: &PathEnumerationConfig,
278 current_path: &mut Vec<i64>,
279 visit_count: &mut AHashMap<i64, usize>,
280 all_paths: &mut Vec<Vec<i64>>,
281 total_found: &mut usize,
282 pruned_by_bounds: &mut usize,
283 max_depth_reached: &mut usize,
284) -> Result<(), SqliteGraphError> {
285 // Update visit count for this node
286 let count = visit_count.entry(node).or_insert(0);
287 *count += 1;
288
289 // Check revisit cap
290 if *count > config.revisit_cap {
291 visit_count.entry(node).and_modify(|e| *e -= 1);
292 return Ok(());
293 }
294
295 // Add node to current path
296 current_path.push(node);
297 let current_depth = current_path.len();
298 *max_depth_reached = (*max_depth_reached).max(current_depth);
299
300 // Check max depth
301 if current_depth >= config.max_depth {
302 *pruned_by_bounds += 1;
303 current_path.pop();
304 visit_count.entry(node).and_modify(|e| *e -= 1);
305 return Ok(());
306 }
307
308 // Check if this is an exit node
309 if let Some(ref exits) = config.exit_nodes {
310 if exits.contains(&node) && current_depth > 1 {
311 // Save this path (exclude entry, include exit)
312 if all_paths.len() < config.max_paths {
313 all_paths.push(current_path.clone());
314 }
315 *total_found += 1;
316 current_path.pop();
317 visit_count.entry(node).and_modify(|e| *e -= 1);
318 return Ok(());
319 }
320 }
321
322 // Explore neighbors
323 let neighbors = backend.fetch_outgoing(node)?;
324 let mut had_successors = false;
325
326 for neighbor in neighbors {
327 had_successors = true;
328 dfs_enumerate(
329 backend,
330 neighbor,
331 config,
332 current_path,
333 visit_count,
334 all_paths,
335 total_found,
336 pruned_by_bounds,
337 max_depth_reached,
338 )?;
339 }
340
341 // If no successors and no exit nodes specified, save path
342 if !had_successors && config.exit_nodes.is_none() && current_depth > 1 {
343 if all_paths.len() < config.max_paths {
344 all_paths.push(current_path.clone());
345 }
346 *total_found += 1;
347 }
348
349 // Backtrack
350 current_path.pop();
351 visit_count.entry(node).and_modify(|e| *e -= 1);
352
353 Ok(())
354}
355
356/// Result of strongly connected components computation
357#[derive(Debug, Clone)]
358struct SccResult {
359 /// List of SCCs, each is a vector of node IDs
360 components: Vec<Vec<i64>>,
361 /// Mapping from node ID to component index
362 node_to_component: AHashMap<i64, usize>,
363}
364
365/// Backend-agnostic strongly_connected_components implementation
366///
367/// Uses `all_entity_ids` and `fetch_outgoing` from GraphBackend trait.
368/// Implements Tarjan's SCC algorithm.
369fn strongly_connected_components(
370 backend: &dyn GraphBackend,
371) -> Result<SccResult, SqliteGraphError> {
372 let all_ids = backend.all_entity_ids()?;
373
374 if all_ids.is_empty() {
375 return Ok(SccResult {
376 components: Vec::new(),
377 node_to_component: AHashMap::new(),
378 });
379 }
380
381 let mut index = 0usize;
382 let mut stack: Vec<i64> = Vec::new();
383 let mut on_stack: HashSet<i64> = HashSet::new();
384 let mut indices: AHashMap<i64, usize> = AHashMap::new();
385 let mut lowlinks: AHashMap<i64, usize> = AHashMap::new();
386 let mut components: Vec<Vec<i64>> = Vec::new();
387 let mut node_to_component: AHashMap<i64, usize> = AHashMap::new();
388
389 fn strongconnect(
390 v: i64,
391 backend: &dyn GraphBackend,
392 index: &mut usize,
393 stack: &mut Vec<i64>,
394 on_stack: &mut HashSet<i64>,
395 indices: &mut AHashMap<i64, usize>,
396 lowlinks: &mut AHashMap<i64, usize>,
397 components: &mut Vec<Vec<i64>>,
398 node_to_component: &mut AHashMap<i64, usize>,
399 ) -> Result<(), SqliteGraphError> {
400 indices.insert(v, *index);
401 lowlinks.insert(v, *index);
402 *index += 1;
403 stack.push(v);
404 on_stack.insert(v);
405
406 for w in backend.fetch_outgoing(v)? {
407 if !indices.contains_key(&w) {
408 strongconnect(
409 w,
410 backend,
411 index,
412 stack,
413 on_stack,
414 indices,
415 lowlinks,
416 components,
417 node_to_component,
418 )?;
419 let v_low = lowlinks[&v];
420 let w_low = lowlinks[&w];
421 lowlinks.insert(v, v_low.min(w_low));
422 } else if on_stack.contains(&w) {
423 let v_low = lowlinks[&v];
424 let w_idx = indices[&w];
425 lowlinks.insert(v, v_low.min(w_idx));
426 }
427 }
428
429 if lowlinks[&v] == indices[&v] {
430 let mut component = Vec::new();
431 loop {
432 let w = stack.pop().unwrap(); // M-UNWRAP: Tarjan SCC invariant guarantees stack non-empty
433 on_stack.remove(&w);
434 node_to_component.insert(w, components.len());
435 component.push(w);
436 if w == v {
437 break;
438 }
439 }
440 components.push(component);
441 }
442
443 Ok(())
444 }
445
446 for &node in &all_ids {
447 if !indices.contains_key(&node) {
448 strongconnect(
449 node,
450 backend,
451 &mut index,
452 &mut stack,
453 &mut on_stack,
454 &mut indices,
455 &mut lowlinks,
456 &mut components,
457 &mut node_to_component,
458 )?;
459 }
460 }
461
462 Ok(SccResult {
463 components,
464 node_to_component,
465 })
466}
467
468/// Symbol information for algorithm results
469///
470/// Contains the key metadata needed to identify and locate a symbol.
471#[derive(Debug, Clone, PartialEq, Eq)]
472pub struct SymbolInfo {
473 /// Stable symbol ID (32-char BLAKE3 hash)
474 pub symbol_id: Option<String>,
475 /// Fully-qualified name
476 pub fqn: Option<String>,
477 /// File path containing the symbol
478 pub file_path: String,
479 /// Symbol kind (Function, Method, Class, etc.)
480 pub kind: String,
481}
482
483/// Dead symbol information
484///
485/// Extends [`SymbolInfo`] with a reason why the symbol is considered dead.
486#[derive(Debug, Clone, PartialEq, Eq)]
487pub struct DeadSymbol {
488 /// Base symbol information
489 pub symbol: SymbolInfo,
490 /// Reason why this symbol is unreachable/dead
491 pub reason: String,
492}
493
494/// Cycle kind classification
495#[derive(Debug, Clone, PartialEq, Eq)]
496pub enum CycleKind {
497 /// Multiple symbols calling each other (SCC with >1 member)
498 MutualRecursion,
499 /// Single symbol that calls itself (direct self-loop)
500 SelfLoop,
501}
502
503/// Cycle information for detected cycles
504///
505/// Represents a strongly connected component (SCC) with more than one member,
506/// indicating mutual recursion or a cycle in the call graph.
507#[derive(Debug, Clone, PartialEq, Eq)]
508pub struct Cycle {
509 /// All symbols that participate in this cycle
510 pub members: Vec<SymbolInfo>,
511 /// Classification of the cycle type
512 pub kind: CycleKind,
513}
514
515/// Cycle detection report
516///
517/// Result of running [`CodeGraph::detect_cycles()`], containing all cycles
518/// found in the call graph.
519#[derive(Debug, Clone, PartialEq, Eq)]
520pub struct CycleReport {
521 /// All detected cycles
522 pub cycles: Vec<Cycle>,
523 /// Total number of cycles found
524 pub total_count: usize,
525}
526
527/// Supernode in a condensation graph
528///
529/// Represents an SCC collapsed into a single node for DAG analysis.
530#[derive(Debug, Clone, PartialEq, Eq)]
531pub struct Supernode {
532 /// Supernode ID (stable identifier for this SCC)
533 pub id: i64,
534 /// All symbols that are members of this SCC/supernode
535 pub members: Vec<SymbolInfo>,
536}
537
538/// Condensation graph (DAG after SCC collapse)
539///
540/// Represents the call graph after collapsing all SCCs into supernodes.
541/// The condensation graph is always a DAG (no cycles).
542#[derive(Debug, Clone, PartialEq, Eq)]
543pub struct CondensationGraph {
544 /// All supernodes in the condensed graph
545 pub supernodes: Vec<Supernode>,
546 /// Edges between supernodes (from_supernode_id, to_supernode_id)
547 pub edges: Vec<(i64, i64)>,
548}
549
550/// Condensation result with symbol-to-supernode mapping
551///
552/// Result of running [`CodeGraph::condense_call_graph()`], providing
553/// both the condensed DAG and the mapping from original symbols to supernodes.
554#[derive(Debug, Clone, PartialEq, Eq)]
555pub struct CondensationResult {
556 /// The condensed DAG
557 pub graph: CondensationGraph,
558 /// Maps symbol_id to the supernode ID containing that symbol
559 pub original_to_supernode: HashMap<String, i64>,
560}
561
562/// Direction of program slicing
563#[derive(Debug, Clone, Copy, PartialEq, Eq)]
564pub enum SliceDirection {
565 /// Backward slice: what affects this symbol (reverse reachability)
566 Backward,
567 /// Forward slice: what this symbol affects (forward reachability)
568 Forward,
569}
570
571/// Program slice result
572///
573/// Contains the slice results and statistics for a program slicing operation.
574#[derive(Debug, Clone, PartialEq, Eq)]
575pub struct ProgramSlice {
576 /// Target symbol for the slice
577 pub target: SymbolInfo,
578 /// Direction of the slice
579 pub direction: SliceDirection,
580 /// Symbols included in the slice
581 pub included_symbols: Vec<SymbolInfo>,
582 /// Number of symbols in the slice
583 pub symbol_count: usize,
584}
585
586/// Program slice result with statistics
587///
588/// Wraps a [`ProgramSlice`] with additional statistics about the slice.
589#[derive(Debug, Clone, PartialEq, Eq)]
590pub struct SliceResult {
591 /// The slice itself
592 pub slice: ProgramSlice,
593 /// Statistics about the slice
594 pub statistics: SliceStatistics,
595}
596
597/// Statistics for a program slice
598#[derive(Debug, Clone, PartialEq, Eq)]
599pub struct SliceStatistics {
600 /// Total number of symbols in the slice
601 pub total_symbols: usize,
602 /// Number of data dependencies
603 /// Note: Set to 0 for call-graph fallback (not computed without full CFG)
604 pub data_dependencies: usize,
605 /// Number of control dependencies
606 /// For call-graph fallback, this equals total_symbols (callers/callees)
607 pub control_dependencies: usize,
608}
609
610/// Execution path in the call graph
611///
612/// Represents a single path through the call graph from a starting symbol
613/// to an ending symbol, with metadata about the path.
614#[derive(Debug, Clone, PartialEq, Eq)]
615pub struct ExecutionPath {
616 /// Symbols along the path in order from start to end
617 pub symbols: Vec<SymbolInfo>,
618 /// Number of symbols in the path
619 pub length: usize,
620}
621
622/// Path enumeration result
623///
624/// Contains all discovered execution paths and statistics about the enumeration.
625#[derive(Debug, Clone)]
626pub struct PathEnumerationResult {
627 /// All discovered paths
628 pub paths: Vec<ExecutionPath>,
629 /// Total number of paths enumerated
630 pub total_enumerated: usize,
631 /// Whether enumeration was cut off due to bounds
632 pub bounded_hit: bool,
633 /// Statistics about the discovered paths
634 pub statistics: PathStatistics,
635}
636
637/// Statistics for path enumeration
638#[derive(Debug, Clone)]
639pub struct PathStatistics {
640 /// Average path length
641 pub avg_length: f64,
642 /// Minimum path length
643 pub min_length: usize,
644 /// Maximum path length
645 pub max_length: usize,
646 /// Number of unique symbols across all paths
647 pub unique_symbols: usize,
648}
649
650impl CodeGraph {
651 /// Resolve a stable symbol ID or FQN to its entity ID
652 ///
653 /// First tries to lookup by symbol_id (32-char BLAKE3 hash).
654 /// If not found, falls back to FQN lookup for convenience.
655 ///
656 /// # Arguments
657 /// * `symbol_id_or_fqn` - Stable symbol ID (32-char BLAKE3 hash) or FQN
658 ///
659 /// # Returns
660 /// The entity ID (i64 database row ID) for the symbol
661 ///
662 /// # Errors
663 /// Returns an error if the symbol is not found in the database
664 pub fn resolve_symbol_entity(&self, symbol_id_or_fqn: &str) -> Result<i64> {
665 let conn = self.chunks.connect()?;
666
667 // First try: lookup by symbol_id
668 let mut stmt = conn
669 .prepare_cached(
670 "SELECT id FROM graph_entities
671 WHERE kind = 'Symbol'
672 AND json_extract(data, '$.symbol_id') = ?1",
673 )
674 .map_err(|e| anyhow::anyhow!("Failed to prepare symbol ID query: {}", e))?;
675
676 let result = stmt.query_row(params![symbol_id_or_fqn], |row| row.get::<_, i64>(0));
677
678 match result {
679 Ok(entity_id) => return Ok(entity_id),
680 Err(rusqlite::Error::QueryReturnedNoRows) => {
681 // Fallback: try FQN lookup
682 }
683 Err(e) => {
684 return Err(anyhow::anyhow!("Failed to query symbol ID: {}", e));
685 }
686 }
687
688 // Fallback: lookup by FQN or display_fqn
689 let mut stmt = conn
690 .prepare_cached(
691 "SELECT id FROM graph_entities
692 WHERE kind = 'Symbol'
693 AND (json_extract(data, '$.fqn') = ?1
694 OR json_extract(data, '$.display_fqn') = ?1
695 OR json_extract(data, '$.canonical_fqn') = ?1)",
696 )
697 .map_err(|e| anyhow::anyhow!("Failed to prepare FQN query: {}", e))?;
698
699 stmt.query_row(params![symbol_id_or_fqn], |row| row.get::<_, i64>(0))
700 .map_err(|e| match e {
701 rusqlite::Error::QueryReturnedNoRows => {
702 anyhow::anyhow!(
703 "Symbol '{}' not found in database (tried symbol_id, fqn, display_fqn, canonical_fqn)",
704 symbol_id_or_fqn
705 )
706 }
707 _ => anyhow::anyhow!("Failed to query symbol by FQN: {}", e),
708 })
709 }
710
711 /// Get symbol information by entity ID
712 ///
713 /// # Arguments
714 /// * `entity_id` - Entity ID (i64 database row ID)
715 ///
716 /// # Returns
717 /// SymbolInfo with metadata from the symbol node
718 pub fn symbol_by_entity_id(&self, entity_id: i64) -> Result<SymbolInfo> {
719 let snapshot = SnapshotId::current();
720 let node = self
721 .calls
722 .backend
723 .get_node(snapshot, entity_id)
724 .map_err(|e| anyhow::anyhow!("Failed to get entity {}: {}", entity_id, e))?;
725
726 if node.kind != "Symbol" {
727 return Err(anyhow::anyhow!(
728 "Entity {} is not a Symbol (kind: {})",
729 entity_id,
730 node.kind
731 ));
732 }
733
734 let symbol_node: SymbolNode = serde_json::from_value(node.data)
735 .map_err(|e| anyhow::anyhow!("Failed to parse SymbolNode data: {}", e))?;
736
737 Ok(SymbolInfo {
738 symbol_id: symbol_node.symbol_id,
739 fqn: symbol_node.fqn.or_else(|| symbol_node.display_fqn.clone()),
740 file_path: node.file_path.unwrap_or_else(|| "?".to_string()),
741 kind: symbol_node.kind,
742 })
743 }
744
745 /// Get all Symbol entity IDs in the call graph
746 ///
747 /// Returns entity IDs for all Symbol nodes that are part of the call graph
748 /// (have incoming or outgoing CALLS edges).
749 ///
750 /// # Returns
751 /// Vector of entity IDs for call graph symbols
752 fn all_call_graph_entities(&self) -> Result<Vec<i64>> {
753 let conn = self.chunks.connect()?;
754
755 // Find all symbols that participate in CALLS edges
756 // (either as caller via CALLER edges or as callee via CALLS edges)
757 let mut stmt = conn
758 .prepare_cached(
759 "SELECT DISTINCT from_id FROM graph_edges WHERE edge_type = 'CALLER'
760 UNION
761 SELECT DISTINCT to_id FROM graph_edges WHERE edge_type = 'CALLS'",
762 )
763 .map_err(|e| anyhow::anyhow!("Failed to prepare call graph query: {}", e))?;
764
765 let entity_ids = stmt
766 .query_map([], |row| row.get::<_, i64>(0))
767 .map_err(|e| anyhow::anyhow!("Failed to execute call graph query: {}", e))?
768 .collect::<Result<Vec<_>, _>>()
769 .map_err(|e| anyhow::anyhow!("Failed to collect call graph results: {}", e))?;
770
771 Ok(entity_ids)
772 }
773
774 /// Find all symbols reachable from a given symbol (forward reachability)
775 ///
776 /// Computes the transitive closure of the call graph starting from the
777 /// specified symbol. Returns all symbols that can be reached by following
778 /// CALLS edges from the starting symbol.
779 ///
780 /// # Graph View
781 ///
782 /// This operates on the **call graph** (CALLS edges only), not other edge types.
783 /// The starting symbol itself is NOT included in the results.
784 ///
785 /// # Arguments
786 /// * `symbol_id` - Stable symbol ID to start from (or FQN as fallback)
787 /// * `max_depth` - Optional maximum depth limit (None = unlimited)
788 ///
789 /// # Returns
790 /// Vector of [`SymbolInfo`] for reachable symbols, sorted deterministically
791 ///
792 /// # Example
793 ///
794 /// \`\`\`no_run
795 /// # use magellan::CodeGraph;
796 /// # let mut graph = CodeGraph::open("test.db").unwrap();
797 /// // Find all functions called from main (directly or indirectly)
798 /// let reachable = graph.reachable_symbols("main", None)?;
799 /// \`\`\`
800 pub fn reachable_symbols(
801 &self,
802 symbol_id: &str,
803 _max_depth: Option<usize>,
804 ) -> Result<Vec<SymbolInfo>> {
805 let entity_id = self.resolve_symbol_entity(symbol_id)?;
806 let backend = &*self.calls.backend;
807
808 // Use backend-agnostic reachable_from implementation
809 // This traverses outgoing edges from the start node
810 let reachable_entity_ids = reachable_from(backend, entity_id)?;
811
812 // Convert entity IDs to SymbolInfo
813 let mut symbols = Vec::new();
814 for id in reachable_entity_ids {
815 // Skip the starting symbol itself
816 if id == entity_id {
817 continue;
818 }
819
820 if let Ok(info) = self.symbol_by_entity_id(id) {
821 symbols.push(info);
822 }
823 }
824
825 // Sort deterministically for stable output
826 symbols.sort_by(|a, b| {
827 a.file_path
828 .cmp(&b.file_path)
829 .then_with(|| a.fqn.as_ref().cmp(&b.fqn.as_ref()))
830 .then_with(|| a.kind.cmp(&b.kind))
831 });
832
833 Ok(symbols)
834 }
835
836 /// Find all symbols that can reach a given symbol (reverse reachability)
837 ///
838 /// Computes the reverse transitive closure of the call graph. Returns all
839 /// symbols from which the specified symbol can be reached by following
840 /// CALLS edges (i.e., all callers that directly or indirectly call this symbol).
841 ///
842 /// # Graph View
843 ///
844 /// This operates on the **call graph** (CALLS edges only).
845 ///
846 /// # Arguments
847 /// * `symbol_id` - Stable symbol ID to analyze
848 /// * `max_depth` - Optional maximum depth limit (None = unlimited)
849 ///
850 /// # Returns
851 /// Vector of [`SymbolInfo`] for symbols that can reach the target, sorted deterministically
852 ///
853 /// # Example
854 ///
855 /// \`\`\`no_run
856 /// # use magellan::CodeGraph;
857 /// # let mut graph = CodeGraph::open("test.db").unwrap();
858 /// // Find all functions that directly or indirectly call 'helper_function'
859 /// let callers = graph.reverse_reachable_symbols("helper_symbol_id", None)?;
860 /// \`\`\`
861 pub fn reverse_reachable_symbols(
862 &self,
863 symbol_id: &str,
864 _max_depth: Option<usize>,
865 ) -> Result<Vec<SymbolInfo>> {
866 let entity_id = self.resolve_symbol_entity(symbol_id)?;
867 let backend = &*self.calls.backend;
868
869 // Use backend-agnostic reverse_reachable_from implementation
870 // This traverses incoming edges to the target node
871 let reachable_entity_ids = reverse_reachable_from(backend, entity_id)?;
872
873 // Convert entity IDs to SymbolInfo
874 let mut symbols = Vec::new();
875 for id in reachable_entity_ids {
876 // Skip the starting symbol itself
877 if id == entity_id {
878 continue;
879 }
880
881 if let Ok(info) = self.symbol_by_entity_id(id) {
882 symbols.push(info);
883 }
884 }
885
886 // Sort deterministically for stable output
887 symbols.sort_by(|a, b| {
888 a.file_path
889 .cmp(&b.file_path)
890 .then_with(|| a.fqn.as_ref().cmp(&b.fqn.as_ref()))
891 .then_with(|| a.kind.cmp(&b.kind))
892 });
893
894 Ok(symbols)
895 }
896
897 /// Find dead code unreachable from an entry point symbol
898 ///
899 /// Identifies all symbols in the call graph that cannot be reached from
900 /// the specified entry point (e.g., `main`, `test_main`). This is useful
901 /// for detecting unused functions and dead code.
902 ///
903 /// # Graph View
904 ///
905 /// This operates on the **call graph** (CALLS edges only). Symbols not
906 /// participating in call edges are not considered.
907 ///
908 /// # Limitations
909 ///
910 /// - Only considers the call graph. Symbols called via reflection,
911 /// function pointers, or dynamic dispatch may be incorrectly flagged.
912 /// - Test functions, benchmark code, and platform-specific code may
913 /// appear as dead code if not reachable from the specified entry point.
914 ///
915 /// # Arguments
916 /// * `entry_symbol_id` - Stable symbol ID of the entry point (e.g., main function)
917 ///
918 /// # Returns
919 /// Vector of [`DeadSymbol`] for unreachable symbols, sorted deterministically
920 ///
921 /// # Example
922 ///
923 /// \`\`\`no_run
924 /// # use magellan::CodeGraph;
925 /// # let mut graph = CodeGraph::open("test.db").unwrap();
926 /// // Find all functions unreachable from main
927 /// let dead = graph.dead_symbols("main_symbol_id")?;
928 /// for dead_symbol in dead {
929 /// println!("Dead: {} in {} ({})",
930 /// dead_symbol.symbol.fqn.as_deref().unwrap_or("?"),
931 /// dead_symbol.symbol.file_path,
932 /// dead_symbol.reason);
933 /// }
934 /// \`\`\`
935 pub fn dead_symbols(&self, entry_symbol_id: &str) -> Result<Vec<DeadSymbol>> {
936 let entry_entity = self.resolve_symbol_entity(entry_symbol_id)?;
937
938 // Get all call graph entities
939 let all_entities = self.all_call_graph_entities()?;
940
941 // Find all entities reachable from the entry point
942 let backend = &*self.calls.backend;
943 let reachable_ids = reachable_from(backend, entry_entity)?;
944
945 // Dead symbols = all entities - reachable entities
946 let reachable_set: HashSet<i64> = reachable_ids.into_iter().collect();
947 let mut dead_symbols = Vec::new();
948
949 for entity_id in all_entities {
950 // Skip the entry point itself
951 if entity_id == entry_entity {
952 continue;
953 }
954
955 // If not reachable from entry point, it's dead code
956 if !reachable_set.contains(&entity_id) {
957 if let Ok(info) = self.symbol_by_entity_id(entity_id) {
958 dead_symbols.push(DeadSymbol {
959 reason: "unreachable from entry point".to_string(),
960 symbol: info,
961 });
962 }
963 }
964 }
965
966 // Sort deterministically for stable output
967 dead_symbols.sort_by(|a, b| {
968 a.symbol
969 .file_path
970 .cmp(&b.symbol.file_path)
971 .then_with(|| a.symbol.fqn.as_ref().cmp(&b.symbol.fqn.as_ref()))
972 .then_with(|| a.symbol.kind.cmp(&b.symbol.kind))
973 });
974
975 Ok(dead_symbols)
976 }
977
978 /// Detect cycles in the call graph using SCC decomposition
979 ///
980 /// Finds all strongly connected components (SCCs) with more than one member,
981 /// which indicate cycles or mutual recursion in the call graph.
982 ///
983 /// # Graph View
984 ///
985 /// This operates on the **call graph** (CALLS edges only).
986 ///
987 /// # Cycle Detection
988 ///
989 /// Uses Tarjan's SCC algorithm to find strongly connected components.
990 /// Only SCCs with more than one member are reported as cycles (MutualRecursion).
991 /// Single-node SCCs are not cycles (unless they have self-loops).
992 ///
993 /// # Returns
994 /// [`CycleReport`] containing all detected cycles
995 ///
996 /// # Example
997 ///
998 /// \`\`\`no_run
999 /// # use magellan::CodeGraph;
1000 /// # let mut graph = CodeGraph::open("test.db").unwrap();
1001 /// let report = graph.detect_cycles()?;
1002 /// println!("Found {} cycles", report.total_count);
1003 /// for cycle in &report.cycles {
1004 /// println!("Cycle with {} members:", cycle.members.len());
1005 /// for member in &cycle.members {
1006 /// println!(" - {}", member.fqn.as_deref().unwrap_or("?"));
1007 /// }
1008 /// }
1009 /// \`\`\`
1010 pub fn detect_cycles(&self) -> Result<CycleReport> {
1011 let backend = &*self.calls.backend;
1012
1013 // Use backend-agnostic strongly_connected_components implementation
1014 let scc_result = strongly_connected_components(backend)?;
1015
1016 // Filter to SCCs with >1 member (mutual recursion)
1017 let cycles: Vec<_> = scc_result
1018 .components
1019 .into_iter()
1020 .filter(|scc| scc.len() > 1)
1021 .map(|members| {
1022 // Convert entity IDs to SymbolInfo
1023 let symbol_infos: Vec<_> = members
1024 .into_iter()
1025 .filter_map(|id| self.symbol_by_entity_id(id).ok())
1026 .collect();
1027
1028 Cycle {
1029 members: symbol_infos,
1030 kind: CycleKind::MutualRecursion,
1031 }
1032 })
1033 .filter(|cycle| !cycle.members.is_empty())
1034 .collect();
1035
1036 let total_count = cycles.len();
1037
1038 // Sort cycles deterministically
1039 let mut cycles = cycles;
1040 cycles.sort_by(|a, b| match (a.members.first(), b.members.first()) {
1041 (Some(am), Some(bm)) => match (am.fqn.as_ref(), bm.fqn.as_ref()) {
1042 (Some(af), Some(bf)) => af.cmp(bf),
1043 (Some(_), None) => std::cmp::Ordering::Less,
1044 (None, Some(_)) => std::cmp::Ordering::Greater,
1045 (None, None) => std::cmp::Ordering::Equal,
1046 },
1047 (Some(_), None) => std::cmp::Ordering::Less,
1048 (None, Some(_)) => std::cmp::Ordering::Greater,
1049 (None, None) => std::cmp::Ordering::Equal,
1050 });
1051
1052 Ok(CycleReport {
1053 cycles,
1054 total_count,
1055 })
1056 }
1057
1058 /// Find cycles containing a specific symbol
1059 ///
1060 /// Returns only the cycles that include the specified symbol in their member set.
1061 ///
1062 /// # Arguments
1063 /// * `symbol_id` - Stable symbol ID or FQN to search for
1064 ///
1065 /// # Returns
1066 /// Vector of [`Cycle`] containing the specified symbol
1067 ///
1068 /// # Example
1069 ///
1070 /// \`\`\`no_run
1071 /// # use magellan::CodeGraph;
1072 /// # let mut graph = CodeGraph::open("test.db").unwrap();
1073 /// let cycles = graph.find_cycles_containing("problematic_function")?;
1074 /// if cycles.is_empty() {
1075 /// println!("No cycles found containing this symbol");
1076 /// } else {
1077 /// println!("Found {} cycles containing this symbol", cycles.len());
1078 /// }
1079 /// \`\`\`
1080 pub fn find_cycles_containing(&self, symbol_id: &str) -> Result<Vec<Cycle>> {
1081 let entity_id = self.resolve_symbol_entity(symbol_id)?;
1082 let backend = &*self.calls.backend;
1083
1084 // Use backend-agnostic strongly_connected_components implementation
1085 let scc_result = strongly_connected_components(backend)?;
1086
1087 // Find which SCC contains this entity
1088 let target_component_idx = scc_result.node_to_component.get(&entity_id);
1089
1090 let target_idx = match target_component_idx {
1091 Some(&idx) => idx,
1092 None => return Ok(Vec::new()), // Symbol not in any SCC (shouldn't happen)
1093 };
1094
1095 // Check if this SCC is a cycle (has >1 member)
1096 let target_component = &scc_result.components[target_idx];
1097 if target_component.len() <= 1 {
1098 // Single node SCC - not a cycle (unless self-loop, but that's rare)
1099 return Ok(Vec::new());
1100 }
1101
1102 // Convert this SCC to a Cycle
1103 let symbol_infos: Vec<_> = target_component
1104 .iter()
1105 .filter_map(|&id| self.symbol_by_entity_id(id).ok())
1106 .collect();
1107
1108 let cycle = Cycle {
1109 members: symbol_infos,
1110 kind: CycleKind::MutualRecursion,
1111 };
1112
1113 Ok(vec![cycle])
1114 }
1115
1116 /// Condense the call graph by collapsing SCCs into supernodes
1117 ///
1118 /// Creates a condensation DAG by collapsing each strongly connected component
1119 /// into a single "supernode". The resulting graph is always acyclic, making it
1120 /// suitable for topological analysis and safe refactoring.
1121 ///
1122 /// # Graph View
1123 ///
1124 /// This operates on the **call graph** (CALLS edges only).
1125 ///
1126 /// # Use Cases
1127 ///
1128 /// - **Topological Sorting**: Condensation graph is a DAG, enabling topo sort
1129 /// - **Mutual Recursion Detection**: Large supernodes indicate tight coupling
1130 /// - **Impact Analysis**: Changing one symbol affects its entire SCC
1131 ///
1132 /// # Returns
1133 /// [`CondensationResult`] with the condensed DAG and symbol-to-supernode mapping
1134 ///
1135 /// # Example
1136 ///
1137 /// \`\`\`no_run
1138 /// # use magellan::CodeGraph;
1139 /// # let mut graph = CodeGraph::open("test.db").unwrap();
1140 /// let condensed = graph.condense_call_graph()?;
1141 ///
1142 /// println!("Condensed to {} supernodes", condensed.graph.supernodes.len());
1143 /// println!("Condensed graph has {} edges", condensed.graph.edges.len());
1144 ///
1145 /// // Check which SCC a symbol belongs to
1146 /// if let Some(supernode_id) = condensed.original_to_supernode.get("some_symbol_id") {
1147 /// println!("Symbol is in SCC {}", supernode_id);
1148 /// }
1149 /// \`\`\`
1150 pub fn condense_call_graph(&self) -> Result<CondensationResult> {
1151 let backend = &*self.calls.backend;
1152
1153 // Use backend-agnostic collapse_sccs implementation
1154 let collapse_result = collapse_sccs(backend)?;
1155
1156 // Build supernodes with SymbolInfo members
1157 let mut supernodes = Vec::new();
1158 let mut original_to_supernode = HashMap::new();
1159
1160 for (&supernode_id, member_ids) in &collapse_result.supernode_members {
1161 let symbol_infos: Vec<_> = member_ids
1162 .iter()
1163 .filter_map(|&id| self.symbol_by_entity_id(id).ok())
1164 .collect();
1165
1166 // Build mapping from symbol_id to supernode
1167 for symbol_info in &symbol_infos {
1168 if let Some(ref sym_id) = symbol_info.symbol_id {
1169 original_to_supernode.insert(sym_id.clone(), supernode_id);
1170 }
1171 }
1172
1173 supernodes.push(Supernode {
1174 id: supernode_id,
1175 members: symbol_infos,
1176 });
1177 }
1178
1179 // Sort supernodes deterministically
1180 supernodes.sort_by_key(|a| a.id);
1181
1182 let graph = CondensationGraph {
1183 supernodes,
1184 edges: collapse_result.supernode_edges,
1185 };
1186
1187 Ok(CondensationResult {
1188 graph,
1189 original_to_supernode,
1190 })
1191 }
1192
1193 /// Compute a backward program slice (what affects this symbol)
1194 ///
1195 /// Returns all symbols that can affect the target symbol through the call graph.
1196 /// This is useful for bug isolation - finding all code that could influence
1197 /// a given symbol's behavior.
1198 ///
1199 /// # Graph View (Call-Graph Fallback)
1200 ///
1201 /// **Current implementation uses call-graph reachability as a fallback.**
1202 /// Full CFG-based program slicing requires control dependence graph (CDG)
1203 /// which needs post-dominators and AST CFG integration not yet available.
1204 ///
1205 /// The fallback implementation uses reverse reachability on the call graph,
1206 /// finding all callers that directly or indirectly call this symbol.
1207 ///
1208 /// # Limitations
1209 ///
1210 /// - Uses call-graph reachability instead of full CFG-based slicing
1211 /// - Does not include data flow dependencies within functions
1212 /// - Does not include control flow from conditionals/loops
1213 /// - Full slicing will be available when AST CFG edges are integrated
1214 ///
1215 /// # Arguments
1216 /// * `symbol_id` - Stable symbol ID or FQN to slice from
1217 ///
1218 /// # Returns
1219 /// [`SliceResult`] containing the slice and statistics
1220 ///
1221 /// # Example
1222 ///
1223 /// \`\`\`no_run
1224 /// # use magellan::CodeGraph;
1225 /// # let mut graph = CodeGraph::open("test.db").unwrap();
1226 /// // Find what affects 'helper_function'
1227 /// let slice_result = graph.backward_slice("helper_function")?;
1228 /// println!("{} symbols affect this function", slice_result.slice.symbol_count);
1229 /// for symbol in &slice_result.slice.included_symbols {
1230 /// println!(" - {}", symbol.fqn.as_deref().unwrap_or("?"));
1231 /// }
1232 /// \`\`\`
1233 pub fn backward_slice(&self, symbol_id: &str) -> Result<SliceResult> {
1234 let entity_id = self.resolve_symbol_entity(symbol_id)?;
1235 let backend = &*self.calls.backend;
1236
1237 // Get target symbol info
1238 let target = self.symbol_by_entity_id(entity_id)?;
1239
1240 // Use backend-agnostic reverse_reachable_from on call graph
1241 // This finds all callers that directly or indirectly call this symbol
1242 let caller_entity_ids = reverse_reachable_from(backend, entity_id)?;
1243
1244 // Convert entity IDs to SymbolInfo
1245 let mut included_symbols = Vec::new();
1246 for id in caller_entity_ids {
1247 // Skip the starting symbol itself
1248 if id == entity_id {
1249 continue;
1250 }
1251
1252 if let Ok(info) = self.symbol_by_entity_id(id) {
1253 included_symbols.push(info);
1254 }
1255 }
1256
1257 // Sort deterministically
1258 included_symbols.sort_by(|a, b| {
1259 a.file_path
1260 .cmp(&b.file_path)
1261 .then_with(|| a.fqn.as_ref().cmp(&b.fqn.as_ref()))
1262 .then_with(|| a.kind.cmp(&b.kind))
1263 });
1264
1265 let symbol_count = included_symbols.len();
1266
1267 Ok(SliceResult {
1268 slice: ProgramSlice {
1269 target,
1270 direction: SliceDirection::Backward,
1271 included_symbols,
1272 symbol_count,
1273 },
1274 statistics: SliceStatistics {
1275 total_symbols: symbol_count,
1276 data_dependencies: 0, // Not available in call-graph fallback
1277 control_dependencies: symbol_count,
1278 },
1279 })
1280 }
1281
1282 /// Compute a forward program slice (what this symbol affects)
1283 ///
1284 /// Returns all symbols that the target symbol can affect through the call graph.
1285 /// This is useful for refactoring safety - finding all code that could be
1286 /// impacted by changes to this symbol.
1287 ///
1288 /// # Graph View (Call-Graph Fallback)
1289 ///
1290 /// **Current implementation uses call-graph reachability as a fallback.**
1291 /// Full CFG-based program slicing requires control dependence graph (CDG)
1292 /// which needs post-dominators and AST CFG integration not yet available.
1293 ///
1294 /// The fallback implementation uses forward reachability on the call graph,
1295 /// finding all callees that this symbol directly or indirectly calls.
1296 ///
1297 /// # Limitations
1298 ///
1299 /// - Uses call-graph reachability instead of full CFG-based slicing
1300 /// - Does not include data flow dependencies within functions
1301 /// - Does not include control flow from conditionals/loops
1302 /// - Full slicing will be available when AST CFG edges are integrated
1303 ///
1304 /// # Arguments
1305 /// * `symbol_id` - Stable symbol ID or FQN to slice from
1306 ///
1307 /// # Returns
1308 /// [`SliceResult`] containing the slice and statistics
1309 ///
1310 /// # Example
1311 ///
1312 /// \`\`\`no_run
1313 /// # use magellan::CodeGraph;
1314 /// # let mut graph = CodeGraph::open("test.db").unwrap();
1315 /// // Find what 'main_function' affects
1316 /// let slice_result = graph.forward_slice("main_function")?;
1317 /// println!("{} symbols are affected by this function", slice_result.slice.symbol_count);
1318 /// for symbol in &slice_result.slice.included_symbols {
1319 /// println!(" - {}", symbol.fqn.as_deref().unwrap_or("?"));
1320 /// }
1321 /// \`\`\`
1322 pub fn forward_slice(&self, symbol_id: &str) -> Result<SliceResult> {
1323 let entity_id = self.resolve_symbol_entity(symbol_id)?;
1324 let backend = &*self.calls.backend;
1325
1326 // Get target symbol info
1327 let target = self.symbol_by_entity_id(entity_id)?;
1328
1329 // Use backend-agnostic reachable_from on call graph
1330 // This finds all callees that this symbol directly or indirectly calls
1331 let callee_entity_ids = reachable_from(backend, entity_id)?;
1332
1333 // Convert entity IDs to SymbolInfo
1334 let mut included_symbols = Vec::new();
1335 for id in callee_entity_ids {
1336 // Skip the starting symbol itself
1337 if id == entity_id {
1338 continue;
1339 }
1340
1341 if let Ok(info) = self.symbol_by_entity_id(id) {
1342 included_symbols.push(info);
1343 }
1344 }
1345
1346 // Sort deterministically
1347 included_symbols.sort_by(|a, b| {
1348 a.file_path
1349 .cmp(&b.file_path)
1350 .then_with(|| a.fqn.as_ref().cmp(&b.fqn.as_ref()))
1351 .then_with(|| a.kind.cmp(&b.kind))
1352 });
1353
1354 let symbol_count = included_symbols.len();
1355
1356 Ok(SliceResult {
1357 slice: ProgramSlice {
1358 target,
1359 direction: SliceDirection::Forward,
1360 included_symbols,
1361 symbol_count,
1362 },
1363 statistics: SliceStatistics {
1364 total_symbols: symbol_count,
1365 data_dependencies: 0, // Not available in call-graph fallback
1366 control_dependencies: symbol_count,
1367 },
1368 })
1369 }
1370
1371 /// Enumerate execution paths from a starting symbol
1372 ///
1373 /// Finds all execution paths from `start_symbol_id` to `end_symbol_id` (if provided)
1374 /// or all paths starting from `start_symbol_id` (if end_symbol_id is None).
1375 ///
1376 /// Path enumeration uses bounded DFS to prevent infinite traversal in cyclic graphs:
1377 /// - `max_depth`: Maximum path length (number of edges)
1378 /// - `max_paths`: Maximum number of paths to return
1379 /// - `revisit_cap`: Maximum number of times a single node can be revisited (prevents infinite loops)
1380 ///
1381 /// # Arguments
1382 ///
1383 /// * `start_symbol_id` - Starting symbol ID or FQN
1384 /// * `end_symbol_id` - Optional ending symbol ID or FQN (if None, enumerates all paths from start)
1385 /// * `max_depth` - Maximum path depth (default: 100)
1386 /// * `max_paths` - Maximum number of paths to return (default: 1000)
1387 ///
1388 /// # Returns
1389 ///
1390 /// Returns a [`PathEnumerationResult`] containing:
1391 /// - All discovered paths
1392 /// - Whether enumeration hit bounds
1393 /// - Statistics about path lengths and unique symbols
1394 ///
1395 /// # Example
1396 ///
1397 /// \`\`\`no_run
1398 /// # use magellan::CodeGraph;
1399 /// # fn main() -> anyhow::Result<()> {
1400 /// let mut graph = CodeGraph::open("codegraph.db")?;
1401 ///
1402 /// // Find all paths from main to any leaf function
1403 /// let result = graph.enumerate_paths("main", None, 50, 100)?;
1404 ///
1405 /// println!("Found {} paths", result.total_enumerated);
1406 /// println!("Average length: {:.2}", result.statistics.avg_length);
1407 /// for (i, path) in result.paths.iter().enumerate() {
1408 /// println!("Path {}: {:?}", path.symbols.iter().map(|s| s.fqn.as_deref().unwrap_or("?")).collect::<Vec<_>>());
1409 /// }
1410 /// # Ok(())
1411 /// # }
1412 /// \`\`\`
1413 pub fn enumerate_paths(
1414 &self,
1415 start_symbol_id: &str,
1416 end_symbol_id: Option<&str>,
1417 max_depth: usize,
1418 max_paths: usize,
1419 ) -> Result<PathEnumerationResult> {
1420 let start_entity_id = self.resolve_symbol_entity(start_symbol_id)?;
1421 let backend = &*self.calls.backend;
1422
1423 // Build exit_nodes set for target symbol
1424 let exit_nodes: Option<AHashSet<i64>> = if let Some(end_id) = end_symbol_id {
1425 let end_entity_id = self.resolve_symbol_entity(end_id)?;
1426 let mut set = AHashSet::new();
1427 set.insert(end_entity_id);
1428 Some(set)
1429 } else {
1430 None
1431 };
1432
1433 // Use backend-agnostic path enumeration
1434 let config = PathEnumerationConfig {
1435 max_depth,
1436 max_paths,
1437 revisit_cap: 100, // Prevent infinite loops in cyclic graphs
1438 exit_nodes,
1439 _error_nodes: None,
1440 };
1441
1442 let enum_result = enumerate_paths(backend, start_entity_id, &config)?;
1443
1444 // Convert path enumeration result to our format
1445 let mut paths = Vec::new();
1446 let mut all_symbols = HashSet::new();
1447 let mut min_length = usize::MAX;
1448 let mut max_length = 0;
1449 let mut total_length = 0;
1450
1451 for path in enum_result.paths {
1452 let mut symbols = Vec::new();
1453
1454 for &entity_id in &path {
1455 if let Ok(info) = self.symbol_by_entity_id(entity_id) {
1456 all_symbols.insert(info.symbol_id.clone().unwrap_or_default());
1457 symbols.push(info);
1458 }
1459 }
1460
1461 let length = symbols.len();
1462 if length > 0 {
1463 min_length = min_length.min(length);
1464 max_length = max_length.max(length);
1465 total_length += length;
1466
1467 paths.push(ExecutionPath { symbols, length });
1468 }
1469 }
1470
1471 // Sort paths: first by starting symbol FQN, then by length
1472 paths.sort_by(|a, b| {
1473 match (
1474 a.symbols.first().and_then(|s| s.fqn.as_ref()),
1475 b.symbols.first().and_then(|s| s.fqn.as_ref()),
1476 ) {
1477 (Some(a_fqn), Some(b_fqn)) => {
1478 a_fqn.cmp(b_fqn).then_with(|| a.length.cmp(&b.length))
1479 }
1480 (Some(_), None) => std::cmp::Ordering::Less,
1481 (None, Some(_)) => std::cmp::Ordering::Greater,
1482 (None, None) => a.length.cmp(&b.length),
1483 }
1484 });
1485
1486 let avg_length = if paths.is_empty() {
1487 0.0
1488 } else {
1489 total_length as f64 / paths.len() as f64
1490 };
1491
1492 // Determine if we hit bounds
1493 let bounded_hit = enum_result.pruned_by_bounds > 0;
1494
1495 Ok(PathEnumerationResult {
1496 paths,
1497 total_enumerated: enum_result.total_found,
1498 bounded_hit,
1499 statistics: PathStatistics {
1500 avg_length,
1501 min_length: if min_length == usize::MAX {
1502 0
1503 } else {
1504 min_length
1505 },
1506 max_length,
1507 unique_symbols: all_symbols.len(),
1508 },
1509 })
1510 }
1511}
1512
1513#[cfg(test)]
1514mod tests {
1515 use super::*;
1516 use crate::CodeGraph;
1517
1518 use std::sync::atomic::{AtomicU64, Ordering};
1519
1520 static TEST_DIR_COUNTER: AtomicU64 = AtomicU64::new(0);
1521
1522 fn next_test_dir() -> std::path::PathBuf {
1523 let n = TEST_DIR_COUNTER.fetch_add(1, Ordering::SeqCst);
1524 std::env::temp_dir().join(format!("magellan_test_{}_{}", std::process::id(), n))
1525 }
1526
1527 /// Test helper to create a simple call graph for testing
1528 ///
1529 /// Creates:
1530 /// - main() -> helper_a() -> leaf()
1531 /// - main() -> helper_b() -> leaf()
1532 /// - unused_function() -> leaf()
1533 ///
1534 /// Returns the CodeGraph and symbol IDs for main and unused_function
1535 fn create_test_graph() -> Result<(CodeGraph, String, String)> {
1536 // Use a persistent temp directory that won't be deleted
1537 // This is necessary for V3 backend which needs files to remain accessible.
1538 // Use a unique suffix per call to avoid collisions when tests run in parallel.
1539 let temp_dir = std::env::temp_dir().join(format!(
1540 "magellan_test_{}_{}",
1541 std::process::id(),
1542 std::time::SystemTime::now()
1543 .duration_since(std::time::UNIX_EPOCH)
1544 .unwrap_or_default()
1545 .as_nanos()
1546 ));
1547 std::fs::create_dir_all(&temp_dir)?;
1548 let db_path = temp_dir.join("test.db");
1549
1550 let source = r#"
1551fn main() {
1552 helper_a();
1553 helper_b();
1554}
1555
1556fn helper_a() {
1557 leaf();
1558}
1559
1560fn helper_b() {
1561 leaf();
1562}
1563
1564fn leaf() {
1565 println!("leaf");
1566}
1567
1568fn unused_function() {
1569 leaf();
1570}
1571"#;
1572
1573 let mut graph = CodeGraph::open(&db_path)?;
1574 // Index the file - use a temporary file path
1575 let test_file = temp_dir.join("test.rs");
1576 std::fs::write(&test_file, source)?;
1577 let path_str = test_file.to_string_lossy().to_string();
1578 let source_bytes = std::fs::read(&test_file)?;
1579
1580 // Index symbols and calls
1581 graph.index_file(&path_str, &source_bytes)?;
1582 graph.index_calls(&path_str, &source_bytes)?;
1583
1584 // Find the symbol IDs for main and unused_function
1585 let symbols = crate::graph::query::symbols_in_file(&mut graph, &path_str)?;
1586 let main_id = symbols
1587 .iter()
1588 .find(|s| s.name.as_deref() == Some("main"))
1589 .and_then(|s| s.fqn.clone())
1590 .unwrap_or_default();
1591
1592 let unused_id = symbols
1593 .iter()
1594 .find(|s| s.name.as_deref() == Some("unused_function"))
1595 .and_then(|s| s.fqn.clone())
1596 .unwrap_or_default();
1597
1598 // For testing, use the symbol's FQN directly
1599 // In a real scenario with proper SymbolId generation, we'd use that
1600 Ok((graph, main_id, unused_id))
1601 }
1602
1603 #[test]
1604 fn test_resolve_symbol_entity_not_found() {
1605 let (graph, _, _) = create_test_graph().unwrap();
1606 let result = graph.resolve_symbol_entity("nonexistent_id_123456789012");
1607 assert!(result.is_err());
1608 }
1609
1610 #[test]
1611 fn test_symbol_by_entity_id() {
1612 let (graph, _, _) = create_test_graph().unwrap();
1613
1614 // Get all entity IDs
1615 let entity_ids = graph.calls.backend.entity_ids().unwrap();
1616 let snapshot = SnapshotId::current();
1617
1618 // Find a Symbol entity
1619 for entity_id in entity_ids {
1620 if let Ok(node) = graph.calls.backend.get_node(snapshot, entity_id) {
1621 if node.kind == "Symbol" {
1622 let info = graph.symbol_by_entity_id(entity_id);
1623 assert!(info.is_ok());
1624 let symbol_info = info.unwrap();
1625 assert!(!symbol_info.file_path.is_empty());
1626 assert!(!symbol_info.kind.is_empty());
1627 return;
1628 }
1629 }
1630 }
1631
1632 panic!("No Symbol entity found in test graph");
1633 }
1634
1635 #[test]
1636 fn test_reachable_symbols_basic() {
1637 let (graph, _main_id, _unused_id) = create_test_graph().unwrap();
1638
1639 // Get all symbols and verify we can query them
1640 let entity_ids = graph.calls.backend.entity_ids().unwrap();
1641 eprintln!("Total entities: {}", entity_ids.len());
1642 let snapshot = SnapshotId::current();
1643 let mut found_symbols = 0;
1644
1645 for entity_id in &entity_ids {
1646 match graph.calls.backend.get_node(snapshot, *entity_id) {
1647 Ok(node) => {
1648 eprintln!(" Entity {}: kind={}", entity_id, node.kind);
1649 if node.kind == "Symbol" {
1650 found_symbols += 1;
1651 }
1652 }
1653 Err(e) => {
1654 eprintln!(" Entity {}: ERROR getting node: {:?}", entity_id, e);
1655 }
1656 }
1657 }
1658
1659 // We should have found at least some symbols
1660 assert!(
1661 found_symbols > 0,
1662 "Should find Symbol entities in test graph, got {} symbols from {} entities",
1663 found_symbols,
1664 entity_ids.len()
1665 );
1666 }
1667
1668 #[test]
1669 fn test_reachable_symbols_max_depth() {
1670 let (graph, _main_id, _unused_id) = create_test_graph().unwrap();
1671
1672 // Get the main function's entity ID
1673 let snapshot = SnapshotId::current();
1674 let entity_ids = graph.calls.backend.entity_ids().unwrap();
1675
1676 let main_entity_id = entity_ids.into_iter().find(|&id| {
1677 if let Ok(node) = graph.calls.backend.get_node(snapshot, id) {
1678 if let Ok(data) = serde_json::from_value::<serde_json::Value>(node.data) {
1679 if let Some(name) = data.get("name").and_then(|v| v.as_str()) {
1680 return name == "main";
1681 }
1682 }
1683 }
1684 false
1685 });
1686
1687 if let Some(entity_id) = main_entity_id {
1688 // Verify we can get the node
1689 let result = graph.calls.backend.get_node(snapshot, entity_id);
1690 assert!(result.is_ok(), "Should be able to get main node");
1691 }
1692 }
1693
1694 #[test]
1695 fn test_dead_symbols() {
1696 let (graph, _main_id, _unused_id) = create_test_graph().unwrap();
1697
1698 // Get all entity IDs
1699 let entity_ids = graph.calls.backend.entity_ids().unwrap();
1700
1701 // We should have some entities in the call graph
1702 assert!(entity_ids.len() > 0, "Should have call graph entities");
1703 }
1704
1705 #[test]
1706 fn test_reverse_reachable_symbols() {
1707 let (graph, _main_id, _unused_id) = create_test_graph().unwrap();
1708
1709 // Get all entity IDs
1710 let entity_ids = graph.calls.backend.entity_ids().unwrap();
1711
1712 // We should have some entities in the call graph
1713 assert!(entity_ids.len() > 0, "Should have call graph entities");
1714 }
1715}