magellan/graph/
algorithms.rs

1//! Graph algorithms for code analysis
2//!
3//! This module provides wrapper functions for sqlitegraph's algorithm library,
4//! exposing reachability analysis, dead code detection, cycle detection, call
5//! graph condensation, and impact analysis through Magellan's CodeGraph API.
6//!
7//! # Graph Views
8//!
9//! Magellan stores multiple edge types in the same graph:
10//! - **DEFINES**: File → Symbol (defines/contains relationship)
11//! - **REFERENCES**: Reference → Symbol (what symbol is referenced)
12//! - **CALLS**: Call node → Symbol (call graph edges)
13//! - **CALLER**: Symbol → Call node (reverse call graph edges)
14//!
15//! For call graph algorithms (reachability, dead code detection, SCC), we filter
16//! to **CALLS** edges only to traverse the call graph structure.
17//!
18//! # Clustered Adjacency
19//!
20//! Magellan uses sqlitegraph's clustered adjacency storage for ~10x graph
21//! traversal performance improvement when available.
22//!
23//! # Entity IDs vs Symbol IDs
24//!
25//! sqlitegraph algorithms work with **entity IDs** (i64 database row IDs),
26//! while Magellan's public API uses **stable symbol IDs** (32-char BLAKE3 hashes).
27//!
28//! This module provides translation functions:
29//! - `resolve_symbol_entity()`: Symbol ID → entity ID
30//! - `symbol_by_entity_id()`: entity ID → SymbolInfo
31//!
32//! # Algorithm Functions
33//!
34//! - [`CodeGraph::reachable_symbols()`]: Forward reachability from a symbol
35//! - [`CodeGraph::reverse_reachable_symbols()`]: Reverse reachability (callers)
36//! - [`CodeGraph::dead_symbols()`]: Dead code detection from entry point
37//! - [`CodeGraph::detect_cycles()`]: Find cycles using SCC decomposition
38//! - [`CodeGraph::find_cycles_containing()`]: Find cycles containing a specific symbol
39//! - [`CodeGraph::condense_call_graph()`]: Collapse SCCs to create condensation DAG
40//! - [`CodeGraph::enumerate_paths()`]: Path enumeration between symbols
41//! - [`CodeGraph::backward_slice()`]: Backward program slice (what affects this symbol)
42//! - [`CodeGraph::forward_slice()`]: Forward program slice (what this symbol affects)
43//!
44//! # Example
45//!
46//! \`\`\`no_run
47//! use magellan::CodeGraph;
48//!
49//! let mut graph = CodeGraph::open("codegraph.db")?;
50//!
51//! // Find all functions reachable from main
52//! let reachable = graph.reachable_symbols("main_symbol_id", None)?;
53//!
54//! // Find dead code unreachable from main
55//! let dead = graph.dead_symbols("main_symbol_id")?;
56//!
57//! // Find cycles (mutual recursion)
58//! let cycles = graph.detect_cycles()?;
59//!
60//! // Condense call graph for DAG analysis
61//! let condensed = graph.condense_call_graph()?;
62//! \`\`\`
63
64use ahash::{AHashMap, AHashSet};
65use anyhow::Result;
66use rusqlite::params;
67use sqlitegraph::errors::SqliteGraphError;
68use sqlitegraph::{GraphBackend, SnapshotId};
69use std::collections::{HashMap, HashSet, VecDeque};
70use std::sync::Arc;
71
72use crate::graph::schema::SymbolNode;
73
74use super::CodeGraph;
75
76/// Backend-agnostic reachable_from implementation
77///
78/// Uses `fetch_outgoing` from GraphBackend trait instead of requiring SqliteGraph.
79fn reachable_from(
80    backend: &dyn GraphBackend,
81    start: i64,
82) -> Result<AHashSet<i64>, SqliteGraphError> {
83    let mut visited = AHashSet::new();
84    let mut queue = VecDeque::new();
85
86    visited.insert(start);
87    queue.push_back(start);
88
89    while let Some(node) = queue.pop_front() {
90        for neighbor in backend.fetch_outgoing(node)? {
91            if visited.insert(neighbor) {
92                queue.push_back(neighbor);
93            }
94        }
95    }
96
97    Ok(visited)
98}
99
100/// Backend-agnostic reverse_reachable_from implementation
101///
102/// Uses `fetch_incoming` from GraphBackend trait instead of requiring SqliteGraph.
103fn reverse_reachable_from(
104    backend: &dyn GraphBackend,
105    start: i64,
106) -> Result<AHashSet<i64>, SqliteGraphError> {
107    let mut visited = AHashSet::new();
108    let mut queue = VecDeque::new();
109
110    visited.insert(start);
111    queue.push_back(start);
112
113    while let Some(node) = queue.pop_front() {
114        for neighbor in backend.fetch_incoming(node)? {
115            if visited.insert(neighbor) {
116                queue.push_back(neighbor);
117            }
118        }
119    }
120
121    Ok(visited)
122}
123
124/// Result of SCC collapse operation
125#[derive(Debug, Clone)]
126struct SccCollapseResult {
127    /// Maps each original node ID to its SCC supernode ID
128    _node_to_supernode: AHashMap<i64, i64>,
129    /// Maps each supernode ID to the set of original node IDs in that SCC
130    supernode_members: AHashMap<i64, AHashSet<i64>>,
131    /// Edges between supernodes in the condensed DAG
132    supernode_edges: Vec<(i64, i64)>,
133    /// Total number of SCCs found
134    _num_sccs: usize,
135}
136
137/// Backend-agnostic collapse_sccs implementation
138///
139/// Uses `all_entity_ids` and `fetch_outgoing` from GraphBackend trait.
140fn collapse_sccs(backend: &dyn GraphBackend) -> Result<SccCollapseResult, SqliteGraphError> {
141    // Step 1: Compute SCCs using our backend-agnostic implementation
142    let scc_result = strongly_connected_components(backend)?;
143
144    // Step 2: Handle empty graph
145    if scc_result.components.is_empty() {
146        return Ok(SccCollapseResult {
147            _node_to_supernode: AHashMap::new(),
148            supernode_members: AHashMap::new(),
149            supernode_edges: Vec::new(),
150            _num_sccs: 0,
151        });
152    }
153
154    // Step 3: Build node_to_supernode and supernode_members mappings
155    let mut node_to_supernode: AHashMap<i64, i64> = AHashMap::new();
156    let mut supernode_members: AHashMap<i64, AHashSet<i64>> = AHashMap::new();
157
158    for component in &scc_result.components {
159        let supernode_id = component[0]; // Use first member as supernode ID
160        let mut members = AHashSet::new();
161        for &node in component {
162            node_to_supernode.insert(node, supernode_id);
163            members.insert(node);
164        }
165        supernode_members.insert(supernode_id, members);
166    }
167
168    // Step 4: Build supernode edges (condensed graph)
169    let mut supernode_edges: Vec<(i64, i64)> = Vec::new();
170    let mut seen_edges: HashSet<(i64, i64)> = HashSet::new();
171
172    for (&node, &supernode) in &node_to_supernode {
173        for neighbor in backend.fetch_outgoing(node)? {
174            if let Some(&neighbor_supernode) = node_to_supernode.get(&neighbor) {
175                // Only add edge if it's between different supernodes
176                if supernode != neighbor_supernode {
177                    let edge = (supernode, neighbor_supernode);
178                    if seen_edges.insert(edge) {
179                        supernode_edges.push(edge);
180                    }
181                }
182            }
183        }
184    }
185
186    // Sort edges for deterministic output
187    supernode_edges.sort();
188
189    Ok(SccCollapseResult {
190        _node_to_supernode: node_to_supernode,
191        supernode_members,
192        supernode_edges,
193        _num_sccs: scc_result.components.len(),
194    })
195}
196
197/// Path enumeration result for backend-agnostic implementation
198#[derive(Debug, Clone)]
199struct InternalPathEnumerationResult {
200    /// All found paths (each path is a sequence of node IDs)
201    paths: Vec<Vec<i64>>,
202    /// Total number of paths found (before max_paths limit)
203    total_found: usize,
204    /// Number of paths pruned by bounds (max_depth, max_paths)
205    pruned_by_bounds: usize,
206    /// Maximum depth reached during enumeration
207    _max_depth_reached: usize,
208}
209
210/// Configuration for path enumeration
211#[derive(Debug, Clone)]
212struct PathEnumerationConfig {
213    /// Maximum depth to explore
214    max_depth: usize,
215    /// Maximum number of paths to return
216    max_paths: usize,
217    /// Maximum times to revisit a node (prevents infinite loops)
218    revisit_cap: usize,
219    /// Optional set of nodes that terminate path exploration
220    exit_nodes: Option<AHashSet<i64>>,
221    /// Optional set of nodes that represent errors
222    _error_nodes: Option<AHashSet<i64>>,
223}
224
225impl Default for PathEnumerationConfig {
226    fn default() -> Self {
227        Self {
228            max_depth: 100,
229            max_paths: 1000,
230            revisit_cap: 100,
231            exit_nodes: None,
232            _error_nodes: None,
233        }
234    }
235}
236
237/// Backend-agnostic enumerate_paths implementation
238///
239/// Uses `fetch_outgoing` from GraphBackend trait.
240fn enumerate_paths(
241    backend: &dyn GraphBackend,
242    entry: i64,
243    config: &PathEnumerationConfig,
244) -> Result<InternalPathEnumerationResult, SqliteGraphError> {
245    let mut paths = Vec::new();
246    let mut current_path = Vec::new();
247    let mut visit_count: AHashMap<i64, usize> = AHashMap::new();
248    let mut total_found = 0usize;
249    let mut pruned_by_bounds = 0usize;
250    let mut max_depth_reached = 0usize;
251
252    dfs_enumerate(
253        backend,
254        entry,
255        config,
256        &mut current_path,
257        &mut visit_count,
258        &mut paths,
259        &mut total_found,
260        &mut pruned_by_bounds,
261        &mut max_depth_reached,
262    )?;
263
264    Ok(InternalPathEnumerationResult {
265        paths,
266        total_found,
267        pruned_by_bounds,
268        _max_depth_reached: max_depth_reached,
269    })
270}
271
272/// DFS helper for path enumeration
273#[allow(clippy::too_many_arguments)]
274fn dfs_enumerate(
275    backend: &dyn GraphBackend,
276    node: i64,
277    config: &PathEnumerationConfig,
278    current_path: &mut Vec<i64>,
279    visit_count: &mut AHashMap<i64, usize>,
280    all_paths: &mut Vec<Vec<i64>>,
281    total_found: &mut usize,
282    pruned_by_bounds: &mut usize,
283    max_depth_reached: &mut usize,
284) -> Result<(), SqliteGraphError> {
285    // Update visit count for this node
286    let count = visit_count.entry(node).or_insert(0);
287    *count += 1;
288
289    // Check revisit cap
290    if *count > config.revisit_cap {
291        visit_count.entry(node).and_modify(|e| *e -= 1);
292        return Ok(());
293    }
294
295    // Add node to current path
296    current_path.push(node);
297    let current_depth = current_path.len();
298    *max_depth_reached = (*max_depth_reached).max(current_depth);
299
300    // Check max depth
301    if current_depth >= config.max_depth {
302        *pruned_by_bounds += 1;
303        current_path.pop();
304        visit_count.entry(node).and_modify(|e| *e -= 1);
305        return Ok(());
306    }
307
308    // Check if this is an exit node
309    if let Some(ref exits) = config.exit_nodes {
310        if exits.contains(&node) && current_depth > 1 {
311            // Save this path (exclude entry, include exit)
312            if all_paths.len() < config.max_paths {
313                all_paths.push(current_path.clone());
314            }
315            *total_found += 1;
316            current_path.pop();
317            visit_count.entry(node).and_modify(|e| *e -= 1);
318            return Ok(());
319        }
320    }
321
322    // Explore neighbors
323    let neighbors = backend.fetch_outgoing(node)?;
324    let mut had_successors = false;
325
326    for neighbor in neighbors {
327        had_successors = true;
328        dfs_enumerate(
329            backend,
330            neighbor,
331            config,
332            current_path,
333            visit_count,
334            all_paths,
335            total_found,
336            pruned_by_bounds,
337            max_depth_reached,
338        )?;
339    }
340
341    // If no successors and no exit nodes specified, save path
342    if !had_successors && config.exit_nodes.is_none() && current_depth > 1 {
343        if all_paths.len() < config.max_paths {
344            all_paths.push(current_path.clone());
345        }
346        *total_found += 1;
347    }
348
349    // Backtrack
350    current_path.pop();
351    visit_count.entry(node).and_modify(|e| *e -= 1);
352
353    Ok(())
354}
355
356/// Result of strongly connected components computation
357#[derive(Debug, Clone)]
358struct SccResult {
359    /// List of SCCs, each is a vector of node IDs
360    components: Vec<Vec<i64>>,
361    /// Mapping from node ID to component index
362    node_to_component: AHashMap<i64, usize>,
363}
364
365/// Backend-agnostic strongly_connected_components implementation
366///
367/// Uses `all_entity_ids` and `fetch_outgoing` from GraphBackend trait.
368/// Implements Tarjan's SCC algorithm.
369fn strongly_connected_components(
370    backend: &dyn GraphBackend,
371) -> Result<SccResult, SqliteGraphError> {
372    let all_ids = backend.all_entity_ids()?;
373
374    if all_ids.is_empty() {
375        return Ok(SccResult {
376            components: Vec::new(),
377            node_to_component: AHashMap::new(),
378        });
379    }
380
381    let mut index = 0usize;
382    let mut stack: Vec<i64> = Vec::new();
383    let mut on_stack: HashSet<i64> = HashSet::new();
384    let mut indices: AHashMap<i64, usize> = AHashMap::new();
385    let mut lowlinks: AHashMap<i64, usize> = AHashMap::new();
386    let mut components: Vec<Vec<i64>> = Vec::new();
387    let mut node_to_component: AHashMap<i64, usize> = AHashMap::new();
388
389    fn strongconnect(
390        v: i64,
391        backend: &dyn GraphBackend,
392        index: &mut usize,
393        stack: &mut Vec<i64>,
394        on_stack: &mut HashSet<i64>,
395        indices: &mut AHashMap<i64, usize>,
396        lowlinks: &mut AHashMap<i64, usize>,
397        components: &mut Vec<Vec<i64>>,
398        node_to_component: &mut AHashMap<i64, usize>,
399    ) -> Result<(), SqliteGraphError> {
400        indices.insert(v, *index);
401        lowlinks.insert(v, *index);
402        *index += 1;
403        stack.push(v);
404        on_stack.insert(v);
405
406        for w in backend.fetch_outgoing(v)? {
407            if !indices.contains_key(&w) {
408                strongconnect(
409                    w,
410                    backend,
411                    index,
412                    stack,
413                    on_stack,
414                    indices,
415                    lowlinks,
416                    components,
417                    node_to_component,
418                )?;
419                let v_low = lowlinks[&v];
420                let w_low = lowlinks[&w];
421                lowlinks.insert(v, v_low.min(w_low));
422            } else if on_stack.contains(&w) {
423                let v_low = lowlinks[&v];
424                let w_idx = indices[&w];
425                lowlinks.insert(v, v_low.min(w_idx));
426            }
427        }
428
429        if lowlinks[&v] == indices[&v] {
430            let mut component = Vec::new();
431            loop {
432                let w = stack.pop().unwrap(); // M-UNWRAP: Tarjan SCC invariant guarantees stack non-empty
433                on_stack.remove(&w);
434                node_to_component.insert(w, components.len());
435                component.push(w);
436                if w == v {
437                    break;
438                }
439            }
440            components.push(component);
441        }
442
443        Ok(())
444    }
445
446    for &node in &all_ids {
447        if !indices.contains_key(&node) {
448            strongconnect(
449                node,
450                backend,
451                &mut index,
452                &mut stack,
453                &mut on_stack,
454                &mut indices,
455                &mut lowlinks,
456                &mut components,
457                &mut node_to_component,
458            )?;
459        }
460    }
461
462    Ok(SccResult {
463        components,
464        node_to_component,
465    })
466}
467
468/// Symbol information for algorithm results
469///
470/// Contains the key metadata needed to identify and locate a symbol.
471#[derive(Debug, Clone, PartialEq, Eq)]
472pub struct SymbolInfo {
473    /// Stable symbol ID (32-char BLAKE3 hash)
474    pub symbol_id: Option<String>,
475    /// Fully-qualified name
476    pub fqn: Option<String>,
477    /// File path containing the symbol
478    pub file_path: String,
479    /// Symbol kind (Function, Method, Class, etc.)
480    pub kind: String,
481}
482
483/// Dead symbol information
484///
485/// Extends [`SymbolInfo`] with a reason why the symbol is considered dead.
486#[derive(Debug, Clone, PartialEq, Eq)]
487pub struct DeadSymbol {
488    /// Base symbol information
489    pub symbol: SymbolInfo,
490    /// Reason why this symbol is unreachable/dead
491    pub reason: String,
492}
493
494/// Cycle kind classification
495#[derive(Debug, Clone, PartialEq, Eq)]
496pub enum CycleKind {
497    /// Multiple symbols calling each other (SCC with >1 member)
498    MutualRecursion,
499    /// Single symbol that calls itself (direct self-loop)
500    SelfLoop,
501}
502
503/// Cycle information for detected cycles
504///
505/// Represents a strongly connected component (SCC) with more than one member,
506/// indicating mutual recursion or a cycle in the call graph.
507#[derive(Debug, Clone, PartialEq, Eq)]
508pub struct Cycle {
509    /// All symbols that participate in this cycle
510    pub members: Vec<SymbolInfo>,
511    /// Classification of the cycle type
512    pub kind: CycleKind,
513}
514
515/// Cycle detection report
516///
517/// Result of running [`CodeGraph::detect_cycles()`], containing all cycles
518/// found in the call graph.
519#[derive(Debug, Clone, PartialEq, Eq)]
520pub struct CycleReport {
521    /// All detected cycles
522    pub cycles: Vec<Cycle>,
523    /// Total number of cycles found
524    pub total_count: usize,
525}
526
527/// Supernode in a condensation graph
528///
529/// Represents an SCC collapsed into a single node for DAG analysis.
530#[derive(Debug, Clone, PartialEq, Eq)]
531pub struct Supernode {
532    /// Supernode ID (stable identifier for this SCC)
533    pub id: i64,
534    /// All symbols that are members of this SCC/supernode
535    pub members: Vec<SymbolInfo>,
536}
537
538/// Condensation graph (DAG after SCC collapse)
539///
540/// Represents the call graph after collapsing all SCCs into supernodes.
541/// The condensation graph is always a DAG (no cycles).
542#[derive(Debug, Clone, PartialEq, Eq)]
543pub struct CondensationGraph {
544    /// All supernodes in the condensed graph
545    pub supernodes: Vec<Supernode>,
546    /// Edges between supernodes (from_supernode_id, to_supernode_id)
547    pub edges: Vec<(i64, i64)>,
548}
549
550/// Condensation result with symbol-to-supernode mapping
551///
552/// Result of running [`CodeGraph::condense_call_graph()`], providing
553/// both the condensed DAG and the mapping from original symbols to supernodes.
554#[derive(Debug, Clone, PartialEq, Eq)]
555pub struct CondensationResult {
556    /// The condensed DAG
557    pub graph: CondensationGraph,
558    /// Maps symbol_id to the supernode ID containing that symbol
559    pub original_to_supernode: HashMap<String, i64>,
560}
561
562/// Direction of program slicing
563#[derive(Debug, Clone, Copy, PartialEq, Eq)]
564pub enum SliceDirection {
565    /// Backward slice: what affects this symbol (reverse reachability)
566    Backward,
567    /// Forward slice: what this symbol affects (forward reachability)
568    Forward,
569}
570
571/// Program slice result
572///
573/// Contains the slice results and statistics for a program slicing operation.
574#[derive(Debug, Clone, PartialEq, Eq)]
575pub struct ProgramSlice {
576    /// Target symbol for the slice
577    pub target: SymbolInfo,
578    /// Direction of the slice
579    pub direction: SliceDirection,
580    /// Symbols included in the slice
581    pub included_symbols: Vec<SymbolInfo>,
582    /// Number of symbols in the slice
583    pub symbol_count: usize,
584}
585
586/// Program slice result with statistics
587///
588/// Wraps a [`ProgramSlice`] with additional statistics about the slice.
589#[derive(Debug, Clone, PartialEq, Eq)]
590pub struct SliceResult {
591    /// The slice itself
592    pub slice: ProgramSlice,
593    /// Statistics about the slice
594    pub statistics: SliceStatistics,
595}
596
597/// Statistics for a program slice
598#[derive(Debug, Clone, PartialEq, Eq)]
599pub struct SliceStatistics {
600    /// Total number of symbols in the slice
601    pub total_symbols: usize,
602    /// Number of data dependencies
603    /// Note: Set to 0 for call-graph fallback (not computed without full CFG)
604    pub data_dependencies: usize,
605    /// Number of control dependencies
606    /// For call-graph fallback, this equals total_symbols (callers/callees)
607    pub control_dependencies: usize,
608}
609
610/// Execution path in the call graph
611///
612/// Represents a single path through the call graph from a starting symbol
613/// to an ending symbol, with metadata about the path.
614#[derive(Debug, Clone, PartialEq, Eq)]
615pub struct ExecutionPath {
616    /// Symbols along the path in order from start to end
617    pub symbols: Vec<SymbolInfo>,
618    /// Number of symbols in the path
619    pub length: usize,
620}
621
622/// Path enumeration result
623///
624/// Contains all discovered execution paths and statistics about the enumeration.
625#[derive(Debug, Clone)]
626pub struct PathEnumerationResult {
627    /// All discovered paths
628    pub paths: Vec<ExecutionPath>,
629    /// Total number of paths enumerated
630    pub total_enumerated: usize,
631    /// Whether enumeration was cut off due to bounds
632    pub bounded_hit: bool,
633    /// Statistics about the discovered paths
634    pub statistics: PathStatistics,
635}
636
637/// Statistics for path enumeration
638#[derive(Debug, Clone)]
639pub struct PathStatistics {
640    /// Average path length
641    pub avg_length: f64,
642    /// Minimum path length
643    pub min_length: usize,
644    /// Maximum path length
645    pub max_length: usize,
646    /// Number of unique symbols across all paths
647    pub unique_symbols: usize,
648}
649
650impl CodeGraph {
651    /// Resolve a stable symbol ID or FQN to its entity ID
652    ///
653    /// First tries to lookup by symbol_id (32-char BLAKE3 hash).
654    /// If not found, falls back to FQN lookup for convenience.
655    ///
656    /// # Arguments
657    /// * `symbol_id_or_fqn` - Stable symbol ID (32-char BLAKE3 hash) or FQN
658    ///
659    /// # Returns
660    /// The entity ID (i64 database row ID) for the symbol
661    ///
662    /// # Errors
663    /// Returns an error if the symbol is not found in the database
664    pub fn resolve_symbol_entity(&self, symbol_id_or_fqn: &str) -> Result<i64> {
665        let conn = self.chunks.connect()?;
666
667        // First try: lookup by symbol_id
668        let mut stmt = conn
669            .prepare_cached(
670                "SELECT id FROM graph_entities
671                 WHERE kind = 'Symbol'
672                 AND json_extract(data, '$.symbol_id') = ?1",
673            )
674            .map_err(|e| anyhow::anyhow!("Failed to prepare symbol ID query: {}", e))?;
675
676        let result = stmt.query_row(params![symbol_id_or_fqn], |row| row.get::<_, i64>(0));
677
678        match result {
679            Ok(entity_id) => return Ok(entity_id),
680            Err(rusqlite::Error::QueryReturnedNoRows) => {
681                // Fallback: try FQN lookup
682            }
683            Err(e) => {
684                return Err(anyhow::anyhow!("Failed to query symbol ID: {}", e));
685            }
686        }
687
688        // Fallback: lookup by FQN or display_fqn
689        let mut stmt = conn
690            .prepare_cached(
691                "SELECT id FROM graph_entities
692                 WHERE kind = 'Symbol'
693                 AND (json_extract(data, '$.fqn') = ?1
694                      OR json_extract(data, '$.display_fqn') = ?1
695                      OR json_extract(data, '$.canonical_fqn') = ?1)",
696            )
697            .map_err(|e| anyhow::anyhow!("Failed to prepare FQN query: {}", e))?;
698
699        stmt.query_row(params![symbol_id_or_fqn], |row| row.get::<_, i64>(0))
700            .map_err(|e| match e {
701                rusqlite::Error::QueryReturnedNoRows => {
702                    anyhow::anyhow!(
703                        "Symbol '{}' not found in database (tried symbol_id, fqn, display_fqn, canonical_fqn)",
704                        symbol_id_or_fqn
705                    )
706                }
707                _ => anyhow::anyhow!("Failed to query symbol by FQN: {}", e),
708            })
709    }
710
711    /// Get symbol information by entity ID
712    ///
713    /// # Arguments
714    /// * `entity_id` - Entity ID (i64 database row ID)
715    ///
716    /// # Returns
717    /// SymbolInfo with metadata from the symbol node
718    pub fn symbol_by_entity_id(&self, entity_id: i64) -> Result<SymbolInfo> {
719        let snapshot = SnapshotId::current();
720        let node = self
721            .calls
722            .backend
723            .get_node(snapshot, entity_id)
724            .map_err(|e| anyhow::anyhow!("Failed to get entity {}: {}", entity_id, e))?;
725
726        if node.kind != "Symbol" {
727            return Err(anyhow::anyhow!(
728                "Entity {} is not a Symbol (kind: {})",
729                entity_id,
730                node.kind
731            ));
732        }
733
734        let symbol_node: SymbolNode = serde_json::from_value(node.data)
735            .map_err(|e| anyhow::anyhow!("Failed to parse SymbolNode data: {}", e))?;
736
737        Ok(SymbolInfo {
738            symbol_id: symbol_node.symbol_id,
739            fqn: symbol_node.fqn.or_else(|| symbol_node.display_fqn.clone()),
740            file_path: node.file_path.unwrap_or_else(|| "?".to_string()),
741            kind: symbol_node.kind,
742        })
743    }
744
745    /// Get all Symbol entity IDs in the call graph
746    ///
747    /// Returns entity IDs for all Symbol nodes that are part of the call graph
748    /// (have incoming or outgoing CALLS edges).
749    ///
750    /// # Returns
751    /// Vector of entity IDs for call graph symbols
752    fn all_call_graph_entities(&self) -> Result<Vec<i64>> {
753        let conn = self.chunks.connect()?;
754
755        // Find all symbols that participate in CALLS edges
756        // (either as caller via CALLER edges or as callee via CALLS edges)
757        let mut stmt = conn
758            .prepare_cached(
759                "SELECT DISTINCT from_id FROM graph_edges WHERE edge_type = 'CALLER'
760                 UNION
761                 SELECT DISTINCT to_id FROM graph_edges WHERE edge_type = 'CALLS'",
762            )
763            .map_err(|e| anyhow::anyhow!("Failed to prepare call graph query: {}", e))?;
764
765        let entity_ids = stmt
766            .query_map([], |row| row.get::<_, i64>(0))
767            .map_err(|e| anyhow::anyhow!("Failed to execute call graph query: {}", e))?
768            .collect::<Result<Vec<_>, _>>()
769            .map_err(|e| anyhow::anyhow!("Failed to collect call graph results: {}", e))?;
770
771        Ok(entity_ids)
772    }
773
774    /// Find all symbols reachable from a given symbol (forward reachability)
775    ///
776    /// Computes the transitive closure of the call graph starting from the
777    /// specified symbol. Returns all symbols that can be reached by following
778    /// CALLS edges from the starting symbol.
779    ///
780    /// # Graph View
781    ///
782    /// This operates on the **call graph** (CALLS edges only), not other edge types.
783    /// The starting symbol itself is NOT included in the results.
784    ///
785    /// # Arguments
786    /// * `symbol_id` - Stable symbol ID to start from (or FQN as fallback)
787    /// * `max_depth` - Optional maximum depth limit (None = unlimited)
788    ///
789    /// # Returns
790    /// Vector of [`SymbolInfo`] for reachable symbols, sorted deterministically
791    ///
792    /// # Example
793    ///
794    /// \`\`\`no_run
795    /// # use magellan::CodeGraph;
796    /// # let mut graph = CodeGraph::open("test.db").unwrap();
797    /// // Find all functions called from main (directly or indirectly)
798    /// let reachable = graph.reachable_symbols("main", None)?;
799    /// \`\`\`
800    pub fn reachable_symbols(
801        &self,
802        symbol_id: &str,
803        _max_depth: Option<usize>,
804    ) -> Result<Vec<SymbolInfo>> {
805        let entity_id = self.resolve_symbol_entity(symbol_id)?;
806        let backend = &*self.calls.backend;
807
808        // Use backend-agnostic reachable_from implementation
809        // This traverses outgoing edges from the start node
810        let reachable_entity_ids = reachable_from(backend, entity_id)?;
811
812        // Convert entity IDs to SymbolInfo
813        let mut symbols = Vec::new();
814        for id in reachable_entity_ids {
815            // Skip the starting symbol itself
816            if id == entity_id {
817                continue;
818            }
819
820            if let Ok(info) = self.symbol_by_entity_id(id) {
821                symbols.push(info);
822            }
823        }
824
825        // Sort deterministically for stable output
826        symbols.sort_by(|a, b| {
827            a.file_path
828                .cmp(&b.file_path)
829                .then_with(|| a.fqn.as_ref().cmp(&b.fqn.as_ref()))
830                .then_with(|| a.kind.cmp(&b.kind))
831        });
832
833        Ok(symbols)
834    }
835
836    /// Find all symbols that can reach a given symbol (reverse reachability)
837    ///
838    /// Computes the reverse transitive closure of the call graph. Returns all
839    /// symbols from which the specified symbol can be reached by following
840    /// CALLS edges (i.e., all callers that directly or indirectly call this symbol).
841    ///
842    /// # Graph View
843    ///
844    /// This operates on the **call graph** (CALLS edges only).
845    ///
846    /// # Arguments
847    /// * `symbol_id` - Stable symbol ID to analyze
848    /// * `max_depth` - Optional maximum depth limit (None = unlimited)
849    ///
850    /// # Returns
851    /// Vector of [`SymbolInfo`] for symbols that can reach the target, sorted deterministically
852    ///
853    /// # Example
854    ///
855    /// \`\`\`no_run
856    /// # use magellan::CodeGraph;
857    /// # let mut graph = CodeGraph::open("test.db").unwrap();
858    /// // Find all functions that directly or indirectly call 'helper_function'
859    /// let callers = graph.reverse_reachable_symbols("helper_symbol_id", None)?;
860    /// \`\`\`
861    pub fn reverse_reachable_symbols(
862        &self,
863        symbol_id: &str,
864        _max_depth: Option<usize>,
865    ) -> Result<Vec<SymbolInfo>> {
866        let entity_id = self.resolve_symbol_entity(symbol_id)?;
867        let backend = &*self.calls.backend;
868
869        // Use backend-agnostic reverse_reachable_from implementation
870        // This traverses incoming edges to the target node
871        let reachable_entity_ids = reverse_reachable_from(backend, entity_id)?;
872
873        // Convert entity IDs to SymbolInfo
874        let mut symbols = Vec::new();
875        for id in reachable_entity_ids {
876            // Skip the starting symbol itself
877            if id == entity_id {
878                continue;
879            }
880
881            if let Ok(info) = self.symbol_by_entity_id(id) {
882                symbols.push(info);
883            }
884        }
885
886        // Sort deterministically for stable output
887        symbols.sort_by(|a, b| {
888            a.file_path
889                .cmp(&b.file_path)
890                .then_with(|| a.fqn.as_ref().cmp(&b.fqn.as_ref()))
891                .then_with(|| a.kind.cmp(&b.kind))
892        });
893
894        Ok(symbols)
895    }
896
897    /// Find dead code unreachable from an entry point symbol
898    ///
899    /// Identifies all symbols in the call graph that cannot be reached from
900    /// the specified entry point (e.g., `main`, `test_main`). This is useful
901    /// for detecting unused functions and dead code.
902    ///
903    /// # Graph View
904    ///
905    /// This operates on the **call graph** (CALLS edges only). Symbols not
906    /// participating in call edges are not considered.
907    ///
908    /// # Limitations
909    ///
910    /// - Only considers the call graph. Symbols called via reflection,
911    ///   function pointers, or dynamic dispatch may be incorrectly flagged.
912    /// - Test functions, benchmark code, and platform-specific code may
913    ///   appear as dead code if not reachable from the specified entry point.
914    ///
915    /// # Arguments
916    /// * `entry_symbol_id` - Stable symbol ID of the entry point (e.g., main function)
917    ///
918    /// # Returns
919    /// Vector of [`DeadSymbol`] for unreachable symbols, sorted deterministically
920    ///
921    /// # Example
922    ///
923    /// \`\`\`no_run
924    /// # use magellan::CodeGraph;
925    /// # let mut graph = CodeGraph::open("test.db").unwrap();
926    /// // Find all functions unreachable from main
927    /// let dead = graph.dead_symbols("main_symbol_id")?;
928    /// for dead_symbol in dead {
929    ///     println!("Dead: {} in {} ({})",
930    ///         dead_symbol.symbol.fqn.as_deref().unwrap_or("?"),
931    ///         dead_symbol.symbol.file_path,
932    ///         dead_symbol.reason);
933    /// }
934    /// \`\`\`
935    pub fn dead_symbols(&self, entry_symbol_id: &str) -> Result<Vec<DeadSymbol>> {
936        let entry_entity = self.resolve_symbol_entity(entry_symbol_id)?;
937
938        // Get all call graph entities
939        let all_entities = self.all_call_graph_entities()?;
940
941        // Find all entities reachable from the entry point
942        let backend = &*self.calls.backend;
943        let reachable_ids = reachable_from(backend, entry_entity)?;
944
945        // Dead symbols = all entities - reachable entities
946        let reachable_set: HashSet<i64> = reachable_ids.into_iter().collect();
947        let mut dead_symbols = Vec::new();
948
949        for entity_id in all_entities {
950            // Skip the entry point itself
951            if entity_id == entry_entity {
952                continue;
953            }
954
955            // If not reachable from entry point, it's dead code
956            if !reachable_set.contains(&entity_id) {
957                if let Ok(info) = self.symbol_by_entity_id(entity_id) {
958                    dead_symbols.push(DeadSymbol {
959                        reason: "unreachable from entry point".to_string(),
960                        symbol: info,
961                    });
962                }
963            }
964        }
965
966        // Sort deterministically for stable output
967        dead_symbols.sort_by(|a, b| {
968            a.symbol
969                .file_path
970                .cmp(&b.symbol.file_path)
971                .then_with(|| a.symbol.fqn.as_ref().cmp(&b.symbol.fqn.as_ref()))
972                .then_with(|| a.symbol.kind.cmp(&b.symbol.kind))
973        });
974
975        Ok(dead_symbols)
976    }
977
978    /// Detect cycles in the call graph using SCC decomposition
979    ///
980    /// Finds all strongly connected components (SCCs) with more than one member,
981    /// which indicate cycles or mutual recursion in the call graph.
982    ///
983    /// # Graph View
984    ///
985    /// This operates on the **call graph** (CALLS edges only).
986    ///
987    /// # Cycle Detection
988    ///
989    /// Uses Tarjan's SCC algorithm to find strongly connected components.
990    /// Only SCCs with more than one member are reported as cycles (MutualRecursion).
991    /// Single-node SCCs are not cycles (unless they have self-loops).
992    ///
993    /// # Returns
994    /// [`CycleReport`] containing all detected cycles
995    ///
996    /// # Example
997    ///
998    /// \`\`\`no_run
999    /// # use magellan::CodeGraph;
1000    /// # let mut graph = CodeGraph::open("test.db").unwrap();
1001    /// let report = graph.detect_cycles()?;
1002    /// println!("Found {} cycles", report.total_count);
1003    /// for cycle in &report.cycles {
1004    ///     println!("Cycle with {} members:", cycle.members.len());
1005    ///     for member in &cycle.members {
1006    ///         println!("  - {}", member.fqn.as_deref().unwrap_or("?"));
1007    ///     }
1008    /// }
1009    /// \`\`\`
1010    pub fn detect_cycles(&self) -> Result<CycleReport> {
1011        let backend = &*self.calls.backend;
1012
1013        // Use backend-agnostic strongly_connected_components implementation
1014        let scc_result = strongly_connected_components(backend)?;
1015
1016        // Filter to SCCs with >1 member (mutual recursion)
1017        let cycles: Vec<_> = scc_result
1018            .components
1019            .into_iter()
1020            .filter(|scc| scc.len() > 1)
1021            .map(|members| {
1022                // Convert entity IDs to SymbolInfo
1023                let symbol_infos: Vec<_> = members
1024                    .into_iter()
1025                    .filter_map(|id| self.symbol_by_entity_id(id).ok())
1026                    .collect();
1027
1028                Cycle {
1029                    members: symbol_infos,
1030                    kind: CycleKind::MutualRecursion,
1031                }
1032            })
1033            .filter(|cycle| !cycle.members.is_empty())
1034            .collect();
1035
1036        let total_count = cycles.len();
1037
1038        // Sort cycles deterministically
1039        let mut cycles = cycles;
1040        cycles.sort_by(|a, b| match (a.members.first(), b.members.first()) {
1041            (Some(am), Some(bm)) => match (am.fqn.as_ref(), bm.fqn.as_ref()) {
1042                (Some(af), Some(bf)) => af.cmp(bf),
1043                (Some(_), None) => std::cmp::Ordering::Less,
1044                (None, Some(_)) => std::cmp::Ordering::Greater,
1045                (None, None) => std::cmp::Ordering::Equal,
1046            },
1047            (Some(_), None) => std::cmp::Ordering::Less,
1048            (None, Some(_)) => std::cmp::Ordering::Greater,
1049            (None, None) => std::cmp::Ordering::Equal,
1050        });
1051
1052        Ok(CycleReport {
1053            cycles,
1054            total_count,
1055        })
1056    }
1057
1058    /// Find cycles containing a specific symbol
1059    ///
1060    /// Returns only the cycles that include the specified symbol in their member set.
1061    ///
1062    /// # Arguments
1063    /// * `symbol_id` - Stable symbol ID or FQN to search for
1064    ///
1065    /// # Returns
1066    /// Vector of [`Cycle`] containing the specified symbol
1067    ///
1068    /// # Example
1069    ///
1070    /// \`\`\`no_run
1071    /// # use magellan::CodeGraph;
1072    /// # let mut graph = CodeGraph::open("test.db").unwrap();
1073    /// let cycles = graph.find_cycles_containing("problematic_function")?;
1074    /// if cycles.is_empty() {
1075    ///     println!("No cycles found containing this symbol");
1076    /// } else {
1077    ///     println!("Found {} cycles containing this symbol", cycles.len());
1078    /// }
1079    /// \`\`\`
1080    pub fn find_cycles_containing(&self, symbol_id: &str) -> Result<Vec<Cycle>> {
1081        let entity_id = self.resolve_symbol_entity(symbol_id)?;
1082        let backend = &*self.calls.backend;
1083
1084        // Use backend-agnostic strongly_connected_components implementation
1085        let scc_result = strongly_connected_components(backend)?;
1086
1087        // Find which SCC contains this entity
1088        let target_component_idx = scc_result.node_to_component.get(&entity_id);
1089
1090        let target_idx = match target_component_idx {
1091            Some(&idx) => idx,
1092            None => return Ok(Vec::new()), // Symbol not in any SCC (shouldn't happen)
1093        };
1094
1095        // Check if this SCC is a cycle (has >1 member)
1096        let target_component = &scc_result.components[target_idx];
1097        if target_component.len() <= 1 {
1098            // Single node SCC - not a cycle (unless self-loop, but that's rare)
1099            return Ok(Vec::new());
1100        }
1101
1102        // Convert this SCC to a Cycle
1103        let symbol_infos: Vec<_> = target_component
1104            .iter()
1105            .filter_map(|&id| self.symbol_by_entity_id(id).ok())
1106            .collect();
1107
1108        let cycle = Cycle {
1109            members: symbol_infos,
1110            kind: CycleKind::MutualRecursion,
1111        };
1112
1113        Ok(vec![cycle])
1114    }
1115
1116    /// Condense the call graph by collapsing SCCs into supernodes
1117    ///
1118    /// Creates a condensation DAG by collapsing each strongly connected component
1119    /// into a single "supernode". The resulting graph is always acyclic, making it
1120    /// suitable for topological analysis and safe refactoring.
1121    ///
1122    /// # Graph View
1123    ///
1124    /// This operates on the **call graph** (CALLS edges only).
1125    ///
1126    /// # Use Cases
1127    ///
1128    /// - **Topological Sorting**: Condensation graph is a DAG, enabling topo sort
1129    /// - **Mutual Recursion Detection**: Large supernodes indicate tight coupling
1130    /// - **Impact Analysis**: Changing one symbol affects its entire SCC
1131    ///
1132    /// # Returns
1133    /// [`CondensationResult`] with the condensed DAG and symbol-to-supernode mapping
1134    ///
1135    /// # Example
1136    ///
1137    /// \`\`\`no_run
1138    /// # use magellan::CodeGraph;
1139    /// # let mut graph = CodeGraph::open("test.db").unwrap();
1140    /// let condensed = graph.condense_call_graph()?;
1141    ///
1142    /// println!("Condensed to {} supernodes", condensed.graph.supernodes.len());
1143    /// println!("Condensed graph has {} edges", condensed.graph.edges.len());
1144    ///
1145    /// // Check which SCC a symbol belongs to
1146    /// if let Some(supernode_id) = condensed.original_to_supernode.get("some_symbol_id") {
1147    ///     println!("Symbol is in SCC {}", supernode_id);
1148    /// }
1149    /// \`\`\`
1150    pub fn condense_call_graph(&self) -> Result<CondensationResult> {
1151        let backend = &*self.calls.backend;
1152
1153        // Use backend-agnostic collapse_sccs implementation
1154        let collapse_result = collapse_sccs(backend)?;
1155
1156        // Build supernodes with SymbolInfo members
1157        let mut supernodes = Vec::new();
1158        let mut original_to_supernode = HashMap::new();
1159
1160        for (&supernode_id, member_ids) in &collapse_result.supernode_members {
1161            let symbol_infos: Vec<_> = member_ids
1162                .iter()
1163                .filter_map(|&id| self.symbol_by_entity_id(id).ok())
1164                .collect();
1165
1166            // Build mapping from symbol_id to supernode
1167            for symbol_info in &symbol_infos {
1168                if let Some(ref sym_id) = symbol_info.symbol_id {
1169                    original_to_supernode.insert(sym_id.clone(), supernode_id);
1170                }
1171            }
1172
1173            supernodes.push(Supernode {
1174                id: supernode_id,
1175                members: symbol_infos,
1176            });
1177        }
1178
1179        // Sort supernodes deterministically
1180        supernodes.sort_by_key(|a| a.id);
1181
1182        let graph = CondensationGraph {
1183            supernodes,
1184            edges: collapse_result.supernode_edges,
1185        };
1186
1187        Ok(CondensationResult {
1188            graph,
1189            original_to_supernode,
1190        })
1191    }
1192
1193    /// Compute a backward program slice (what affects this symbol)
1194    ///
1195    /// Returns all symbols that can affect the target symbol through the call graph.
1196    /// This is useful for bug isolation - finding all code that could influence
1197    /// a given symbol's behavior.
1198    ///
1199    /// # Graph View (Call-Graph Fallback)
1200    ///
1201    /// **Current implementation uses call-graph reachability as a fallback.**
1202    /// Full CFG-based program slicing requires control dependence graph (CDG)
1203    /// which needs post-dominators and AST CFG integration not yet available.
1204    ///
1205    /// The fallback implementation uses reverse reachability on the call graph,
1206    /// finding all callers that directly or indirectly call this symbol.
1207    ///
1208    /// # Limitations
1209    ///
1210    /// - Uses call-graph reachability instead of full CFG-based slicing
1211    /// - Does not include data flow dependencies within functions
1212    /// - Does not include control flow from conditionals/loops
1213    /// - Full slicing will be available when AST CFG edges are integrated
1214    ///
1215    /// # Arguments
1216    /// * `symbol_id` - Stable symbol ID or FQN to slice from
1217    ///
1218    /// # Returns
1219    /// [`SliceResult`] containing the slice and statistics
1220    ///
1221    /// # Example
1222    ///
1223    /// \`\`\`no_run
1224    /// # use magellan::CodeGraph;
1225    /// # let mut graph = CodeGraph::open("test.db").unwrap();
1226    /// // Find what affects 'helper_function'
1227    /// let slice_result = graph.backward_slice("helper_function")?;
1228    /// println!("{} symbols affect this function", slice_result.slice.symbol_count);
1229    /// for symbol in &slice_result.slice.included_symbols {
1230    ///     println!("  - {}", symbol.fqn.as_deref().unwrap_or("?"));
1231    /// }
1232    /// \`\`\`
1233    pub fn backward_slice(&self, symbol_id: &str) -> Result<SliceResult> {
1234        let entity_id = self.resolve_symbol_entity(symbol_id)?;
1235        let backend = &*self.calls.backend;
1236
1237        // Get target symbol info
1238        let target = self.symbol_by_entity_id(entity_id)?;
1239
1240        // Use backend-agnostic reverse_reachable_from on call graph
1241        // This finds all callers that directly or indirectly call this symbol
1242        let caller_entity_ids = reverse_reachable_from(backend, entity_id)?;
1243
1244        // Convert entity IDs to SymbolInfo
1245        let mut included_symbols = Vec::new();
1246        for id in caller_entity_ids {
1247            // Skip the starting symbol itself
1248            if id == entity_id {
1249                continue;
1250            }
1251
1252            if let Ok(info) = self.symbol_by_entity_id(id) {
1253                included_symbols.push(info);
1254            }
1255        }
1256
1257        // Sort deterministically
1258        included_symbols.sort_by(|a, b| {
1259            a.file_path
1260                .cmp(&b.file_path)
1261                .then_with(|| a.fqn.as_ref().cmp(&b.fqn.as_ref()))
1262                .then_with(|| a.kind.cmp(&b.kind))
1263        });
1264
1265        let symbol_count = included_symbols.len();
1266
1267        Ok(SliceResult {
1268            slice: ProgramSlice {
1269                target,
1270                direction: SliceDirection::Backward,
1271                included_symbols,
1272                symbol_count,
1273            },
1274            statistics: SliceStatistics {
1275                total_symbols: symbol_count,
1276                data_dependencies: 0, // Not available in call-graph fallback
1277                control_dependencies: symbol_count,
1278            },
1279        })
1280    }
1281
1282    /// Compute a forward program slice (what this symbol affects)
1283    ///
1284    /// Returns all symbols that the target symbol can affect through the call graph.
1285    /// This is useful for refactoring safety - finding all code that could be
1286    /// impacted by changes to this symbol.
1287    ///
1288    /// # Graph View (Call-Graph Fallback)
1289    ///
1290    /// **Current implementation uses call-graph reachability as a fallback.**
1291    /// Full CFG-based program slicing requires control dependence graph (CDG)
1292    /// which needs post-dominators and AST CFG integration not yet available.
1293    ///
1294    /// The fallback implementation uses forward reachability on the call graph,
1295    /// finding all callees that this symbol directly or indirectly calls.
1296    ///
1297    /// # Limitations
1298    ///
1299    /// - Uses call-graph reachability instead of full CFG-based slicing
1300    /// - Does not include data flow dependencies within functions
1301    /// - Does not include control flow from conditionals/loops
1302    /// - Full slicing will be available when AST CFG edges are integrated
1303    ///
1304    /// # Arguments
1305    /// * `symbol_id` - Stable symbol ID or FQN to slice from
1306    ///
1307    /// # Returns
1308    /// [`SliceResult`] containing the slice and statistics
1309    ///
1310    /// # Example
1311    ///
1312    /// \`\`\`no_run
1313    /// # use magellan::CodeGraph;
1314    /// # let mut graph = CodeGraph::open("test.db").unwrap();
1315    /// // Find what 'main_function' affects
1316    /// let slice_result = graph.forward_slice("main_function")?;
1317    /// println!("{} symbols are affected by this function", slice_result.slice.symbol_count);
1318    /// for symbol in &slice_result.slice.included_symbols {
1319    ///     println!("  - {}", symbol.fqn.as_deref().unwrap_or("?"));
1320    /// }
1321    /// \`\`\`
1322    pub fn forward_slice(&self, symbol_id: &str) -> Result<SliceResult> {
1323        let entity_id = self.resolve_symbol_entity(symbol_id)?;
1324        let backend = &*self.calls.backend;
1325
1326        // Get target symbol info
1327        let target = self.symbol_by_entity_id(entity_id)?;
1328
1329        // Use backend-agnostic reachable_from on call graph
1330        // This finds all callees that this symbol directly or indirectly calls
1331        let callee_entity_ids = reachable_from(backend, entity_id)?;
1332
1333        // Convert entity IDs to SymbolInfo
1334        let mut included_symbols = Vec::new();
1335        for id in callee_entity_ids {
1336            // Skip the starting symbol itself
1337            if id == entity_id {
1338                continue;
1339            }
1340
1341            if let Ok(info) = self.symbol_by_entity_id(id) {
1342                included_symbols.push(info);
1343            }
1344        }
1345
1346        // Sort deterministically
1347        included_symbols.sort_by(|a, b| {
1348            a.file_path
1349                .cmp(&b.file_path)
1350                .then_with(|| a.fqn.as_ref().cmp(&b.fqn.as_ref()))
1351                .then_with(|| a.kind.cmp(&b.kind))
1352        });
1353
1354        let symbol_count = included_symbols.len();
1355
1356        Ok(SliceResult {
1357            slice: ProgramSlice {
1358                target,
1359                direction: SliceDirection::Forward,
1360                included_symbols,
1361                symbol_count,
1362            },
1363            statistics: SliceStatistics {
1364                total_symbols: symbol_count,
1365                data_dependencies: 0, // Not available in call-graph fallback
1366                control_dependencies: symbol_count,
1367            },
1368        })
1369    }
1370
1371    /// Enumerate execution paths from a starting symbol
1372    ///
1373    /// Finds all execution paths from `start_symbol_id` to `end_symbol_id` (if provided)
1374    /// or all paths starting from `start_symbol_id` (if end_symbol_id is None).
1375    ///
1376    /// Path enumeration uses bounded DFS to prevent infinite traversal in cyclic graphs:
1377    /// - `max_depth`: Maximum path length (number of edges)
1378    /// - `max_paths`: Maximum number of paths to return
1379    /// - `revisit_cap`: Maximum number of times a single node can be revisited (prevents infinite loops)
1380    ///
1381    /// # Arguments
1382    ///
1383    /// * `start_symbol_id` - Starting symbol ID or FQN
1384    /// * `end_symbol_id` - Optional ending symbol ID or FQN (if None, enumerates all paths from start)
1385    /// * `max_depth` - Maximum path depth (default: 100)
1386    /// * `max_paths` - Maximum number of paths to return (default: 1000)
1387    ///
1388    /// # Returns
1389    ///
1390    /// Returns a [`PathEnumerationResult`] containing:
1391    /// - All discovered paths
1392    /// - Whether enumeration hit bounds
1393    /// - Statistics about path lengths and unique symbols
1394    ///
1395    /// # Example
1396    ///
1397    /// \`\`\`no_run
1398    /// # use magellan::CodeGraph;
1399    /// # fn main() -> anyhow::Result<()> {
1400    /// let mut graph = CodeGraph::open("codegraph.db")?;
1401    ///
1402    /// // Find all paths from main to any leaf function
1403    /// let result = graph.enumerate_paths("main", None, 50, 100)?;
1404    ///
1405    /// println!("Found {} paths", result.total_enumerated);
1406    /// println!("Average length: {:.2}", result.statistics.avg_length);
1407    /// for (i, path) in result.paths.iter().enumerate() {
1408    ///     println!("Path {}: {:?}", path.symbols.iter().map(|s| s.fqn.as_deref().unwrap_or("?")).collect::<Vec<_>>());
1409    /// }
1410    /// # Ok(())
1411    /// # }
1412    /// \`\`\`
1413    pub fn enumerate_paths(
1414        &self,
1415        start_symbol_id: &str,
1416        end_symbol_id: Option<&str>,
1417        max_depth: usize,
1418        max_paths: usize,
1419    ) -> Result<PathEnumerationResult> {
1420        let start_entity_id = self.resolve_symbol_entity(start_symbol_id)?;
1421        let backend = &*self.calls.backend;
1422
1423        // Build exit_nodes set for target symbol
1424        let exit_nodes: Option<AHashSet<i64>> = if let Some(end_id) = end_symbol_id {
1425            let end_entity_id = self.resolve_symbol_entity(end_id)?;
1426            let mut set = AHashSet::new();
1427            set.insert(end_entity_id);
1428            Some(set)
1429        } else {
1430            None
1431        };
1432
1433        // Use backend-agnostic path enumeration
1434        let config = PathEnumerationConfig {
1435            max_depth,
1436            max_paths,
1437            revisit_cap: 100, // Prevent infinite loops in cyclic graphs
1438            exit_nodes,
1439            _error_nodes: None,
1440        };
1441
1442        let enum_result = enumerate_paths(backend, start_entity_id, &config)?;
1443
1444        // Convert path enumeration result to our format
1445        let mut paths = Vec::new();
1446        let mut all_symbols = HashSet::new();
1447        let mut min_length = usize::MAX;
1448        let mut max_length = 0;
1449        let mut total_length = 0;
1450
1451        for path in enum_result.paths {
1452            let mut symbols = Vec::new();
1453
1454            for &entity_id in &path {
1455                if let Ok(info) = self.symbol_by_entity_id(entity_id) {
1456                    all_symbols.insert(info.symbol_id.clone().unwrap_or_default());
1457                    symbols.push(info);
1458                }
1459            }
1460
1461            let length = symbols.len();
1462            if length > 0 {
1463                min_length = min_length.min(length);
1464                max_length = max_length.max(length);
1465                total_length += length;
1466
1467                paths.push(ExecutionPath { symbols, length });
1468            }
1469        }
1470
1471        // Sort paths: first by starting symbol FQN, then by length
1472        paths.sort_by(|a, b| {
1473            match (
1474                a.symbols.first().and_then(|s| s.fqn.as_ref()),
1475                b.symbols.first().and_then(|s| s.fqn.as_ref()),
1476            ) {
1477                (Some(a_fqn), Some(b_fqn)) => {
1478                    a_fqn.cmp(b_fqn).then_with(|| a.length.cmp(&b.length))
1479                }
1480                (Some(_), None) => std::cmp::Ordering::Less,
1481                (None, Some(_)) => std::cmp::Ordering::Greater,
1482                (None, None) => a.length.cmp(&b.length),
1483            }
1484        });
1485
1486        let avg_length = if paths.is_empty() {
1487            0.0
1488        } else {
1489            total_length as f64 / paths.len() as f64
1490        };
1491
1492        // Determine if we hit bounds
1493        let bounded_hit = enum_result.pruned_by_bounds > 0;
1494
1495        Ok(PathEnumerationResult {
1496            paths,
1497            total_enumerated: enum_result.total_found,
1498            bounded_hit,
1499            statistics: PathStatistics {
1500                avg_length,
1501                min_length: if min_length == usize::MAX {
1502                    0
1503                } else {
1504                    min_length
1505                },
1506                max_length,
1507                unique_symbols: all_symbols.len(),
1508            },
1509        })
1510    }
1511}
1512
1513#[cfg(test)]
1514mod tests {
1515    use super::*;
1516    use crate::CodeGraph;
1517
1518    use std::sync::atomic::{AtomicU64, Ordering};
1519
1520    static TEST_DIR_COUNTER: AtomicU64 = AtomicU64::new(0);
1521
1522    fn next_test_dir() -> std::path::PathBuf {
1523        let n = TEST_DIR_COUNTER.fetch_add(1, Ordering::SeqCst);
1524        std::env::temp_dir().join(format!("magellan_test_{}_{}", std::process::id(), n))
1525    }
1526
1527    /// Test helper to create a simple call graph for testing
1528    ///
1529    /// Creates:
1530    /// - main() -> helper_a() -> leaf()
1531    /// - main() -> helper_b() -> leaf()
1532    /// - unused_function() -> leaf()
1533    ///
1534    /// Returns the CodeGraph and symbol IDs for main and unused_function
1535    fn create_test_graph() -> Result<(CodeGraph, String, String)> {
1536        // Use a persistent temp directory that won't be deleted
1537        // This is necessary for V3 backend which needs files to remain accessible.
1538        // Use a unique suffix per call to avoid collisions when tests run in parallel.
1539        let temp_dir = std::env::temp_dir().join(format!(
1540            "magellan_test_{}_{}",
1541            std::process::id(),
1542            std::time::SystemTime::now()
1543                .duration_since(std::time::UNIX_EPOCH)
1544                .unwrap_or_default()
1545                .as_nanos()
1546        ));
1547        std::fs::create_dir_all(&temp_dir)?;
1548        let db_path = temp_dir.join("test.db");
1549
1550        let source = r#"
1551fn main() {
1552    helper_a();
1553    helper_b();
1554}
1555
1556fn helper_a() {
1557    leaf();
1558}
1559
1560fn helper_b() {
1561    leaf();
1562}
1563
1564fn leaf() {
1565    println!("leaf");
1566}
1567
1568fn unused_function() {
1569    leaf();
1570}
1571"#;
1572
1573        let mut graph = CodeGraph::open(&db_path)?;
1574        // Index the file - use a temporary file path
1575        let test_file = temp_dir.join("test.rs");
1576        std::fs::write(&test_file, source)?;
1577        let path_str = test_file.to_string_lossy().to_string();
1578        let source_bytes = std::fs::read(&test_file)?;
1579
1580        // Index symbols and calls
1581        graph.index_file(&path_str, &source_bytes)?;
1582        graph.index_calls(&path_str, &source_bytes)?;
1583
1584        // Find the symbol IDs for main and unused_function
1585        let symbols = crate::graph::query::symbols_in_file(&mut graph, &path_str)?;
1586        let main_id = symbols
1587            .iter()
1588            .find(|s| s.name.as_deref() == Some("main"))
1589            .and_then(|s| s.fqn.clone())
1590            .unwrap_or_default();
1591
1592        let unused_id = symbols
1593            .iter()
1594            .find(|s| s.name.as_deref() == Some("unused_function"))
1595            .and_then(|s| s.fqn.clone())
1596            .unwrap_or_default();
1597
1598        // For testing, use the symbol's FQN directly
1599        // In a real scenario with proper SymbolId generation, we'd use that
1600        Ok((graph, main_id, unused_id))
1601    }
1602
1603    #[test]
1604    fn test_resolve_symbol_entity_not_found() {
1605        let (graph, _, _) = create_test_graph().unwrap();
1606        let result = graph.resolve_symbol_entity("nonexistent_id_123456789012");
1607        assert!(result.is_err());
1608    }
1609
1610    #[test]
1611    fn test_symbol_by_entity_id() {
1612        let (graph, _, _) = create_test_graph().unwrap();
1613
1614        // Get all entity IDs
1615        let entity_ids = graph.calls.backend.entity_ids().unwrap();
1616        let snapshot = SnapshotId::current();
1617
1618        // Find a Symbol entity
1619        for entity_id in entity_ids {
1620            if let Ok(node) = graph.calls.backend.get_node(snapshot, entity_id) {
1621                if node.kind == "Symbol" {
1622                    let info = graph.symbol_by_entity_id(entity_id);
1623                    assert!(info.is_ok());
1624                    let symbol_info = info.unwrap();
1625                    assert!(!symbol_info.file_path.is_empty());
1626                    assert!(!symbol_info.kind.is_empty());
1627                    return;
1628                }
1629            }
1630        }
1631
1632        panic!("No Symbol entity found in test graph");
1633    }
1634
1635    #[test]
1636    fn test_reachable_symbols_basic() {
1637        let (graph, _main_id, _unused_id) = create_test_graph().unwrap();
1638
1639        // Get all symbols and verify we can query them
1640        let entity_ids = graph.calls.backend.entity_ids().unwrap();
1641        eprintln!("Total entities: {}", entity_ids.len());
1642        let snapshot = SnapshotId::current();
1643        let mut found_symbols = 0;
1644
1645        for entity_id in &entity_ids {
1646            match graph.calls.backend.get_node(snapshot, *entity_id) {
1647                Ok(node) => {
1648                    eprintln!("  Entity {}: kind={}", entity_id, node.kind);
1649                    if node.kind == "Symbol" {
1650                        found_symbols += 1;
1651                    }
1652                }
1653                Err(e) => {
1654                    eprintln!("  Entity {}: ERROR getting node: {:?}", entity_id, e);
1655                }
1656            }
1657        }
1658
1659        // We should have found at least some symbols
1660        assert!(
1661            found_symbols > 0,
1662            "Should find Symbol entities in test graph, got {} symbols from {} entities",
1663            found_symbols,
1664            entity_ids.len()
1665        );
1666    }
1667
1668    #[test]
1669    fn test_reachable_symbols_max_depth() {
1670        let (graph, _main_id, _unused_id) = create_test_graph().unwrap();
1671
1672        // Get the main function's entity ID
1673        let snapshot = SnapshotId::current();
1674        let entity_ids = graph.calls.backend.entity_ids().unwrap();
1675
1676        let main_entity_id = entity_ids.into_iter().find(|&id| {
1677            if let Ok(node) = graph.calls.backend.get_node(snapshot, id) {
1678                if let Ok(data) = serde_json::from_value::<serde_json::Value>(node.data) {
1679                    if let Some(name) = data.get("name").and_then(|v| v.as_str()) {
1680                        return name == "main";
1681                    }
1682                }
1683            }
1684            false
1685        });
1686
1687        if let Some(entity_id) = main_entity_id {
1688            // Verify we can get the node
1689            let result = graph.calls.backend.get_node(snapshot, entity_id);
1690            assert!(result.is_ok(), "Should be able to get main node");
1691        }
1692    }
1693
1694    #[test]
1695    fn test_dead_symbols() {
1696        let (graph, _main_id, _unused_id) = create_test_graph().unwrap();
1697
1698        // Get all entity IDs
1699        let entity_ids = graph.calls.backend.entity_ids().unwrap();
1700
1701        // We should have some entities in the call graph
1702        assert!(entity_ids.len() > 0, "Should have call graph entities");
1703    }
1704
1705    #[test]
1706    fn test_reverse_reachable_symbols() {
1707        let (graph, _main_id, _unused_id) = create_test_graph().unwrap();
1708
1709        // Get all entity IDs
1710        let entity_ids = graph.calls.backend.entity_ids().unwrap();
1711
1712        // We should have some entities in the call graph
1713        assert!(entity_ids.len() > 0, "Should have call graph entities");
1714    }
1715}
magellan/graph/algorithms.rs

magellan/graph/
algorithms.rs