Skip to main content

mirage/cfg/
mod.rs

1// CFG data structures shared by MIR and AST pipelines
2
3pub mod analysis;
4pub mod ast;
5pub mod coordinates;
6pub mod diff;
7pub mod dominance_frontiers;
8pub mod dominators;
9pub mod edge;
10pub mod export;
11pub mod git_utils;
12pub mod hotpaths;
13pub mod icfg;
14pub mod loops;
15pub mod paths;
16pub mod patterns;
17pub mod post_dominators;
18pub mod reachability;
19pub mod source;
20pub mod summary;
21
22// Re-export for public API convenience
23pub use crate::storage::{
24    load_cfg_from_db, resolve_function_name, resolve_function_name_with_file,
25};
26#[allow(unused_imports)]
27pub use analysis::{find_entry, find_exits};
28
29#[cfg(feature = "sqlite")]
30pub use crate::storage::load_cfg_from_db_with_conn;
31pub use dominance_frontiers::compute_dominance_frontiers;
32pub use dominators::DominatorTree;
33pub use edge::EdgeType;
34pub use export::{export_dot, export_json, CFGExport};
35#[allow(unused_imports)] // Public API re-export, used directly by consumers
36pub use hotpaths::{compute_hot_paths, HotPath, HotpathsOptions};
37pub use loops::detect_natural_loops;
38#[allow(unused_imports)] // Used in tests within the module
39pub use paths::{
40    enumerate_paths, enumerate_paths_cached, enumerate_paths_cached_with_context,
41    enumerate_paths_incremental, enumerate_paths_iterative, enumerate_paths_with_context,
42    enumerate_paths_with_metadata, get_or_enumerate_paths, EnumerationContext, EnumerationStats,
43    IncrementalPathsResult, LimitsHit, Path, PathEnumerationResult, PathKind, PathLimits,
44};
45pub use patterns::{detect_if_else_patterns, detect_match_patterns};
46pub use post_dominators::PostDominatorTree;
47pub use reachability::{compute_path_impact, find_reachable_from_block, PathImpact};
48pub use source::SourceLocation;
49pub use summary::summarize_path;
50
51use anyhow::Result;
52use petgraph::graph::DiGraph;
53use serde::{Deserialize, Serialize};
54use std::collections::HashMap;
55
56/// Control Flow Graph
57pub type Cfg = DiGraph<BasicBlock, EdgeType>;
58
59/// Build CFG edges from Magellan's terminator strings
60///
61/// This function constructs edges in memory by analyzing each block's terminator
62/// and determining successor blocks. Per RESEARCH.md Pattern 2, edges are derived
63/// from terminator data rather than queried from the database.
64///
65/// # Arguments
66///
67/// * `graph` - The CFG graph to add edges to (already populated with nodes)
68/// * `blocks` - Block data from Magellan's cfg_blocks table
69/// * `db_id_to_node` - Mapping from database block IDs to graph node indices
70///
71/// # Returns
72///
73/// * `Ok(())` - Edges constructed successfully
74/// * `Err(...)` - Error if construction fails
75///
76/// # Edge Construction Rules
77///
78/// - "fallthrough" -> Single EdgeType::Fallthrough edge to next block by byte order
79/// - "conditional" -> Two edges: TrueBranch (next block), FalseBranch (block after that)
80/// - "goto" -> Single EdgeType::Fallthrough edge to next block by byte order
81/// - "return" | "panic" -> No outgoing edges (exit block)
82/// - "break" | "continue" -> No edges (loop control - handled in analysis phase)
83/// - "call" -> EdgeType::Call edge to next block by byte order
84///
85/// # Notes
86///
87/// - Uses byte offsets to determine control flow order (not sequential indices)
88/// - Blocks are sorted by byte_start to determine execution order
89/// - Loop back-edges will be detected during loop analysis phase
90pub fn build_edges_from_terminators(
91    graph: &mut Cfg,
92    blocks: &[(
93        i64,
94        String,
95        Option<String>,
96        Option<i64>,
97        Option<i64>,
98        Option<i64>,
99        Option<i64>,
100        Option<i64>,
101        Option<i64>,
102        Option<i64>,
103        Option<i64>,
104        Option<i64>,
105    )],
106    db_id_to_node: &HashMap<i64, usize>,
107) -> Result<()> {
108    use petgraph::graph::NodeIndex;
109
110    // Sort blocks by byte_start to get execution order
111    // This is crucial because block IDs are not necessarily in control flow order
112    let mut blocks_with_idx: Vec<(
113        usize,
114        &(
115            i64,
116            String,
117            Option<String>,
118            Option<i64>,
119            Option<i64>,
120            Option<i64>,
121            Option<i64>,
122            Option<i64>,
123            Option<i64>,
124            Option<i64>,
125            Option<i64>,
126            Option<i64>,
127        ),
128    )> = blocks.iter().enumerate().collect();
129    blocks_with_idx.sort_by_key(|(_, (_, _, _, byte_start, _, _, _, _, _, _, _, _))| *byte_start);
130
131    // Build a map from position in sorted order to node index
132    let mut sorted_pos_to_node: HashMap<usize, usize> = HashMap::new();
133    for (sorted_pos, (_original_idx, (db_id, _, _, _, _, _, _, _, _, _, _, _))) in
134        blocks_with_idx.iter().enumerate()
135    {
136        if let Some(&node_idx) = db_id_to_node.get(db_id) {
137            sorted_pos_to_node.insert(sorted_pos, node_idx);
138        }
139    }
140
141    // For each block in sorted order, analyze terminator to find successors
142    for (sorted_pos, (_original_idx, (_, _kind, terminator_opt, _, _, _, _, _, _, _, _, _))) in
143        blocks_with_idx.iter().enumerate()
144    {
145        let terminator = terminator_opt.as_deref().unwrap_or("");
146        let current_node = *sorted_pos_to_node.get(&sorted_pos).ok_or_else(|| {
147            anyhow::anyhow!("Block at position {} not found in node map", sorted_pos)
148        })?;
149
150        match terminator {
151            "fallthrough" | "goto" => {
152                // Edge to next block in byte order
153                if sorted_pos + 1 < blocks_with_idx.len() {
154                    if let Some(&target_node) = sorted_pos_to_node.get(&(sorted_pos + 1)) {
155                        graph.add_edge(
156                            NodeIndex::new(current_node),
157                            NodeIndex::new(target_node),
158                            EdgeType::Fallthrough,
159                        );
160                    }
161                }
162            }
163            "conditional" => {
164                // Two edges: true and false branches
165                // True branch is next block in byte order (if branch)
166                if sorted_pos + 1 < blocks_with_idx.len() {
167                    if let Some(&true_target) = sorted_pos_to_node.get(&(sorted_pos + 1)) {
168                        graph.add_edge(
169                            NodeIndex::new(current_node),
170                            NodeIndex::new(true_target),
171                            EdgeType::TrueBranch,
172                        );
173                    }
174                }
175                // False branch is block after next (else/end)
176                if sorted_pos + 2 < blocks_with_idx.len() {
177                    if let Some(&false_target) = sorted_pos_to_node.get(&(sorted_pos + 2)) {
178                        graph.add_edge(
179                            NodeIndex::new(current_node),
180                            NodeIndex::new(false_target),
181                            EdgeType::FalseBranch,
182                        );
183                    }
184                }
185            }
186            "return" | "panic" => {
187                // No outgoing edges (exit block)
188            }
189            "break" | "continue" => {
190                // Loop exit/back edges - will need proper target resolution
191                // For now, no edge (will be refined with loop analysis)
192            }
193            "call" => {
194                // Function call - edge to next block (normal return path)
195                if sorted_pos + 1 < blocks_with_idx.len() {
196                    if let Some(&target_node) = sorted_pos_to_node.get(&(sorted_pos + 1)) {
197                        graph.add_edge(
198                            NodeIndex::new(current_node),
199                            NodeIndex::new(target_node),
200                            EdgeType::Call,
201                        );
202                    }
203                }
204            }
205            _ => {
206                // Unknown terminator - no edge
207            }
208        }
209    }
210    Ok(())
211}
212
213/// Build CFG edges from Magellan's cfg_edges table data
214///
215/// This function constructs edges in memory by reading the cfg_edges table,
216/// which stores edges as vector indices within the function's block list.
217///
218/// # Arguments
219///
220/// * `graph` - The CFG graph to add edges to (already populated with nodes)
221/// * `edges` - Edge rows from cfg_edges table: (source_idx, target_idx, edge_type)
222/// * `index_to_node` - Mapping from vector index (0,1,2...) to graph node index
223///
224/// # Returns
225///
226/// * `Ok(())` - Edges constructed successfully
227/// * `Err(...)`` - Error if index mapping fails
228///
229/// # Edge Type Mapping
230///
231/// | Magellan edge_type | Mirage EdgeType |
232/// |-------------------|-----------------|
233/// | fallthrough | Fallthrough |
234/// | conditional_true | TrueBranch |
235/// | conditional_false | FalseBranch |
236/// | back_edge | LoopBack |
237/// | call | Call |
238/// | return | Return |
239/// | jump | Fallthrough (break/continue fallback) |
240pub fn build_edges_from_cfg_edges(
241    graph: &mut Cfg,
242    edges: &[(i64, i64, String)],
243    index_to_node: &std::collections::HashMap<usize, usize>,
244) -> Result<()> {
245    use petgraph::graph::NodeIndex;
246
247    for (source_idx, target_idx, edge_type_str) in edges {
248        let source_node = *index_to_node
249            .get(&(*source_idx as usize))
250            .ok_or_else(|| anyhow::anyhow!("Source index {} not found in block map", source_idx))?;
251        let target_node = *index_to_node
252            .get(&(*target_idx as usize))
253            .ok_or_else(|| anyhow::anyhow!("Target index {} not found in block map", target_idx))?;
254
255        let edge_type = match edge_type_str.as_str() {
256            "fallthrough" => EdgeType::Fallthrough,
257            "conditional_true" => EdgeType::TrueBranch,
258            "conditional_false" => EdgeType::FalseBranch,
259            "back_edge" => EdgeType::LoopBack,
260            "call" => EdgeType::Call,
261            "return" => EdgeType::Return,
262            "jump" => EdgeType::Fallthrough,
263            _ => EdgeType::Fallthrough,
264        };
265
266        graph.add_edge(
267            NodeIndex::new(source_node),
268            NodeIndex::new(target_node),
269            edge_type,
270        );
271    }
272
273    Ok(())
274}
275
276/// Basic block in a CFG
277#[derive(Debug, Clone, Serialize, Deserialize)]
278pub struct BasicBlock {
279    /// Unique identifier within the function (graph node index)
280    pub id: BlockId,
281    /// Original database block ID from cfg_blocks (for coverage/schema lookups)
282    #[serde(skip_serializing_if = "Option::is_none")]
283    pub db_id: Option<i64>,
284    /// Block kind (entry, normal, exit)
285    pub kind: BlockKind,
286    /// Statements in this block (simplified for now)
287    pub statements: Vec<String>,
288    /// Terminator instruction
289    pub terminator: Terminator,
290    /// Source location for this block (if available)
291    pub source_location: Option<SourceLocation>,
292    /// 4D Spatial Coordinates
293    /// X coordinate: dominator depth (control flow hierarchy level)
294    pub coord_x: i64,
295    /// Y coordinate: loop nesting depth (how many loops surround this block)
296    pub coord_y: i64,
297    /// Z coordinate: branch distance from entry point
298    pub coord_z: i64,
299}
300
301/// Block identifier
302pub type BlockId = usize;
303
304/// Block classification
305#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
306pub enum BlockKind {
307    Entry,
308    Normal,
309    Exit,
310}
311
312/// Terminator instruction (simplified representation)
313#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
314pub enum Terminator {
315    Goto {
316        target: BlockId,
317    },
318    SwitchInt {
319        targets: Vec<BlockId>,
320        otherwise: BlockId,
321    },
322    Return,
323    Unreachable,
324    Call {
325        target: Option<BlockId>,
326        unwind: Option<BlockId>,
327    },
328    Abort(String),
329}