Skip to main content

mirage/cfg/paths/
mod.rs

1//! Path enumeration for CFG analysis
2//!
3//! This module provides data structures and algorithms for discovering
4//! all execution paths through a function's control flow graph from entry
5//! to exit. Paths are discovered using depth-first search with cycle
6//! detection and loop bounding to prevent infinite recursion.
7//!
8//! ## Feasibility Checking
9//!
10//! This module provides **STATIC** feasibility checking only. It does NOT
11//! perform symbolic execution or data flow analysis.
12//!
13//! ### What we check:
14//! - Entry block is actually Entry kind
15//! - Exit block has valid terminator (Return, Abort, Call with target)
16//! - All blocks are reachable from entry
17//! - No dead ends (Goto/SwitchInt as last terminator)
18//!
19//! ### What we DON'T check (requires symbolic execution):
20//! - Conflicting branch conditions (e.g., `if x > 5 && x < 3`)
21//! - Data-dependent constraints (array bounds, divide by zero)
22//! - Runtime panic conditions
23//!
24//! ### Tradeoff:
25//! Static checking is fast (O(n)) and sound (never falsely claims feasible).
26//! Symbolic execution is precise but slow (>100x) and complex.
27//!
28//! For most code intelligence queries, static checking is sufficient.
29//! Future work may add symbolic execution for specific paths.
30//!
31//! ## Path Classification
32//!
33//! Paths are categorized based on their structure and content:
34//! - **Normal:** Standard entry → return path
35//! - **Error:** Contains panic, abort, or error propagation
36//! - **Degenerate:** Dead end, infinite loop, or infeasible path
37//! - **Unreachable:** Statically unreachable code path
38
39pub mod cached;
40pub mod enumeration;
41pub mod feasibility;
42pub mod incremental;
43pub mod limits;
44pub mod metadata;
45
46pub use cached::{
47    check_path_explosion, enumerate_paths_cached, enumerate_paths_cached_with_context,
48    estimate_path_count,
49};
50pub use enumeration::{enumerate_paths, enumerate_paths_iterative, enumerate_paths_with_context};
51pub use feasibility::{
52    classify_path, classify_path_precomputed, is_feasible_path, is_feasible_path_precomputed,
53};
54pub use incremental::{enumerate_paths_incremental, IncrementalPathsResult};
55pub use limits::{hash_path, EnumerationContext, PathLimits};
56pub use metadata::{
57    enumerate_paths_with_metadata, get_or_enumerate_paths, EnumerationStats, LimitsHit,
58    PathEnumerationResult,
59};
60
61use crate::cfg::{BlockId, Cfg};
62use petgraph::graph::NodeIndex;
63use serde::{Deserialize, Serialize};
64
65/// Execution path through a CFG
66///
67/// Represents a sequence of basic blocks from an entry block to an exit block.
68/// Each path has a unique identifier derived from a BLAKE3 hash of the block
69/// sequence for deduplication and comparison.
70#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
71pub struct Path {
72    /// Unique identifier (BLAKE3 hash of block sequence)
73    pub path_id: String,
74    /// Ordered block IDs in execution order
75    pub blocks: Vec<BlockId>,
76    /// Classification of this path
77    pub kind: PathKind,
78    /// First block (entry)
79    pub entry: BlockId,
80    /// Last block (exit)
81    pub exit: BlockId,
82}
83
84impl Path {
85    /// Create a new path from a block sequence
86    pub fn new(blocks: Vec<BlockId>, kind: PathKind) -> Self {
87        let entry = *blocks.first().unwrap_or(&0);
88        let exit = *blocks.last().unwrap_or(&0);
89        let path_id = hash_path(&blocks);
90
91        Self {
92            path_id,
93            blocks,
94            kind,
95            entry,
96            exit,
97        }
98    }
99
100    /// Create a path with a pre-existing path_id (for loading from cache)
101    ///
102    /// # Arguments
103    ///
104    /// * `path_id` - The stored path identifier
105    /// * `blocks` - Ordered block IDs in execution order
106    /// * `kind` - Path classification
107    ///
108    /// # Note
109    ///
110    /// This bypasses the normal path_id computation. Use only when loading
111    /// previously stored paths where the path_id was already computed.
112    pub fn with_id(path_id: String, blocks: Vec<BlockId>, kind: PathKind) -> Self {
113        let entry = *blocks.first().unwrap_or(&0);
114        let exit = *blocks.last().unwrap_or(&0);
115
116        Self {
117            path_id,
118            blocks,
119            kind,
120            entry,
121            exit,
122        }
123    }
124
125    /// Get the length of this path (number of blocks)
126    pub fn len(&self) -> usize {
127        self.blocks.len()
128    }
129
130    /// Check if this path is empty
131    pub fn is_empty(&self) -> bool {
132        self.blocks.is_empty()
133    }
134
135    /// Get an iterator over the blocks in this path
136    pub fn iter(&self) -> impl Iterator<Item = &BlockId> {
137        self.blocks.iter()
138    }
139
140    /// Check if this path contains a specific block
141    pub fn contains(&self, block_id: BlockId) -> bool {
142        self.blocks.contains(&block_id)
143    }
144}
145
146impl std::fmt::Display for Path {
147    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
148        write!(f, "{}", self.path_id)
149    }
150}
151
152/// Classification of execution paths
153///
154/// Paths are categorized based on their structure and content.
155/// Classification is used for analysis and reporting.
156#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
157pub enum PathKind {
158    /// Standard entry -> return path
159    Normal,
160    /// Contains panic, abort, or error propagation
161    Error,
162    /// Dead end or infinite loop (no valid exit)
163    Degenerate,
164    /// Statically unreachable code path
165    Unreachable,
166}
167
168impl PathKind {
169    /// Check if this path represents a normal execution
170    pub fn is_normal(&self) -> bool {
171        matches!(self, Self::Normal)
172    }
173
174    /// Check if this path represents an error condition
175    pub fn is_error(&self) -> bool {
176        matches!(self, Self::Error)
177    }
178
179    /// Check if this path is degenerate (abnormal structure)
180    pub fn is_degenerate(&self) -> bool {
181        matches!(self, Self::Degenerate)
182    }
183
184    /// Check if this path is unreachable
185    pub fn is_unreachable(&self) -> bool {
186        matches!(self, Self::Unreachable)
187    }
188}
189
190/// Find the NodeIndex for a given BlockId
191///
192/// Helper function to convert BlockIds from paths to NodeIndices for CFG queries.
193/// Returns None if the block ID doesn't exist in the CFG.
194fn find_node_by_block_id(cfg: &Cfg, block_id: BlockId) -> Option<NodeIndex> {
195    cfg.node_indices().find(|&idx| cfg[idx].id == block_id)
196}
197
198#[cfg(test)]
199mod test_fixtures;
200#[cfg(test)]
201mod tests_enumeration;
202#[cfg(test)]
203mod tests_enumeration_advanced;
204#[cfg(test)]
205mod tests_feasibility;
206
207#[cfg(test)]
208mod tests {
209    use super::*;
210    use petgraph::graph::DiGraph;
211
212    #[test]
213    fn test_hash_path_deterministic() {
214        let blocks = vec![0, 1, 2];
215        let hash1 = hash_path(&blocks);
216        let hash2 = hash_path(&blocks);
217
218        assert_eq!(hash1, hash2, "Same input should produce same hash");
219    }
220
221    #[test]
222    fn test_hash_path_different_sequences() {
223        let blocks1 = vec![0, 1, 2];
224        let blocks2 = vec![0, 2, 1];
225
226        assert_ne!(hash_path(&blocks1), hash_path(&blocks2));
227    }
228
229    #[test]
230    fn test_hash_path_length_collision_protection() {
231        let blocks1 = vec![1, 2, 3];
232        let blocks2 = vec![1, 2, 3, 0];
233
234        assert_ne!(hash_path(&blocks1), hash_path(&blocks2));
235    }
236
237    #[test]
238    fn test_path_new() {
239        let blocks = vec![0, 1, 2];
240        let path = Path::new(blocks.clone(), PathKind::Normal);
241
242        assert_eq!(path.blocks, blocks);
243        assert_eq!(path.entry, 0);
244        assert_eq!(path.exit, 2);
245        assert_eq!(path.kind, PathKind::Normal);
246        assert!(!path.path_id.is_empty());
247    }
248
249    #[test]
250    fn test_path_len() {
251        let blocks = vec![0, 1, 2];
252        let path = Path::new(blocks, PathKind::Normal);
253
254        assert_eq!(path.len(), 3);
255        assert!(!path.is_empty());
256    }
257
258    #[test]
259    fn test_path_contains() {
260        let blocks = vec![0, 1, 2];
261        let path = Path::new(blocks, PathKind::Normal);
262
263        assert!(path.contains(0));
264        assert!(path.contains(1));
265        assert!(path.contains(2));
266        assert!(!path.contains(3));
267    }
268
269    #[test]
270    fn test_path_limits_default() {
271        let limits = PathLimits::default();
272
273        assert_eq!(limits.max_length, 1000);
274        assert_eq!(limits.max_paths, 10000);
275        assert_eq!(limits.loop_unroll_limit, 3);
276    }
277
278    #[test]
279    fn test_path_limits_custom() {
280        let limits = PathLimits::new(100, 500, 5);
281
282        assert_eq!(limits.max_length, 100);
283        assert_eq!(limits.max_paths, 500);
284        assert_eq!(limits.loop_unroll_limit, 5);
285    }
286
287    #[test]
288    fn test_path_limits_builder() {
289        let limits = PathLimits::default()
290            .with_max_length(200)
291            .with_max_paths(1000)
292            .with_loop_unroll_limit(10);
293
294        assert_eq!(limits.max_length, 200);
295        assert_eq!(limits.max_paths, 1000);
296        assert_eq!(limits.loop_unroll_limit, 10);
297    }
298
299    #[test]
300    fn test_path_kind_is_normal() {
301        assert!(PathKind::Normal.is_normal());
302        assert!(!PathKind::Error.is_normal());
303        assert!(!PathKind::Degenerate.is_normal());
304        assert!(!PathKind::Unreachable.is_normal());
305    }
306
307    #[test]
308    fn test_path_kind_is_error() {
309        assert!(PathKind::Error.is_error());
310        assert!(!PathKind::Normal.is_error());
311    }
312
313    #[test]
314    fn test_path_kind_is_degenerate() {
315        assert!(PathKind::Degenerate.is_degenerate());
316        assert!(!PathKind::Normal.is_degenerate());
317    }
318
319    #[test]
320    fn test_path_kind_is_unreachable() {
321        assert!(PathKind::Unreachable.is_unreachable());
322        assert!(!PathKind::Normal.is_unreachable());
323    }
324
325    fn create_linear_cfg() -> Cfg {
326        test_fixtures::create_linear_cfg()
327    }
328
329    #[test]
330    fn test_find_node_by_block_id_existing() {
331        let cfg = create_linear_cfg();
332
333        let b0 = find_node_by_block_id(&cfg, 0);
334        let b1 = find_node_by_block_id(&cfg, 1);
335        let b2 = find_node_by_block_id(&cfg, 2);
336
337        assert!(b0.is_some());
338        assert!(b1.is_some());
339        assert!(b2.is_some());
340
341        assert_eq!(b0.unwrap().index(), 0);
342        assert_eq!(b1.unwrap().index(), 1);
343        assert_eq!(b2.unwrap().index(), 2);
344    }
345
346    #[test]
347    fn test_find_node_by_block_id_nonexistent() {
348        let cfg = create_linear_cfg();
349
350        let b99 = find_node_by_block_id(&cfg, 99);
351        assert!(b99.is_none());
352    }
353
354    #[test]
355    fn test_find_node_by_block_id_empty_cfg() {
356        let cfg: Cfg = DiGraph::new();
357
358        let b0 = find_node_by_block_id(&cfg, 0);
359        assert!(b0.is_none());
360    }
361}