Skip to main content

mirage/cfg/
diff.rs

1//! CFG diff algorithms for comparing control flow graphs
2//!
3//! This module provides functionality to compare CFGs between two snapshots,
4//! detecting added/deleted/modified blocks and edges with structural similarity scores.
5//!
6//! # Design
7//!
8//! - Uses petgraph for graph representation and algorithms
9//! - Computes set differences for blocks and edges
10//! - Calculates structural similarity based on graph edit distance
11//! - Supports SQLite and geometric backends via StorageTrait
12//!
13//! # Examples
14//!
15//! ```ignore
16//! # use mirage_analyzer::cfg::diff::{compute_cfg_diff, CfgDiff};
17//! # use mirage_analyzer::storage::Backend;
18//! # use anyhow::Result;
19//! # fn main() -> Result<()> {
20//! let backend = Backend::detect_and_open("codegraph.db")?;
21//! let diff = compute_cfg_diff(&backend, 123, "current", "current")?;
22//! println!("Similarity: {:.1}%", diff.structural_similarity * 100.0);
23//! # Ok(())
24//! # }
25//! ```
26
27use anyhow::Result;
28use petgraph::graph::DiGraph;
29use serde::{Deserialize, Serialize};
30use std::collections::{HashMap, HashSet};
31
32use crate::storage::{Backend, CfgBlockData};
33
34// ============================================================================
35// Diff Output Structures
36// ============================================================================
37
38/// CFG diff result comparing two snapshots of a function
39///
40/// Contains all changes detected between the before and after snapshots,
41/// including added/deleted/modified blocks and edges, plus a similarity score.
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct CfgDiff {
44    /// Function entity ID
45    pub function_id: i64,
46    /// Function fully-qualified name
47    pub function_name: String,
48    /// Before snapshot identifier (transaction ID or "current")
49    pub before_snapshot: String,
50    /// After snapshot identifier (transaction ID or "current")
51    pub after_snapshot: String,
52    /// Blocks added in after snapshot
53    pub added_blocks: Vec<BlockDiff>,
54    /// Blocks deleted from before snapshot
55    pub deleted_blocks: Vec<BlockDiff>,
56    /// Blocks modified between snapshots
57    pub modified_blocks: Vec<BlockChange>,
58    /// Edges added in after snapshot
59    pub added_edges: Vec<EdgeDiff>,
60    /// Edges deleted from before snapshot
61    pub deleted_edges: Vec<EdgeDiff>,
62    /// Structural similarity score (0.0 = completely different, 1.0 = identical)
63    pub structural_similarity: f64,
64}
65
66/// Representation of a single block for diff purposes
67#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
68pub struct BlockDiff {
69    /// Block unique identifier
70    pub block_id: i64,
71    /// Block kind (entry, conditional, loop, match, return, etc.)
72    pub kind: String,
73    /// Terminator kind (fallthrough, conditional, return, etc.)
74    pub terminator: String,
75    /// Source location (file:line:col format)
76    pub source_location: String,
77}
78
79/// Block change detected between two snapshots
80#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct BlockChange {
82    /// Block unique identifier
83    pub block_id: i64,
84    /// Block state in before snapshot
85    pub before: BlockDiff,
86    /// Block state in after snapshot
87    pub after: BlockDiff,
88    /// Type of change detected
89    pub change_type: ChangeType,
90}
91
92/// Type of change detected for a block
93#[derive(Debug, Clone, Serialize, Deserialize)]
94pub enum ChangeType {
95    /// Terminator instruction changed
96    TerminatorChanged { before: String, after: String },
97    /// Source location changed (block moved within file)
98    SourceLocationChanged,
99    /// Both terminator and location changed
100    BothChanged,
101    /// Block's outgoing edges changed
102    EdgesChanged,
103}
104
105/// Representation of a single edge for diff purposes
106#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
107pub struct EdgeDiff {
108    /// Source block ID
109    pub from_block: i64,
110    /// Target block ID
111    pub to_block: i64,
112    /// Edge type (fallthrough, true_branch, false_branch, etc.)
113    pub edge_type: String,
114}
115
116// ============================================================================
117// Diff Computation
118// ============================================================================
119
120/// Compute CFG diff between two snapshots
121///
122/// This function compares the CFG of a function at two different snapshots,
123/// detecting structural changes and computing a similarity score.
124///
125/// # Arguments
126///
127/// * `backend` - Storage backend for querying CFG data
128/// * `function_id` - Function entity ID to compare
129/// * `before_snapshot` - Snapshot identifier for "before" state (transaction ID or "current")
130/// * `after_snapshot` - Snapshot identifier for "after" state (transaction ID or "current")
131///
132/// # Returns
133///
134/// * `Ok(CfgDiff)` - Diff result with all detected changes
135/// * `Err(...)` - Error if query or computation fails
136///
137/// # Note
138///
139/// The current implementation queries the current state for both snapshots.
140/// Future versions will support true snapshot-based comparison using SnapshotId.
141pub fn compute_cfg_diff(
142    backend: &Backend,
143    function_id: i64,
144    before_snapshot: &str,
145    after_snapshot: &str,
146) -> Result<CfgDiff> {
147    // Query function name
148    let function_name = backend
149        .get_entity(function_id)
150        .map(|e| e.name.clone())
151        .unwrap_or_else(|| format!("<function_{}>", function_id));
152
153    // Query CFG blocks
154    // Note: Current implementation uses current state for both snapshots
155    // TODO: Add snapshot_id parameter to StorageTrait::get_cfg_blocks
156    let blocks_before = backend.get_cfg_blocks(function_id)?;
157    let blocks_after = backend.get_cfg_blocks(function_id)?;
158
159    // Build block maps with sequential IDs for diff
160    let before_map: HashMap<i64, BlockDiff> = blocks_before
161        .iter()
162        .enumerate()
163        .map(|(idx, b)| {
164            (
165                idx as i64,
166                BlockDiff {
167                    block_id: idx as i64,
168                    kind: b.kind.clone(),
169                    terminator: b.terminator.clone(),
170                    source_location: format!(
171                        "{}:{}:{}-{}:{}",
172                        "", /* file could be added later */
173                        b.start_line,
174                        b.start_col,
175                        b.end_line,
176                        b.end_col
177                    ),
178                },
179            )
180        })
181        .collect();
182
183    let after_map: HashMap<i64, BlockDiff> = blocks_after
184        .iter()
185        .enumerate()
186        .map(|(idx, b)| {
187            (
188                idx as i64,
189                BlockDiff {
190                    block_id: idx as i64,
191                    kind: b.kind.clone(),
192                    terminator: b.terminator.clone(),
193                    source_location: format!(
194                        "{}:{}:{}-{}:{}",
195                        "", /* file could be added later */
196                        b.start_line,
197                        b.start_col,
198                        b.end_line,
199                        b.end_col
200                    ),
201                },
202            )
203        })
204        .collect();
205
206    // Compute block set differences
207    let before_ids: HashSet<i64> = before_map.keys().copied().collect();
208    let after_ids: HashSet<i64> = after_map.keys().copied().collect();
209
210    let added_block_ids: Vec<_> = after_ids.difference(&before_ids).copied().collect();
211    let deleted_block_ids: Vec<_> = before_ids.difference(&after_ids).copied().collect();
212    let common_ids: Vec<_> = before_ids.intersection(&after_ids).copied().collect();
213
214    // Build added blocks
215    let added_blocks: Vec<BlockDiff> = added_block_ids
216        .iter()
217        .filter_map(|id| after_map.get(id).cloned())
218        .collect();
219
220    // Build deleted blocks
221    let deleted_blocks: Vec<BlockDiff> = deleted_block_ids
222        .iter()
223        .filter_map(|id| before_map.get(id).cloned())
224        .collect();
225
226    // Detect modified blocks
227    let modified_blocks: Vec<BlockChange> = common_ids
228        .iter()
229        .filter_map(|id| {
230            let before = before_map.get(id)?;
231            let after = after_map.get(id)?;
232
233            // Check if anything changed
234            if before == after {
235                return None;
236            }
237
238            // Determine change type
239            let terminator_changed = before.terminator != after.terminator;
240            let location_changed = before.source_location != after.source_location;
241
242            let change_type = match (terminator_changed, location_changed) {
243                (true, false) => ChangeType::TerminatorChanged {
244                    before: before.terminator.clone(),
245                    after: after.terminator.clone(),
246                },
247                (false, true) => ChangeType::SourceLocationChanged,
248                (true, true) => ChangeType::BothChanged,
249                (false, false) => return None, // No change
250            };
251
252            Some(BlockChange {
253                block_id: *id,
254                before: before.clone(),
255                after: after.clone(),
256                change_type,
257            })
258        })
259        .collect();
260
261    // For edges, we derive them from block terminators
262    // since we don't have explicit edge storage yet
263    let (added_edges, deleted_edges) = compute_edge_diff(&before_map, &after_map)?;
264
265    // Calculate structural similarity
266    let total_changes = added_blocks.len()
267        + deleted_blocks.len()
268        + modified_blocks.len()
269        + added_edges.len()
270        + deleted_edges.len();
271
272    let total_blocks_before = before_map.len().max(1);
273    let structural_similarity = if total_changes == 0 {
274        1.0
275    } else {
276        // Simple heuristic: 1 - (changes / (blocks * 2))
277        // Factor of 2 accounts for edges contributing to structure
278        let max_changes = total_blocks_before * 2;
279        let similarity_ratio = 1.0 - (total_changes as f64 / max_changes.max(1) as f64);
280        similarity_ratio.max(0.0)
281    };
282
283    Ok(CfgDiff {
284        function_id,
285        function_name,
286        before_snapshot: before_snapshot.to_string(),
287        after_snapshot: after_snapshot.to_string(),
288        added_blocks,
289        deleted_blocks,
290        modified_blocks,
291        added_edges,
292        deleted_edges,
293        structural_similarity,
294    })
295}
296
297/// Compute edge differences between two CFG snapshots
298///
299/// Derives edges from terminator information and compares them.
300/// Uses a simplified representation since explicit edge storage is not available.
301///
302/// # Returns
303///
304/// * `(added_edges, deleted_edges)` - Tuple of added and deleted edges
305fn compute_edge_diff(
306    before: &HashMap<i64, BlockDiff>,
307    after: &HashMap<i64, BlockDiff>,
308) -> Result<(Vec<EdgeDiff>, Vec<EdgeDiff>)> {
309    // Derive edges from terminators
310    let before_edges = derive_edges(before);
311    let after_edges = derive_edges(after);
312
313    let before_set: HashSet<_> = before_edges.iter().collect();
314    let after_set: HashSet<_> = after_edges.iter().collect();
315
316    let added: Vec<_> = after_set
317        .difference(&before_set)
318        .map(|e| (*e).clone())
319        .collect();
320
321    let deleted: Vec<_> = before_set
322        .difference(&after_set)
323        .map(|e| (*e).clone())
324        .collect();
325
326    Ok((added, deleted))
327}
328
329/// Derive edges from block terminators
330///
331/// This is a simplified implementation that creates edges based on
332/// terminator kind and sequential block ordering.
333/// Future versions will query actual edge data from the database.
334fn derive_edges(blocks: &HashMap<i64, BlockDiff>) -> Vec<EdgeDiff> {
335    let mut edges = Vec::new();
336    let mut block_ids: Vec<_> = blocks.keys().copied().collect();
337    block_ids.sort();
338
339    for (idx, &block_id) in block_ids.iter().enumerate() {
340        let block = match blocks.get(&block_id) {
341            Some(b) => b,
342            None => continue,
343        };
344
345        match block.terminator.as_str() {
346            "fallthrough" | "goto" => {
347                // Edge to next block
348                if idx + 1 < block_ids.len() {
349                    edges.push(EdgeDiff {
350                        from_block: block_id,
351                        to_block: block_ids[idx + 1],
352                        edge_type: "fallthrough".to_string(),
353                    });
354                }
355            }
356            "conditional" => {
357                // Two edges: true and false branches
358                if idx + 1 < block_ids.len() {
359                    edges.push(EdgeDiff {
360                        from_block: block_id,
361                        to_block: block_ids[idx + 1],
362                        edge_type: "true_branch".to_string(),
363                    });
364                }
365                if idx + 2 < block_ids.len() {
366                    edges.push(EdgeDiff {
367                        from_block: block_id,
368                        to_block: block_ids[idx + 2],
369                        edge_type: "false_branch".to_string(),
370                    });
371                }
372            }
373            "return" | "panic" => {
374                // No outgoing edges
375            }
376            "call" => {
377                // Edge to next block (return path)
378                if idx + 1 < block_ids.len() {
379                    edges.push(EdgeDiff {
380                        from_block: block_id,
381                        to_block: block_ids[idx + 1],
382                        edge_type: "call".to_string(),
383                    });
384                }
385            }
386            _ => {
387                // Unknown terminator - no edges
388            }
389        }
390    }
391
392    edges
393}
394
395/// Convert CFG blocks to petgraph for algorithmic operations
396///
397/// This creates a DiGraph representation suitable for petgraph algorithms
398/// like isomorphism checking and edit distance computation.
399///
400/// # Arguments
401///
402/// * `blocks` - CFG block data
403///
404/// # Returns
405///
406/// * `DiGraph<i64, ()>` - Graph where nodes are block IDs and edges are unlabeled
407pub fn blocks_to_petgraph(blocks: &[CfgBlockData]) -> DiGraph<i64, ()> {
408    let mut graph = DiGraph::new();
409
410    // Add nodes
411    let mut node_indices = HashMap::new();
412    for (idx, _block) in blocks.iter().enumerate() {
413        let block_id = idx as i64;
414        let node_idx = graph.add_node(block_id);
415        node_indices.insert(block_id, node_idx);
416    }
417
418    // Add edges based on terminators
419    for (idx, block) in blocks.iter().enumerate() {
420        let from_id = idx as i64;
421        let from_idx = node_indices[&from_id];
422
423        match block.terminator.as_str() {
424            "fallthrough" | "goto" => {
425                if idx + 1 < blocks.len() {
426                    let to_idx = node_indices[&((idx + 1) as i64)];
427                    graph.add_edge(from_idx, to_idx, ());
428                }
429            }
430            "conditional" => {
431                if idx + 1 < blocks.len() {
432                    let to_idx = node_indices[&((idx + 1) as i64)];
433                    graph.add_edge(from_idx, to_idx, ());
434                }
435                if idx + 2 < blocks.len() {
436                    let to_idx = node_indices[&((idx + 2) as i64)];
437                    graph.add_edge(from_idx, to_idx, ());
438                }
439            }
440            "call" => {
441                if idx + 1 < blocks.len() {
442                    let to_idx = node_indices[&((idx + 1) as i64)];
443                    graph.add_edge(from_idx, to_idx, ());
444                }
445            }
446            _ => {
447                // No edges for return, panic, etc.
448            }
449        }
450    }
451
452    graph
453}
454
455#[cfg(test)]
456mod tests {
457    use super::*;
458    use crate::storage::CfgBlockData;
459
460    #[test]
461    fn test_block_diff_equality() {
462        let block1 = BlockDiff {
463            block_id: 1,
464            kind: "entry".to_string(),
465            terminator: "fallthrough".to_string(),
466            source_location: "1:0-10:0".to_string(),
467        };
468
469        let block2 = BlockDiff {
470            block_id: 1,
471            kind: "entry".to_string(),
472            terminator: "fallthrough".to_string(),
473            source_location: "1:0-10:0".to_string(),
474        };
475
476        assert_eq!(block1, block2);
477    }
478
479    #[test]
480    fn test_blocks_to_petgraph() {
481        let blocks = vec![
482            CfgBlockData {
483                id: 0,
484                kind: "entry".to_string(),
485                terminator: "fallthrough".to_string(),
486                byte_start: 0,
487                byte_end: 10,
488                start_line: 1,
489                start_col: 0,
490                end_line: 2,
491                end_col: 0,
492                coord_x: 0,
493                coord_y: 0,
494                coord_z: 0,
495            },
496            CfgBlockData {
497                id: 1,
498                kind: "normal".to_string(),
499                terminator: "return".to_string(),
500                byte_start: 10,
501                byte_end: 20,
502                start_line: 2,
503                start_col: 0,
504                end_line: 3,
505                end_col: 0,
506                coord_x: 0,
507                coord_y: 0,
508                coord_z: 0,
509            },
510        ];
511
512        let graph = blocks_to_petgraph(&blocks);
513
514        assert_eq!(graph.node_count(), 2);
515        assert_eq!(graph.edge_count(), 1);
516    }
517
518    #[test]
519    fn test_derive_edges() {
520        let mut blocks = HashMap::new();
521
522        blocks.insert(
523            0,
524            BlockDiff {
525                block_id: 0,
526                kind: "entry".to_string(),
527                terminator: "fallthrough".to_string(),
528                source_location: "1:0-5:0".to_string(),
529            },
530        );
531
532        blocks.insert(
533            1,
534            BlockDiff {
535                block_id: 1,
536                kind: "normal".to_string(),
537                terminator: "return".to_string(),
538                source_location: "5:0-10:0".to_string(),
539            },
540        );
541
542        let edges = derive_edges(&blocks);
543
544        assert_eq!(edges.len(), 1);
545        assert_eq!(edges[0].from_block, 0);
546        assert_eq!(edges[0].to_block, 1);
547        assert_eq!(edges[0].edge_type, "fallthrough");
548    }
549
550    #[test]
551    fn test_compute_edge_diff() {
552        let mut before = HashMap::new();
553        before.insert(
554            0,
555            BlockDiff {
556                block_id: 0,
557                kind: "entry".to_string(),
558                terminator: "fallthrough".to_string(),
559                source_location: "1:0-5:0".to_string(),
560            },
561        );
562        // Need a second block for fallthrough to target
563        before.insert(
564            1,
565            BlockDiff {
566                block_id: 1,
567                kind: "normal".to_string(),
568                terminator: "return".to_string(),
569                source_location: "5:0-10:0".to_string(),
570            },
571        );
572
573        let mut after = HashMap::new();
574        after.insert(
575            0,
576            BlockDiff {
577                block_id: 0,
578                kind: "entry".to_string(),
579                terminator: "return".to_string(), // Changed: no more fallthrough
580                source_location: "1:0-5:0".to_string(),
581            },
582        );
583        // Keep the second block
584        after.insert(
585            1,
586            BlockDiff {
587                block_id: 1,
588                kind: "normal".to_string(),
589                terminator: "return".to_string(),
590                source_location: "5:0-10:0".to_string(),
591            },
592        );
593
594        let (added, deleted) = compute_edge_diff(&before, &after).unwrap();
595
596        // Before had an edge (0->1 fallthrough), after has none
597        assert_eq!(added.len(), 0);
598        assert_eq!(deleted.len(), 1);
599    }
600}