geographdb-core 0.4.0

Geometric graph database core - 3D spatial indexing for code analysis
Documentation
//! Natural Loop Detection using Dominance Information
//!
//! Finds natural loops in a CFG using back-edges and dominators.
//!
//! # Definitions
//!
//! - **Back-edge**: Edge from node A to node B where B dominates A
//! - **Natural Loop**: Loop with a single entry point (header)
//! - **Header**: First node in loop, dominates all other loop nodes
//!
//! # Algorithm
//!
//! 1. Find all back-edges using dominator information
//! 2. For each back-edge, find loop body (nodes that can reach source without going through header)
//! 3. Return set of natural loops

use crate::algorithms::astar::CfgGraphNode;
use crate::algorithms::dominance::{compute_dominance, dominates};
use std::collections::{HashMap, HashSet, VecDeque};

/// A natural loop in the CFG
#[derive(Debug, Clone)]
pub struct NaturalLoop {
    /// Loop header (entry point)
    pub header: u64,
    /// Loop latch (node with back-edge to header)
    pub latch: u64,
    /// All nodes in the loop body
    pub body: HashSet<u64>,
    /// Predecessors outside the loop
    pub pre_header: HashSet<u64>,
}

/// Result of natural loop analysis
#[derive(Debug, Clone)]
pub struct NaturalLoopResult {
    /// All natural loops found
    pub loops: Vec<NaturalLoop>,
    /// Back-edges (source, target) pairs
    pub back_edges: Vec<(u64, u64)>,
}

/// Find all back-edges in the CFG
///
/// A back-edge is an edge from node A to node B where B dominates A.
pub fn find_back_edges(nodes: &[CfgGraphNode], entry_id: u64) -> Vec<(u64, u64)> {
    let dom_result = compute_dominance(nodes, entry_id);
    let mut back_edges = Vec::new();

    for node in nodes {
        for &succ in &node.successors {
            if dominates(succ, node.id, &dom_result) {
                back_edges.push((node.id, succ));
            }
        }
    }

    back_edges
}

/// Find all natural loops in the CFG
///
/// # Arguments
/// * `nodes` - CFG nodes
/// * `entry_id` - Entry node ID
///
/// # Returns
/// NaturalLoopResult with all loops and back-edges
pub fn find_natural_loops(nodes: &[CfgGraphNode], entry_id: u64) -> NaturalLoopResult {
    let back_edges = find_back_edges(nodes, entry_id);
    let mut loops = Vec::new();

    // Build predecessor map
    let mut predecessors: HashMap<u64, Vec<u64>> = HashMap::new();
    for node in nodes {
        for &succ in &node.successors {
            predecessors.entry(succ).or_default().push(node.id);
        }
    }

    // Find natural loop for each back-edge
    for (source, target) in &back_edges {
        let loop_body = find_loop_body(nodes, *source, *target, &predecessors);

        // Find pre-header (predecessors outside loop)
        let mut pre_header = HashSet::new();
        for &node_id in &loop_body {
            if let Some(preds) = predecessors.get(&node_id) {
                for &pred in preds {
                    if !loop_body.contains(&pred) {
                        pre_header.insert(pred);
                    }
                }
            }
        }

        loops.push(NaturalLoop {
            header: *target,
            latch: *source,
            body: loop_body,
            pre_header,
        });
    }

    NaturalLoopResult { loops, back_edges }
}

/// Find the body of a natural loop given a back-edge
fn find_loop_body(
    _nodes: &[CfgGraphNode],
    latch: u64,
    header: u64,
    predecessors: &HashMap<u64, Vec<u64>>,
) -> HashSet<u64> {
    let mut body = HashSet::new();
    body.insert(latch);
    body.insert(header);

    // Work backwards from latch to find all nodes that can reach latch
    // without going through header
    let mut worklist: VecDeque<u64> = VecDeque::new();
    worklist.push_back(latch);

    while let Some(node_id) = worklist.pop_front() {
        if let Some(preds) = predecessors.get(&node_id) {
            for &pred in preds {
                if !body.contains(&pred) {
                    body.insert(pred);
                    worklist.push_back(pred);
                }
            }
        }
    }

    body
}

/// Check if a node is a loop header (has incoming back-edge)
pub fn is_loop_header(node_id: u64, back_edges: &[(u64, u64)]) -> bool {
    back_edges.iter().any(|&(_, target)| target == node_id)
}

/// Get the nesting depth of a loop (how many loops contain it)
pub fn get_loop_nesting_depth(loop_header: u64, loops: &[NaturalLoop]) -> usize {
    let mut depth = 0;
    for loop_ in loops {
        if loop_.header != loop_header && loop_.body.contains(&loop_header) {
            depth += 1;
        }
    }
    depth
}

/// Find innermost loop containing a node (smallest loop body)
pub fn find_innermost_loop_for_node(node_id: u64, loops: &[NaturalLoop]) -> Option<&NaturalLoop> {
    loops
        .iter()
        .filter(|l| l.body.contains(&node_id))
        .min_by_key(|l| l.body.len())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_find_back_edges_no_loops() {
        // Linear: 0 -> 1 -> 2
        let nodes = vec![
            CfgGraphNode {
                id: 0,
                x: 0.0,
                y: 0.0,
                z: 0.0,
                successors: vec![1],
            },
            CfgGraphNode {
                id: 1,
                x: 1.0,
                y: 0.0,
                z: 0.0,
                successors: vec![2],
            },
            CfgGraphNode {
                id: 2,
                x: 2.0,
                y: 0.0,
                z: 0.0,
                successors: vec![],
            },
        ];

        let back_edges = find_back_edges(&nodes, 0);
        assert_eq!(back_edges.len(), 0);
    }

    #[test]
    fn test_find_back_edges_simple_loop() {
        // Loop: 0 -> 1 -> 2 -> 1
        let nodes = vec![
            CfgGraphNode {
                id: 0,
                x: 0.0,
                y: 0.0,
                z: 0.0,
                successors: vec![1],
            },
            CfgGraphNode {
                id: 1,
                x: 1.0,
                y: 0.0,
                z: 0.0,
                successors: vec![2],
            },
            CfgGraphNode {
                id: 2,
                x: 2.0,
                y: 0.0,
                z: 0.0,
                successors: vec![1],
            },
        ];

        let back_edges = find_back_edges(&nodes, 0);
        assert_eq!(back_edges.len(), 1);
        assert_eq!(back_edges[0], (2, 1));
    }

    #[test]
    fn test_find_natural_loops_simple() {
        // Loop: 0 -> 1 -> 2 -> 1
        let nodes = vec![
            CfgGraphNode {
                id: 0,
                x: 0.0,
                y: 0.0,
                z: 0.0,
                successors: vec![1],
            },
            CfgGraphNode {
                id: 1,
                x: 1.0,
                y: 0.0,
                z: 0.0,
                successors: vec![2],
            },
            CfgGraphNode {
                id: 2,
                x: 2.0,
                y: 0.0,
                z: 0.0,
                successors: vec![1],
            },
        ];

        let result = find_natural_loops(&nodes, 0);
        assert_eq!(result.loops.len(), 1);
        assert_eq!(result.back_edges.len(), 1);

        let loop_ = &result.loops[0];
        assert_eq!(loop_.header, 1);
        assert_eq!(loop_.latch, 2);
        assert!(loop_.body.contains(&1));
        assert!(loop_.body.contains(&2));
    }

    #[test]
    fn test_find_natural_loops_nested() {
        // Nested loops: outer 0->1->2->1, inner 1->3->4->3
        let nodes = vec![
            CfgGraphNode {
                id: 0,
                x: 0.0,
                y: 0.0,
                z: 0.0,
                successors: vec![1],
            },
            CfgGraphNode {
                id: 1,
                x: 1.0,
                y: 0.0,
                z: 0.0,
                successors: vec![2, 3],
            },
            CfgGraphNode {
                id: 2,
                x: 2.0,
                y: 0.0,
                z: 0.0,
                successors: vec![1],
            },
            CfgGraphNode {
                id: 3,
                x: 3.0,
                y: 0.0,
                z: 0.0,
                successors: vec![4],
            },
            CfgGraphNode {
                id: 4,
                x: 4.0,
                y: 0.0,
                z: 0.0,
                successors: vec![3],
            },
        ];

        let result = find_natural_loops(&nodes, 0);
        assert!(result.loops.len() >= 2);
    }

    #[test]
    fn test_is_loop_header() {
        let back_edges = vec![(2, 1), (4, 3)];
        assert!(is_loop_header(1, &back_edges));
        assert!(is_loop_header(3, &back_edges));
        assert!(!is_loop_header(0, &back_edges));
    }

    #[test]
    fn test_find_innermost_loop_for_node() {
        let loops = vec![
            NaturalLoop {
                header: 1,
                latch: 2,
                body: vec![1, 2, 3].into_iter().collect(),
                pre_header: vec![0].into_iter().collect(),
            },
            NaturalLoop {
                header: 2,
                latch: 3,
                body: vec![2, 3].into_iter().collect(),
                pre_header: vec![1].into_iter().collect(),
            },
        ];

        // Innermost = smallest loop containing the node
        let innermost = find_innermost_loop_for_node(3, &loops);
        assert!(innermost.is_some());
        assert_eq!(innermost.unwrap().header, 2);
    }
}