vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
//! Dominator tree — the control-flow primitive for borrow checking and
//! optimization passes.
//!
//! Every compiler needs dominator information: SSA construction, borrow-check
//! region inference, loop detection, and code motion all depend on it.  Vyre
//! provides `dominator_tree` as a first-class workgroup-local primitive.  The
//! CPU reference implements the classic Cooper-Harvey-Kennedy iterative
//! algorithm over a CSR CFG; the WGSL kernel performs the exact same
//! reverse-postorder walk and intersection in workgroup SRAM.  This is the
//! sequential-coordination abstraction that lets a model emit borrow-check
//! logic without ever writing a shader.

use crate::ops::AlgebraicLaw;
use crate::ir::transform::compiler::{U32X4_INPUTS, U32_OUTPUTS};
use crate::lower::wgsl::compiler::wgsl_backend;
use crate::ops::{IntrinsicDescriptor, OpSpec};
use thiserror::Error;

/// Portable WGSL source for the dominator-tree primitive.
#[must_use]
pub const fn source() -> &'static str {
    include_str!("../../../lower/wgsl/compiler/dominator_tree.wgsl")
}

/// Dominator-tree validation errors.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum DominatorTreeError {
    /// Offset table has no terminal offset.
    #[error("DominatorEmptyOffsets: successor_offsets must include node_count + 1 entries. Fix: emit a valid CSR offset table.")]
    EmptyOffsets,
    /// Entry node is outside the CFG.
    #[error("DominatorInvalidEntry: entry {entry} outside node_count {node_count}. Fix: pass a valid CFG entry node.")]
    InvalidEntry {
        /// Invalid entry.
        entry: u32,
        /// CFG node count.
        node_count: usize,
    },
    /// CSR offset is invalid.
    #[error("DominatorInvalidOffset: CSR offsets must be monotone and within successors. Fix: rebuild successor_offsets.")]
    InvalidOffset,
    /// Node id cannot fit in host index space.
    #[error(
        "DominatorNodeIndexOverflow: node id cannot fit usize. Fix: split the CFG before dispatch."
    )]
    NodeIndexOverflow,
    /// Successor node is outside the CFG.
    #[error("DominatorInvalidSuccessor: successor {successor} outside node_count {node_count}. Fix: validate CFG edge endpoints.")]
    InvalidSuccessor {
        /// Invalid successor id.
        successor: u32,
        /// CFG node count.
        node_count: usize,
    },
}

/// Category C dominator-tree intrinsic.
#[derive(Debug, Default, Clone, Copy)]
pub struct DominatorTreeOp;

/// Compute immediate dominators using the Cooper-Harvey-Kennedy iteration.
///
/// Unreachable nodes receive `u32::MAX`; the entry immediately dominates
/// itself.
///
/// # Errors
///
/// Returns `Fix: ...` when CSR buffers are malformed or the entry is invalid.
pub fn immediate_dominators(
    entry: u32,
    successor_offsets: &[u32],
    successors: &[u32],
) -> Result<Vec<u32>, DominatorTreeError> {
    let node_count = successor_offsets
        .len()
        .checked_sub(1)
        .ok_or(DominatorTreeError::EmptyOffsets)?;
    let entry_index = usize::try_from(entry).map_err(|_| DominatorTreeError::NodeIndexOverflow)?;
    if entry_index >= node_count {
        return Err(DominatorTreeError::InvalidEntry { entry, node_count });
    }
    validate_graph(node_count, successor_offsets, successors)?;
    let rpo = reverse_postorder(entry_index, successor_offsets, successors)?;
    let mut order = vec![UNREACHABLE; node_count];
    for (rank, &node) in rpo.iter().enumerate() {
        order[node] = u32::try_from(rank).map_err(|_| DominatorTreeError::NodeIndexOverflow)?;
    }
    let preds = predecessors(node_count, successor_offsets, successors)?;
    let mut idom = vec![UNREACHABLE; node_count];
    idom[entry_index] = entry;
    let mut changed = true;
    while changed {
        changed = false;
        for &node in rpo.iter().skip(1) {
            let mut new_idom = UNREACHABLE;
            for &pred in &preds[node] {
                let pred_index =
                    usize::try_from(pred).map_err(|_| DominatorTreeError::NodeIndexOverflow)?;
                if idom[pred_index] != UNREACHABLE {
                    new_idom = if new_idom == UNREACHABLE {
                        pred
                    } else {
                        intersect(pred, new_idom, &idom, &order)?
                    };
                }
            }
            if idom[node] != new_idom {
                idom[node] = new_idom;
                changed = true;
            }
        }
    }
    Ok(idom)
}

impl DominatorTreeOp {
    /// Declarative operation specification.
    pub const SPEC: OpSpec = OpSpec::intrinsic(
        "compiler_primitives.dominator_tree",
        U32X4_INPUTS,
        U32_OUTPUTS,
        LAWS,
        wgsl_backend,
        IntrinsicDescriptor::new(
            "compiler_primitives_dominator_tree",
            "workgroup_cfg_analysis",
            crate::ops::cpu_op::structured_intrinsic_cpu,
        ),
    );
}

/// Safely cast a `u32` node id to `usize` for host indexing.
///
/// # Errors
///
/// Returns `DominatorTreeError::NodeIndexOverflow` if the value does not fit.
pub fn index(value: u32) -> Result<usize, DominatorTreeError> {
    usize::try_from(value).map_err(|_| DominatorTreeError::NodeIndexOverflow)
}

/// Intersect two node ids up the immediate-dominator tree.
///
/// Walks the higher-ranked node up the `idom` chain until both pointers meet.
/// This is the standard CHK intersect routine used during fixed-point
/// iteration.
pub fn intersect(
    mut left: u32,
    mut right: u32,
    idom: &[u32],
    order: &[u32],
) -> Result<u32, DominatorTreeError> {
    while left != right {
        while order[index(left)?] > order[index(right)?] {
            left = idom[index(left)?];
        }
        while order[index(right)?] > order[index(left)?] {
            right = idom[index(right)?];
        }
    }
    Ok(left)
}

/// Algebraic laws declared by the dominator-tree primitive.
pub const LAWS: &[AlgebraicLaw] = &[AlgebraicLaw::Bounded {
    lo: 0,
    hi: u32::MAX,
}];

/// Build a predecessor list for every CFG node from CSR successor data.
pub fn predecessors(
    node_count: usize,
    offsets: &[u32],
    successors: &[u32],
) -> Result<Vec<Vec<u32>>, DominatorTreeError> {
    let mut preds = vec![Vec::new(); node_count];
    for node in 0..node_count {
        for &succ in &successors[index(offsets[node])?..index(offsets[node + 1])?] {
            preds[index(succ)?]
                .push(u32::try_from(node).map_err(|_| DominatorTreeError::NodeIndexOverflow)?);
        }
    }
    Ok(preds)
}

/// Compute a reverse-postorder sequence starting from `entry`.
///
/// The CHK algorithm converges faster when nodes are processed in RPO.
/// This routine uses an explicit stack so the traversal bound is controlled
/// and mirrored by the WGSL reference.
pub fn reverse_postorder(
    entry: usize,
    offsets: &[u32],
    successors: &[u32],
) -> Result<Vec<usize>, DominatorTreeError> {
    let mut seen = vec![false; offsets.len() - 1];
    let mut stack = vec![(entry, false)];
    let mut postorder = Vec::new();
    while let Some((node, expanded)) = stack.pop() {
        if expanded {
            postorder.push(node);
            continue;
        }
        if seen[node] {
            continue;
        }
        seen[node] = true;
        stack.push((node, true));
        let start = index(offsets[node])?;
        let end = index(offsets[node + 1])?;
        for &succ in successors[start..end].iter().rev() {
            let succ_index = index(succ)?;
            if !seen[succ_index] {
                stack.push((succ_index, false));
            }
        }
    }
    postorder.reverse();
    Ok(postorder)
}

/// Sentinel value meaning "no immediate dominator" or "unreachable node".
pub const UNREACHABLE: u32 = u32::MAX;

/// Validate that CSR offsets and successors describe a well-formed CFG.
///
/// # Errors
///
/// Returns `Fix: ...` when offsets are non-monotone, out of range, or any
/// successor node id exceeds the graph bounds.
pub fn validate_graph(
    node_count: usize,
    offsets: &[u32],
    successors: &[u32],
) -> Result<(), DominatorTreeError> {
    let mut previous = 0usize;
    for &offset in offsets {
        let current = index(offset)?;
        if current < previous || current > successors.len() {
            return Err(DominatorTreeError::InvalidOffset);
        }
        previous = current;
    }
    for &successor in successors {
        if index(successor)? >= node_count {
            return Err(DominatorTreeError::InvalidSuccessor {
                successor,
                node_count,
            });
        }
    }
    Ok(())
}

/// Workgroup size used by the reference WGSL lowering.
pub const WORKGROUP_SIZE: [u32; 3] = [64, 1, 1];