vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
//! Dataflow fixpoint — monotone forward dataflow analysis on GPU.
//!
//! Compiler analyses such as liveness, reaching definitions, and available
//! expressions are all iterative dataflow to a fixed point over a control-flow
//! graph.  Vyre provides that sequential coordination as a first-class
//! primitive.  The CPU reference performs a classic bitwise-OR forward
//! iteration with a bounded sweep count; the WGSL kernel performs the exact
//! same lattice join in workgroup-local SRAM, stopping when the change bit
//! goes cold.  This lets a model emit `dataflow_fixpoint` as an IR node
//! instead of hand-writing warp-synchronized shader loops.

use crate::ops::AlgebraicLaw;
use crate::ir::transform::compiler::{U32X2_OUTPUTS, U32X4_INPUTS};
use crate::lower::wgsl::compiler::wgsl_backend;
use crate::ops::{IntrinsicDescriptor, OpSpec};
use thiserror::Error;

/// Portable WGSL source for the dataflow fixpoint primitive.
#[must_use]
pub const fn source() -> &'static str {
    include_str!("../../../lower/wgsl/compiler/dataflow_fixpoint.wgsl")
}

/// Compute a monotone forward dataflow fixed point over a CSR successor graph.
///
/// Each node contributes `state[node] | transfer[node]` to every successor via
/// bitwise OR. This models compiler analyses whose states are bounded `u32`
/// lattices such as liveness or reaching definitions.
///
/// # Errors
///
/// Returns `Fix: ...` when graph buffers are malformed or convergence exceeds
/// `max_iterations`.
pub fn compute_fixpoint(
    initial_state: &[u32],
    transfer: &[u32],
    successor_offsets: &[u32],
    successors: &[u32],
    max_iterations: u32,
) -> Result<FixpointResult, DataflowFixpointError> {
    validate_graph(initial_state.len(), transfer, successor_offsets, successors)?;
    let mut state = initial_state.to_vec();
    for iteration in 0..max_iterations {
        let mut changed = false;
        for node in 0..state.len() {
            let propagated = state[node] | transfer[node];
            let start = usize::try_from(successor_offsets[node])
                .map_err(|_| DataflowFixpointError::OffsetOverflow)?;
            let end = usize::try_from(successor_offsets[node + 1])
                .map_err(|_| DataflowFixpointError::OffsetOverflow)?;
            for &successor in &successors[start..end] {
                let successor_index = usize::try_from(successor)
                    .map_err(|_| DataflowFixpointError::NodeIndexOverflow)?;
                let joined = state[successor_index] | propagated;
                if joined != state[successor_index] {
                    state[successor_index] = joined;
                    changed = true;
                }
            }
        }
        if !changed {
            return Ok(FixpointResult {
                state,
                iterations: iteration + 1,
            });
        }
    }
    Err(DataflowFixpointError::DidNotConverge { max_iterations })
}

/// Dataflow fixpoint validation errors.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum DataflowFixpointError {
    /// Transfer table length does not match node count.
    #[error("DataflowTransferLength: expected {expected} transfer entries, got {got}. Fix: emit one transfer mask per CFG node.")]
    TransferLength {
        /// Expected transfer entries.
        expected: usize,
        /// Actual transfer entries.
        got: usize,
    },
    /// CSR offset length is invalid.
    #[error("DataflowOffsetLength: expected {expected} offsets, got {got}. Fix: emit node_count + 1 CSR offsets.")]
    OffsetLength {
        /// Expected offsets.
        expected: usize,
        /// Actual offsets.
        got: usize,
    },
    /// CSR offsets are not monotone or exceed successor length.
    #[error("DataflowInvalidOffset: CSR offsets must be monotone and within successors. Fix: rebuild successor_offsets.")]
    InvalidOffset,
    /// CSR offset cannot fit in host index space.
    #[error("DataflowOffsetOverflow: CSR offset cannot fit usize. Fix: split the graph before dispatch.")]
    OffsetOverflow,
    /// Node id cannot fit in host index space.
    #[error("DataflowNodeIndexOverflow: node id cannot fit usize. Fix: split the graph before dispatch.")]
    NodeIndexOverflow,
    /// Successor node id is outside the graph.
    #[error("DataflowInvalidSuccessor: successor {successor} outside node_count {node_count}. Fix: validate CFG edge endpoints.")]
    InvalidSuccessor {
        /// Invalid successor id.
        successor: u32,
        /// Node count.
        node_count: usize,
    },
    /// Iteration cap was reached before convergence.
    #[error("DataflowDidNotConverge: no fixed point within {max_iterations} iterations. Fix: raise the bounded iteration cap or inspect non-monotone transfer data.")]
    DidNotConverge {
        /// Maximum number of iterations attempted.
        max_iterations: u32,
    },
}

/// Category C iterative dataflow intrinsic.
#[derive(Debug, Default, Clone, Copy)]
pub struct DataflowFixpointOp;

/// Fixed-point state and iteration count.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FixpointResult {
    /// Converged per-node bitset state.
    pub state: Vec<u32>,
    /// Number of full graph sweeps executed.
    pub iterations: u32,
}

impl DataflowFixpointOp {
    /// Declarative operation specification.
    pub const SPEC: OpSpec = OpSpec::intrinsic(
        "compiler_primitives.dataflow_fixpoint",
        U32X4_INPUTS,
        U32X2_OUTPUTS,
        LAWS,
        wgsl_backend,
        IntrinsicDescriptor::new(
            "compiler_primitives_dataflow_fixpoint",
            "workgroup_change_bit",
            crate::ops::cpu_op::structured_intrinsic_cpu,
        ),
    );
}

/// Algebraic laws declared by the dataflow-fixpoint primitive.
pub const LAWS: &[AlgebraicLaw] = &[AlgebraicLaw::Bounded {
    lo: 0,
    hi: u32::MAX,
}];

/// Validate that transfer, offsets, and successors describe a well-formed
/// dataflow graph.
///
/// # Errors
///
/// Returns `Fix: ...` when buffer lengths mismatch, offsets are invalid, or
/// any successor index overflows the node space.
pub fn validate_graph(
    node_count: usize,
    transfer: &[u32],
    offsets: &[u32],
    successors: &[u32],
) -> Result<(), DataflowFixpointError> {
    if transfer.len() != node_count {
        return Err(DataflowFixpointError::TransferLength {
            expected: node_count,
            got: transfer.len(),
        });
    }
    if offsets.len() != node_count.saturating_add(1) {
        return Err(DataflowFixpointError::OffsetLength {
            expected: node_count.saturating_add(1),
            got: offsets.len(),
        });
    }
    let mut previous = 0usize;
    for &offset in offsets {
        let current = usize::try_from(offset).map_err(|_| DataflowFixpointError::OffsetOverflow)?;
        if current < previous || current > successors.len() {
            return Err(DataflowFixpointError::InvalidOffset);
        }
        previous = current;
    }
    for &successor in successors {
        let index =
            usize::try_from(successor).map_err(|_| DataflowFixpointError::NodeIndexOverflow)?;
        if index >= node_count {
            return Err(DataflowFixpointError::InvalidSuccessor {
                successor,
                node_count,
            });
        }
    }
    Ok(())
}

/// Workgroup size used by the reference WGSL lowering.
pub const WORKGROUP_SIZE: [u32; 3] = [64, 1, 1];