// vyre 0.4.0
//
// GPU compute intermediate representation with a standard operation library
// Documentation
use crate::ir::{BufferDecl, DataType, Expr, Node, Program};
use crate::ops::{AlgebraicLaw, OpSpec, U32_U32_INPUTS, U32_OUTPUTS};

// Least common multiple of two u32 values via the Euclidean GCD algorithm.
//
// lcm(a, b) = (a / gcd(a, b)) * b
//
// The GCD loop is bounded to 64 iterations — sufficient for all 32-bit inputs:
// by Lamé's theorem the worst case is a pair of consecutive Fibonacci numbers,
// and the largest such pair below 2^32 (F46, F47) needs fewer than 48 steps.


// lcm(a, lcm(b, c)) != lcm(lcm(a, b), c) under u32 wrapping multiplication:
// the Euclidean lcm formula `(x / gcd(x, y)) * y` overflows silently for
// operands whose product exceeds 2^32, and the associativity property
// depends on the exact-arithmetic equivalence of those two
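// factorizations. Example that the declared-laws test caught on
// 2026-04-17: (a, b, c) = (5, 5, 2147483647), i.e. c = i32::MAX (u32::MAX
// does not reproduce it), gives left = lcm(lcm(a, b), c) = 2147483643 and
// right = lcm(a, lcm(b, c)) = 2147483623. Associative is removed per the
// anti-rigging law;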
// if an application needs associative LCM it must use u64 or BigInt.
/// Algebraic laws declared for the LCM operation; passed to `Lcm::SPEC`.
pub const LAWS: &[AlgebraicLaw] = &[
    // Operand order does not matter: lcm(a, b) == lcm(b, a). Both orders
    // reduce to a * b / gcd(a, b) before truncation to 32 bits.
    AlgebraicLaw::Commutative,
    // 1 is declared as the identity element: lcm(a, 1) == a.
    AlgebraicLaw::Identity { element: 1 },
    // 0 is declared as absorbing on the left: lcm(0, b) == 0.
    AlgebraicLaw::LeftAbsorbing { element: 0 },
];

/// Least common multiple operation.
///
/// Unit marker type: it holds no data and only namespaces the operation's
/// `SPEC` descriptor and its `program` IR builder.
#[derive(Debug, Clone, Copy, Default)]
pub struct Lcm;

impl Lcm {
    /// Declarative operation specification.
    ///
    /// Laws are declared as explicit `AlgebraicLaw` values on `SPEC`.
    pub const SPEC: OpSpec = OpSpec::composition_inlinable(
        "primitive.math.lcm",
        U32_U32_INPUTS,
        U32_OUTPUTS,
        LAWS,
        Self::program,
    );

    /// Build the canonical IR program.
    ///
    /// # Examples
    ///
    /// ```
    /// use vyre::ops::primitive::lcm::Lcm;
    ///
    /// let program = Lcm::program();
    /// assert!(!program.entry().is_empty());
    /// ```
    #[must_use]
    pub fn program() -> Program {
        let idx = Expr::var("idx");
        Program::new(
            vec![
                BufferDecl::read("a", 0, DataType::U32),
                BufferDecl::read("b", 1, DataType::U32),
                BufferDecl::output("out", 2, DataType::U32),
            ],
            crate::ops::primitive::WORKGROUP_SIZE,
            vec![
                Node::let_bind("idx", Expr::gid_x()),
                Node::if_then(
                    Expr::lt(idx.clone(), Expr::buf_len("out")),
                    vec![
                        Node::let_bind("x", Expr::load("a", idx.clone())),
                        Node::let_bind("y", Expr::load("b", idx.clone())),
                        Node::let_bind("orig_a", Expr::var("x")),
                        Node::let_bind("orig_b", Expr::var("y")),
                        Node::loop_for(
                            "i",
                            Expr::u32(0),
                            Expr::u32(64),
                            vec![Node::if_then(
                                Expr::ne(Expr::var("y"), Expr::u32(0)),
                                vec![
                                    Node::let_bind("t", Expr::var("y")),
                                    Node::assign(
                                        "y",
                                        Expr::rem(Expr::var("x"), Expr::var("y")),
                                    ),
                                    Node::assign("x", Expr::var("t")),
                                ],
                            )],
                        ),
                        Node::let_bind("gcd", Expr::var("x")),
                        Node::store(
                            "out",
                            idx,
                            Expr::mul(
                                Expr::div(Expr::var("orig_a"), Expr::var("gcd")),
                                Expr::var("orig_b"),
                            ),
                        ),
                    ],
                ),
            ],
        )
    }
}

// WGSL lowering marker for `primitive.math.lcm`.
//
// Not a stub: this is a zero-overhead Category A marker. `Lcm::program`
// builds concrete IR through a custom composition in `kernel.rs`;
// `core/src/lower/wgsl/expr.rs` and `core/src/lower/wgsl/node.rs` emit WGSL.
// `core/tests/conformance.rs::conformance_all_primitives` verifies
// lowered GPU bytes are bit-exact against the conform CPU reference.
//
// ```wgsl
// var x = _vyre_load_a(idx);
// var y = _vyre_load_b(idx);
// let orig_a = x;
// let orig_b = y;
// for (var i = 0u; i < 64u; i = i + 1u) {
//     if (y != 0u) {
//         let t = y;
//         y = _vyre_safe_mod_u32(x, y);
//         x = t;
//     }
// }
// let gcd = x;
// _vyre_store_out(idx, (_vyre_safe_div_u32(orig_a, gcd) * orig_b));
// ```