vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
use crate::ir::{BufferDecl, DataType, Expr, Node, Program};
use crate::ops::{OpSpec, F32_F32_INPUTS, I32_OUTPUTS};

/// f32 three-way comparison returning -1, 0, 1 as `i32`.
///
/// The result is `-1` when `a < b`, `1` when `a > b`, and `0` otherwise.
///
/// # NaN policy (per audit `primitive-laws-kimi-AUDIT`, observation)
///
/// IEEE-754 `f32` comparisons are **not** a total order. If either input
/// is `NaN`, the comparison is *unordered* per the spec. This op
/// resolves the unordered case as follows:
///
/// - `NaN vs anything` and `anything vs NaN` → `0` (the same value that
///   is returned for equal operands).
///   This mirrors the behavior obtained by composing the underlying
///   `Lt`/`Gt` selectors, which both return `false` on unordered
///   operands, causing the fallthrough path to emit the `0` branch.
///
/// This is an intentional choice, not an accident. Consumers that
/// require a total order that also places NaNs must compose a separate
/// op on top of bit-pattern comparison (e.g. the transformation used by
/// `f32::total_cmp`: reinterpret the bits as `i32`, flip the lower 31
/// bits of negative values, then compare as signed integers). The
/// `Trichotomy` algebraic law is **not** declared for this op because
/// trichotomy fails under this resolution.
///
/// # Implications for conform-gate laws
/// - `Reflexivity`: treated as NOT holding. The op does return `0` for
///   `f32_cmp(NaN, NaN)`, but that `0` is the unordered fallthrough, not
///   a statement that `NaN` equals itself; in practice callers must
///   treat the `0` return from a NaN-involved comparison as
///   "cannot order", not as "equal".
/// - `Antisymmetry`: holds for non-NaN inputs; trivially satisfied for
///   NaN (both `a cmp b` and `b cmp a` return `0`).
/// - `Trichotomy`: **does not hold**; deliberately not declared.
///
/// Because a NaN compares as `0` against every value, the `0` result is
/// not transitive when NaNs are present (`1.0 cmp NaN = 0` and
/// `NaN cmp 2.0 = 0`, yet `1.0 cmp 2.0 = -1`). Do not use this op as a
/// sort comparator on data that may contain NaN.
#[derive(Debug, Clone, Copy, Default)]
pub struct F32Cmp;

impl F32Cmp {
    /// Operation spec for the f32 three-way comparison.
    ///
    /// Built with `OpSpec::composition_inlinable` under the id
    /// `primitive.float.f32_cmp`, using the shared `F32_F32_INPUTS` /
    /// `I32_OUTPUTS` signatures and [`Self::program`] as the program
    /// builder. The empty slice is presumably the list of declared
    /// algebraic laws (none for this op) — TODO confirm against
    /// `OpSpec::composition_inlinable`.
    pub const SPEC: OpSpec = OpSpec::composition_inlinable(
        "primitive.float.f32_cmp",
        F32_F32_INPUTS,
        I32_OUTPUTS,
        &[],
        Self::program,
    );

    /// Builds the IR program that performs the element-wise comparison.
    ///
    /// The program declares two `F32` read buffers (`a`, `b`) and one
    /// `I32` output buffer (`out`). Each invocation binds `idx` to
    /// `gid_x` and, when `idx` is below the length of `out`, stores:
    ///
    /// - `-1` if `a[idx] < b[idx]`,
    /// - `1` if `a[idx] > b[idx]`,
    /// - `0` otherwise.
    ///
    /// NOTE(review): only the length of `out` is checked before `a` and
    /// `b` are loaded at the same index — presumably callers guarantee
    /// the inputs are at least as long as `out`; confirm.
    pub fn program() -> Program {
        let buffers = vec![
            BufferDecl::read("a", 0, DataType::F32),
            BufferDecl::read("b", 1, DataType::F32),
            BufferDecl::output("out", 2, DataType::I32),
        ];

        let idx = Expr::var("idx");

        // Guard: only invocations that map onto an output slot do any work.
        let in_bounds = Expr::lt(idx.clone(), Expr::buf_len("out"));

        // The two strict comparisons; each one loads both operands itself.
        let a_below_b = Expr::lt(Expr::load("a", idx.clone()), Expr::load("b", idx.clone()));
        let a_above_b = Expr::gt(Expr::load("a", idx.clone()), Expr::load("b", idx.clone()));

        // Inner select: 1 when a > b, otherwise 0 (the fallthrough result).
        let above_or_zero = Expr::select(a_above_b, Expr::i32(1), Expr::i32(0));
        // Outer select: -1 when a < b, otherwise defer to the inner select.
        let ordering = Expr::select(a_below_b, Expr::i32(-1), above_or_zero);

        let guarded_store = Node::if_then(in_bounds, vec![Node::store("out", idx, ordering)]);
        let entry = vec![Node::let_bind("idx", Expr::gid_x()), guarded_store];

        Program::new(buffers, crate::ops::primitive::WORKGROUP_SIZE, entry)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir::{self, DataType};

    /// The built program must have a non-empty entry.
    #[test]
    fn program_is_non_empty() {
        let program = F32Cmp::program();
        assert!(!program.entry().is_empty());
    }

    /// The built program must pass IR validation with no reported errors.
    #[test]
    fn program_validates() {
        let program = F32Cmp::program();
        let errors = ir::validate(&program);
        assert!(errors.is_empty(), "validation failed: {errors:?}");
    }

    /// The spec must be registered under the expected operation id.
    #[test]
    fn spec_id_is_correct() {
        let spec = F32Cmp::SPEC;
        assert_eq!(spec.id(), "primitive.float.f32_cmp");
    }

    /// The spec signature must be `(f32, f32) -> i32`.
    #[test]
    fn spec_signature_is_f32_f32_to_i32() {
        let spec = F32Cmp::SPEC;
        let expected_inputs = [DataType::F32, DataType::F32];
        let expected_outputs = [DataType::I32];

        assert_eq!(spec.inputs(), &expected_inputs);
        assert_eq!(spec.outputs(), &expected_outputs);
    }
}