vyre-conform 0.1.0

Conformance suite for vyre backends — proves byte-identical output to CPU reference
Documentation
//! Input generators for deterministic test case production.
//!
//! Each generator targets a specific class of inputs (edge cases, random,
//! boundary lengths, pathological patterns) and produces them based on
//! the op's type signature.

mod boundary;
mod edge_cases;
mod exhaustive;
mod pathological;
mod random;
mod u32_pathological;

pub use boundary::BoundaryLengths;
pub use edge_cases::EdgeCases;
pub use exhaustive::ExhaustiveByteRange;
pub use pathological::Pathological;
pub use random::RandomUniform;
pub use u32_pathological::U32Pathological;

use crate::spec::types::OpSignature;

/// A source of reproducible, labeled test inputs for one op.
///
/// Implementations look at the op's [`OpSignature`] to decide what to emit.
/// The op identifier passed to the `*_for_op` entry points lets a generator
/// add cases that only make sense for a particular op (e.g., shift-by-32 for
/// `shl`, offset+count > 32 for `extract_bits`).
///
/// Generators are expected to return nothing for signature shapes they do not
/// understand, so introducing a new `DataType` or signature shape never forces
/// a change to an existing generator.
pub trait InputGenerator: Send + Sync {
    /// Human-readable generator name; it appears in failure reports.
    fn name(&self) -> &str;

    /// Reports whether this generator is able to produce inputs for `signature`.
    fn handles(&self, signature: &OpSignature) -> bool;

    /// Produces the labeled inputs for `signature`.
    ///
    /// Each element is a `(label, bytes)` pair; the label is meant for humans
    /// reading a failure report. The result must be fully deterministic: the
    /// same signature and the same `seed` always yield the same outputs.
    fn generate(&self, signature: &OpSignature, seed: u64) -> Vec<(String, Vec<u8>)>;

    /// Produces the labeled inputs for `signature`, knowing which op is under test.
    ///
    /// Implementors override this to add op-specific edge cases. The provided
    /// implementation does not look at `op_id` at all and simply forwards to
    /// [`InputGenerator::generate`], so generators that only implement
    /// `generate` keep working unchanged.
    fn generate_for_op(
        &self,
        op_id: &str,
        signature: &OpSignature,
        seed: u64,
    ) -> Vec<(String, Vec<u8>)> {
        // The default is op-agnostic: the identifier is deliberately discarded.
        let _ = op_id;
        self.generate(signature, seed)
    }

    /// Hands labeled inputs to `emit` one case at a time.
    ///
    /// This lets a caller consume cases without having to keep all of them.
    /// Large-scale generators should override this method and invoke `emit`
    /// per case. The provided implementation collects the cases through
    /// [`InputGenerator::generate_for_op`] and forwards them to `emit` in the
    /// order they were returned.
    fn generate_for_op_streaming(
        &self,
        op_id: &str,
        signature: &OpSignature,
        seed: u64,
        emit: &mut dyn FnMut(String, Vec<u8>),
    ) {
        self.generate_for_op(op_id, signature, seed)
            .into_iter()
            .for_each(|(label, bytes)| emit(label, bytes));
    }
}

/// Builds the default roster of built-in generators used for conformance testing.
///
/// Every call constructs a fresh set of boxed generators, always in the same
/// order.
#[inline]
pub fn default_generators() -> Vec<Box<dyn InputGenerator>> {
    let roster: [Box<dyn InputGenerator>; 6] = [
        Box::new(ExhaustiveByteRange),
        Box::new(EdgeCases),
        Box::new(RandomUniform),
        Box::new(Pathological),
        Box::new(BoundaryLengths),
        Box::new(U32Pathological),
    ];
    Vec::from(roster)
}

/// Encodes `value` as its four little-endian bytes in a freshly allocated vector.
#[inline]
pub(crate) fn u32_bytes(value: u32) -> Vec<u8> {
    Vec::from(value.to_le_bytes())
}

/// Encodes two `u32` operands back to back: the little-endian bytes of `left`
/// followed by the little-endian bytes of `right` (8 bytes in total).
#[inline]
pub(crate) fn pair_bytes(left: u32, right: u32) -> Vec<u8> {
    [left.to_le_bytes(), right.to_le_bytes()].concat()
}

/// Encodes three `u32` operands back to back: the little-endian bytes of `a`,
/// then `b`, then `c` (12 bytes in total).
#[inline]
pub(crate) fn triple_bytes(a: u32, b: u32, c: u32) -> Vec<u8> {
    [a.to_le_bytes(), b.to_le_bytes(), c.to_le_bytes()].concat()
}

/// Shared predicates that classify an op signature by shape.
///
/// Generators call these rather than keeping private copies, so supporting a
/// new shape (e.g., `is_ternary_u32`) means editing ONE file.
pub(crate) mod sig {
    use crate::{DataType, OpSignature};

    /// True when `signature` takes exactly `arity` inputs, every one of them
    /// `u32`, and produces a `u32`.
    #[inline]
    fn is_u32_op(signature: &OpSignature, arity: usize) -> bool {
        signature.inputs.len() == arity
            && signature
                .inputs
                .iter()
                .all(|input| *input == DataType::U32)
            && signature.output == DataType::U32
    }

    /// One u32 input → u32 output.
    #[inline]
    pub fn is_unary_u32(signature: &OpSignature) -> bool {
        is_u32_op(signature, 1)
    }

    /// Two u32 inputs → u32 output.
    #[inline]
    pub fn is_binary_u32(signature: &OpSignature) -> bool {
        is_u32_op(signature, 2)
    }

    /// Three u32 inputs → u32 output.
    #[inline]
    pub fn is_ternary_u32(signature: &OpSignature) -> bool {
        is_u32_op(signature, 3)
    }

    /// Exactly one raw byte-buffer input; the output type is not inspected.
    #[inline]
    pub fn is_byte_input(signature: &OpSignature) -> bool {
        signature.inputs.len() == 1 && signature.inputs[0] == DataType::Bytes
    }

    /// Any of the u32-based shapes: unary, binary, or ternary.
    #[inline]
    pub fn is_u32_any(signature: &OpSignature) -> bool {
        (1..=3).any(|arity| is_u32_op(signature, arity))
    }
}

#[cfg(test)]
mod tests {
    //! Unit tests for the shared byte encoders, the signature matchers, and
    //! the built-in generator roster.
    //!
    //! Local bindings are named `generator` rather than `gen`: `gen` is a
    //! reserved keyword starting with the Rust 2024 edition.

    // The glob already brings in every parent item, including its private
    // `OpSignature` import, so no explicit list is needed.
    use super::*;
    use crate::spec::types::DataType;
    use std::collections::HashSet;

    /// Output of one generator run: `(label, encoded input)` pairs.
    type Cases = Vec<(String, Vec<u8>)>;

    /// Seed shared by the determinism checks.
    const SEED: u64 = 42;

    /// `u32 -> u32`
    fn unary_sig() -> OpSignature {
        OpSignature {
            inputs: vec![DataType::U32],
            output: DataType::U32,
        }
    }

    /// `(u32, u32) -> u32`
    fn binary_sig() -> OpSignature {
        OpSignature {
            inputs: vec![DataType::U32, DataType::U32],
            output: DataType::U32,
        }
    }

    /// `(u32, u32, u32) -> u32`
    fn ternary_sig() -> OpSignature {
        OpSignature {
            inputs: vec![DataType::U32, DataType::U32, DataType::U32],
            output: DataType::U32,
        }
    }

    /// `bytes -> bytes`
    fn byte_sig() -> OpSignature {
        OpSignature {
            inputs: vec![DataType::Bytes],
            output: DataType::Bytes,
        }
    }

    /// Runs `run` against every default generator that handles the binary
    /// signature, then again against a freshly constructed generator of the
    /// same name, and asserts both runs are identical.
    ///
    /// Returns `Err` when a generator present in one `default_generators()`
    /// call cannot be found by name in a second call.
    fn assert_deterministic(
        entry_point: &str,
        run: impl Fn(&dyn InputGenerator, &OpSignature) -> Cases,
    ) -> Result<(), String> {
        let signature = binary_sig();
        for generator in default_generators() {
            if !generator.handles(&signature) {
                continue;
            }
            let first = run(&*generator, &signature);
            let fresh = default_generators()
                .into_iter()
                .find(|candidate| candidate.name() == generator.name())
                .ok_or_else(|| {
                    format!(
                        "Fix: default_generators must preserve generator '{}' across calls",
                        generator.name()
                    )
                })?;
            let second = run(&*fresh, &signature);
            assert_eq!(
                first.len(),
                second.len(),
                "generator '{}' produced different counts on same seed for {}",
                generator.name(),
                entry_point
            );
            // Compared case by case so a failure points at the first
            // diverging input instead of dumping both full vectors.
            for (index, (a, b)) in first.iter().zip(second.iter()).enumerate() {
                assert_eq!(
                    a,
                    b,
                    "generator '{}' produced different output on same seed for {} (case {})",
                    generator.name(),
                    entry_point,
                    index
                );
            }
        }
        Ok(())
    }

    /// Asserts that every default generator handling `signature` only emits
    /// inputs of at least `min_len` bytes. `shape` is used in the failure
    /// message only.
    fn assert_min_input_len(signature: &OpSignature, shape: &str, min_len: usize) {
        for generator in default_generators() {
            if !generator.handles(signature) {
                continue;
            }
            for (label, bytes) in generator.generate(signature, 0) {
                assert!(
                    bytes.len() >= min_len,
                    "generator '{}' produced {}-byte input for {} op (need >={}): {}",
                    generator.name(),
                    bytes.len(),
                    shape,
                    min_len,
                    label
                );
            }
        }
    }

    #[test]
    fn u32_bytes_encoding() {
        assert_eq!(u32_bytes(0), vec![0, 0, 0, 0]);
        assert_eq!(u32_bytes(1), vec![1, 0, 0, 0]);
        assert_eq!(u32_bytes(0xDEAD_BEEF), vec![0xEF, 0xBE, 0xAD, 0xDE]);
    }

    #[test]
    fn pair_bytes_encoding() {
        let bytes = pair_bytes(1, 2);
        assert_eq!(bytes.len(), 8);
        assert_eq!(&bytes[..4], &[1, 0, 0, 0]);
        assert_eq!(&bytes[4..], &[2, 0, 0, 0]);
    }

    #[test]
    fn triple_bytes_encoding() {
        // Check the content, not just the length, so a swapped operand order
        // or wrong endianness is caught.
        assert_eq!(
            triple_bytes(1, 2, 3),
            vec![1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0]
        );
    }

    #[test]
    fn sig_matchers_unary() {
        assert!(sig::is_unary_u32(&unary_sig()));
        assert!(!sig::is_binary_u32(&unary_sig()));
        assert!(!sig::is_ternary_u32(&unary_sig()));
        assert!(!sig::is_byte_input(&unary_sig()));
    }

    #[test]
    fn sig_matchers_binary() {
        assert!(!sig::is_unary_u32(&binary_sig()));
        assert!(sig::is_binary_u32(&binary_sig()));
        assert!(!sig::is_ternary_u32(&binary_sig()));
    }

    #[test]
    fn sig_matchers_ternary() {
        assert!(!sig::is_unary_u32(&ternary_sig()));
        assert!(!sig::is_binary_u32(&ternary_sig()));
        assert!(sig::is_ternary_u32(&ternary_sig()));
        assert!(!sig::is_byte_input(&ternary_sig()));
    }

    #[test]
    fn sig_matchers_bytes() {
        assert!(!sig::is_unary_u32(&byte_sig()));
        assert!(sig::is_byte_input(&byte_sig()));
    }

    #[test]
    fn sig_matchers_u32_any() {
        assert!(sig::is_u32_any(&unary_sig()));
        assert!(sig::is_u32_any(&binary_sig()));
        assert!(sig::is_u32_any(&ternary_sig()));
        assert!(!sig::is_u32_any(&byte_sig()));
    }

    #[test]
    fn sig_matchers_reject_non_u32_output() {
        let signature = OpSignature {
            inputs: vec![DataType::U32],
            output: DataType::Bytes,
        };
        assert!(!sig::is_unary_u32(&signature));
        assert!(!sig::is_u32_any(&signature));
    }

    #[test]
    fn default_generators_non_empty() {
        let generators = default_generators();
        assert!(generators.len() >= 5, "expected at least 5 generators");
    }

    #[test]
    fn all_generators_have_unique_names() {
        let generators = default_generators();
        let names: HashSet<_> = generators.iter().map(|g| g.name()).collect();
        assert_eq!(
            names.len(),
            generators.len(),
            "duplicate generator names detected"
        );
    }

    #[test]
    fn every_generator_handles_unary_or_binary_or_bytes() {
        let signatures = [unary_sig(), binary_sig(), byte_sig()];
        for generator in default_generators() {
            let handles_any = signatures.iter().any(|s| generator.handles(s));
            assert!(
                handles_any,
                "generator '{}' handles no known signature shape",
                generator.name()
            );
        }
    }

    #[test]
    fn generators_produce_deterministic_output() -> Result<(), String> {
        assert_deterministic("generate", |generator, signature| {
            generator.generate(signature, SEED)
        })
    }

    #[test]
    fn generators_produce_deterministic_output_for_op() -> Result<(), String> {
        assert_deterministic("generate_for_op", |generator, signature| {
            generator.generate_for_op("test_op", signature, SEED)
        })
    }

    #[test]
    fn generators_produce_correct_input_sizes_for_binary() {
        assert_min_input_len(&binary_sig(), "binary", 8);
    }

    #[test]
    fn generators_produce_correct_input_sizes_for_unary() {
        assert_min_input_len(&unary_sig(), "unary", 4);
    }

    #[test]
    fn edge_case_generator_produces_nonzero_count() {
        let inputs = EdgeCases.generate(&binary_sig(), 0);
        // Edge values: ~36 unique values → 36*36 = ~1296 pairs
        assert!(inputs.len() > 100, "edge cases too few: {}", inputs.len());
    }

    #[test]
    fn exhaustive_byte_range_covers_0_to_255_for_unary() {
        let inputs = ExhaustiveByteRange.generate(&unary_sig(), 0);
        assert_eq!(inputs.len(), 256, "expected exactly 256 exhaustive inputs");
    }

    #[test]
    fn random_uniform_seed_sensitivity() {
        let a = RandomUniform.generate(&unary_sig(), 12345);
        let b = RandomUniform.generate(&unary_sig(), 98765);
        // Fail with a clear message instead of an index-out-of-bounds panic
        // if the generator ever returns nothing.
        let (Some(first_a), Some(first_b)) = (a.first(), b.first()) else {
            panic!("RandomUniform produced no inputs for a unary signature");
        };
        // Widely-separated seeds must produce different inputs.
        assert_ne!(
            first_a.1, first_b.1,
            "different seeds produced same first input"
        );
    }

    #[test]
    fn u32_pathological_covers_shift_boundaries() {
        let inputs = U32Pathological.generate(&binary_sig(), 0);
        let labels: HashSet<_> = inputs.iter().map(|case| case.0.as_str()).collect();
        assert!(labels.contains("patho:shift:0"), "missing shift=0");
        assert!(labels.contains("patho:shift:31"), "missing shift=31");
        assert!(labels.contains("patho:shift:32"), "missing shift=32");
        assert!(labels.contains("patho:rot_0"), "missing rot=0");
    }
}