vyre-conform 0.1.0

Conformance suite for vyre backends — proves byte-identical output to CPU reference
Documentation
//! Eval engine conformance specification.
//!
//! CPU reference: stack-machine evaluator for bytecode programs with match
//! events. Mirrors the semantics of `vyre::cpu::evaluate_programs`.

/// Crate-local binary input decoding for the eval engine (not the `serde` crate); provides `evaluate_from_bytes`.
pub mod serde;
/// Type definitions for eval conformance inputs: `ConformEvaluationPlan`, `ConformMatchEvent`, `ConformFileContext`.
pub mod types;
/// Vm module.
pub mod vm;

use crate::spec::{EngineInvariant, EngineSpec};

/// Describe the eval engine for the conformance suite.
///
/// The returned [`EngineSpec`] uses the id `"engine.eval"`, declares the
/// `Deterministic` and `BoundedResources` invariants, and registers
/// [`cpu_fn`] as the CPU reference implementation.
#[inline]
pub fn spec() -> EngineSpec {
    // Invariants the engine declares to the conformance suite.
    let invariants = vec![
        EngineInvariant::Deterministic,
        EngineInvariant::BoundedResources,
    ];

    EngineSpec {
        id: "engine.eval",
        description: "Bytecode condition evaluator.",
        invariants,
        cpu_fn: Some(cpu_fn),
    }
}

/// CPU reference for the eval engine.
///
/// `input` is a custom binary serialization holding, in order:
/// - the program count followed by each program's bytecode
/// - the evaluation plan fields
/// - the match events
/// - the file context
/// - the file bytes
///
/// On success the output holds one byte per program: `1` if the program
/// fired, `0` otherwise. If `serde::evaluate_from_bytes` reports an error,
/// the output is the four-byte sentinel `[0xFF, 0xFF, 0xFF, 0xFF]`.
#[inline]
pub fn cpu_fn(input: &[u8]) -> Vec<u8> {
    serde::evaluate_from_bytes(input)
        .map(|fired| fired.into_iter().map(u8::from).collect::<Vec<u8>>())
        // Any failure collapses to the fixed error sentinel.
        .unwrap_or_else(|_| vec![0xFF; 4])
}

/// Encode a list of `(opcode, operand)` pairs as a bytecode program blob.
///
/// Layout: the 4-byte magic `YBC0`, the instruction count as a little-endian
/// `u32` (the slice length is cast with `as u32`), then for every instruction
/// its opcode and operand, each as a little-endian `u32`.
#[cfg(test)]
#[inline]
pub(crate) fn build_program_bytecode(instructions: &[(u32, u32)]) -> Vec<u8> {
    // Everything after the magic is a flat stream of u32 words:
    // the count first, then opcode/operand for each instruction in order.
    let words = std::iter::once(instructions.len() as u32).chain(
        instructions
            .iter()
            .flat_map(|&(opcode, operand)| vec![opcode, operand]),
    );

    let mut encoded = b"YBC0".to_vec();
    for word in words {
        encoded.extend_from_slice(&word.to_le_bytes());
    }
    encoded
}

/// Serialize an evaluation request into the binary input passed to `cpu_fn`.
///
/// Every value is appended via `to_le_bytes`, and every length prefix is the
/// collection length cast with `as u32`. Sections, in order:
/// 1. program count, then for each program its byte length and raw bytes
/// 2. plan arrays, each length-prefixed: `program_signal_counts` (each entry
///    cast to `u32`), `signal_to_programs` (two values per entry),
///    `program_list`, `signal_local_ids`, `sentinel_signal_ids`,
///    `file_boundaries`
/// 3. plan scalars: `max_cached_positions` (cast to `u32`), then `max_fired`
/// 4. match count, then `signal_id`, `start`, `end` for each event
/// 5. the file context fields in the order listed in the body
/// 6. file byte length, then the raw file bytes
#[cfg(test)]
#[inline]
pub(crate) fn build_eval_input(
    programs: &[Vec<u8>],
    plan: &types::ConformEvaluationPlan<'_>,
    matches: &[types::ConformMatchEvent],
    file_ctx: &types::ConformFileContext,
    file_bytes: &[u8],
) -> Vec<u8> {
    // Appends the little-endian bytes of each listed value, in order.
    macro_rules! put_le {
        ($buf:ident; $($value:expr),+ $(,)?) => {
            $( $buf.extend_from_slice(&$value.to_le_bytes()); )+
        };
    }

    let mut bytes = Vec::new();

    // Section 1: programs.
    put_le!(bytes; programs.len() as u32);
    for program in programs {
        put_le!(bytes; program.len() as u32);
        bytes.extend_from_slice(program);
    }

    // Section 2: plan arrays.
    put_le!(bytes; plan.program_signal_counts.len() as u32);
    for count in plan.program_signal_counts.iter().copied() {
        put_le!(bytes; count as u32);
    }

    put_le!(bytes; plan.signal_to_programs.len() as u32);
    for pair in plan.signal_to_programs {
        // Each entry is a two-element array; emit both values in order.
        for value in pair {
            put_le!(bytes; value);
        }
    }

    put_le!(bytes; plan.program_list.len() as u32);
    for value in plan.program_list {
        put_le!(bytes; value);
    }

    put_le!(bytes; plan.signal_local_ids.len() as u32);
    for value in plan.signal_local_ids {
        put_le!(bytes; value);
    }

    put_le!(bytes; plan.sentinel_signal_ids.len() as u32);
    for value in plan.sentinel_signal_ids {
        put_le!(bytes; value);
    }

    put_le!(bytes; plan.file_boundaries.len() as u32);
    for value in plan.file_boundaries {
        put_le!(bytes; value);
    }

    // Section 3: plan scalars.
    put_le!(bytes; plan.max_cached_positions as u32, plan.max_fired);

    // Section 4: match events.
    put_le!(bytes; matches.len() as u32);
    for event in matches {
        put_le!(bytes; event.signal_id, event.start, event.end);
    }

    // Section 5: file context, field order is part of the format.
    put_le!(
        bytes;
        file_ctx.file_size,
        file_ctx.entropy_bucket,
        file_ctx.magic_u32,
        file_ctx.is_pe,
        file_ctx.is_dll,
        file_ctx.is_64bit,
        file_ctx.has_signature,
        file_ctx.num_sections,
        file_ctx.num_imports,
        file_ctx.entry_point_rva,
        file_ctx.unique_pattern_count,
        file_ctx.total_match_count,
        file_ctx.file_age_seconds,
    );

    // Section 6: raw file contents.
    put_le!(bytes; file_bytes.len() as u32);
    bytes.extend_from_slice(file_bytes);

    bytes
}

#[cfg(test)]
pub(crate) mod tests {
    use super::{build_eval_input, build_program_bytecode, cpu_fn, spec};
    use crate::spec::EngineInvariant;
    use std::iter;

    // `(opcode, operand)` pairs used by the programs in these tests.
    const PUSH_TRUE: (u32, u32) = (1, 0);
    const PUSH_STRING_MATCHED_SIGNAL_0: (u32, u32) = (3, 0);
    const AND: (u32, u32) = (11, 0);
    const HALT: (u32, u32) = (30, 0);

    /// Plan for a single program that references no signals.
    fn signal_free_plan() -> super::types::ConformEvaluationPlan<'static> {
        super::types::ConformEvaluationPlan {
            program_signal_counts: &[0],
            signal_to_programs: &[[0, 0]],
            program_list: &[],
            signal_local_ids: &[],
            sentinel_signal_ids: &[],
            file_boundaries: &[],
            max_cached_positions: 1,
            max_fired: 1024,
        }
    }

    /// Serialize one program with a default file context and no file bytes.
    fn single_program_input(
        program: Vec<u8>,
        plan: &super::types::ConformEvaluationPlan<'_>,
        matches: &[super::types::ConformMatchEvent],
    ) -> Vec<u8> {
        build_eval_input(
            &[program],
            plan,
            matches,
            &super::types::ConformFileContext::default(),
            b"",
        )
    }

    #[test]
    fn spec_has_correct_invariants() {
        let engine = spec();
        assert_eq!(engine.id, "engine.eval");
        for expected in [
            EngineInvariant::Deterministic,
            EngineInvariant::BoundedResources,
        ]
        .iter()
        {
            assert!(engine.invariants.contains(expected));
        }
    }

    #[test]
    fn cpu_fn_is_deterministic() {
        // PushTrue -> Halt, evaluated twice on the same input.
        let program = build_program_bytecode(&[PUSH_TRUE, HALT]);
        let input = single_program_input(program, &signal_free_plan(), &[]);

        let first = cpu_fn(&input);
        let second = cpu_fn(&input);
        assert_eq!(first, second);
        assert_eq!(first, vec![1]);
    }

    #[test]
    fn cpu_fn_evaluates_string_match() {
        // Program 0: PushStringMatched(0) -> Halt
        let program = build_program_bytecode(&[PUSH_STRING_MATCHED_SIGNAL_0, HALT]);
        let plan = super::types::ConformEvaluationPlan {
            program_signal_counts: &[1],
            signal_to_programs: &[[0, 1]],
            program_list: &[0],
            signal_local_ids: &[0],
            sentinel_signal_ids: &[],
            file_boundaries: &[],
            max_cached_positions: 1,
            max_fired: 1024,
        };
        let events = vec![super::types::ConformMatchEvent {
            signal_id: 0,
            start: 5,
            end: 7,
        }];

        let input = single_program_input(program, &plan, &events);
        assert_eq!(cpu_fn(&input), vec![1]);
    }

    #[test]
    fn cpu_fn_bounded_resources_no_stack_overflow() {
        // 33 PushTrue instructions, then 32 And instructions, then Halt.
        let instructions: Vec<(u32, u32)> = iter::repeat(PUSH_TRUE)
            .take(33)
            .chain(iter::repeat(AND).take(32))
            .chain(iter::once(HALT))
            .collect();
        let program = build_program_bytecode(&instructions);

        let input = single_program_input(program, &signal_free_plan(), &[]);
        assert_eq!(cpu_fn(&input), vec![1]);
    }

    /// Serialized input for the simplest program (PushTrue -> Halt) with a
    /// signal-free plan, no matches, a default file context and no file bytes.
    #[inline]
    pub(crate) fn build_eval_input_for_invariants() -> Vec<u8> {
        let program = build_program_bytecode(&[PUSH_TRUE, HALT]);
        single_program_input(program, &signal_free_plan(), &[])
    }
}