vyre-conform 0.1.0

Conformance suite for vyre backends — proves byte-identical output to CPU reference
Documentation
//! Serialization/deserialization for eval engine inputs.

use super::types::*;
use super::vm::{debug_diagnostic, evaluate_programs, MAX_RUNTIME_PROGRAMS};

/// Largest match-event count `evaluate_from_bytes` accepts in a single frame.
const MAX_MATCH_EVENTS: usize = 1_000_000;
/// Largest length prefix accepted by `read_u32_slice` (counted in `u32` words) and by
/// `read_u32_pairs` (counted in pairs, i.e. two words per entry).
const MAX_EVAL_SLICE_WORDS: usize = 1_000_000;
/// Largest instruction count `parse_program` accepts in one bytecode block.
const MAX_PROGRAM_INSTRUCTIONS: usize = 65_536;

/// Evaluate an eval-engine frame from its wire representation.
///
/// The frame is consumed front to back. Every integer is a little-endian `u32`.
/// Sections, in order:
///
/// 1. program count, then for each program a byte length followed by that many
///    bytes of `YBC0` bytecode (decoded by `parse_program`);
/// 2. the evaluation plan (decoded by `read_plan`);
/// 3. match count, then for each match the words `signal_id`, `start`, `end`;
/// 4. the file context (decoded by `read_file_ctx`);
/// 5. file byte length followed by that many file bytes.
///
/// Bytes that follow the file payload are not inspected.
///
/// # Errors
///
/// Returns `Err` with a human-readable message when the program count exceeds
/// `MAX_RUNTIME_PROGRAMS`, the match count exceeds `MAX_MATCH_EVENTS`, the input
/// ends before a declared section is complete, or any nested decoder rejects its
/// section. Errors from `evaluate_programs` are passed through unchanged.
#[inline]
pub fn evaluate_from_bytes(input: &[u8]) -> Result<Vec<bool>, String> {
    // Smallest wire footprint of one program: 4-byte length prefix plus the
    // 8-byte header (`YBC0` magic + instruction count) that `parse_program` requires.
    const MIN_PROGRAM_WIRE_BYTES: usize = 12;
    // Exact wire footprint of one match event: three `u32` words.
    const MATCH_EVENT_WIRE_BYTES: usize = 12;

    let mut offset = 0usize;

    let program_count = read_u32(input, &mut offset)? as usize;
    if program_count > MAX_RUNTIME_PROGRAMS {
        return Err(format!(
            "RuntimeProgramCountTooLarge: program count exceeds {MAX_RUNTIME_PROGRAMS}. Fix: split the eval frame into bounded batches."
        ));
    }
    // The count comes from untrusted input, so never reserve more slots than the
    // remaining bytes could possibly fill. A truncated frame still fails in the
    // loop below with the same error as before; it just no longer over-allocates.
    let program_capacity =
        program_count.min(input.len().saturating_sub(offset) / MIN_PROGRAM_WIRE_BYTES);
    let mut programs = Vec::with_capacity(program_capacity);
    for _ in 0..program_count {
        let prog_len = read_u32(input, &mut offset)? as usize;
        let prog_bytes = read_bytes(input, &mut offset, prog_len)?;
        programs.push(parse_program(prog_bytes)?);
    }

    let plan = read_plan(input, &mut offset)?;
    let match_count = read_u32(input, &mut offset)? as usize;
    if match_count > MAX_MATCH_EVENTS {
        return Err(format!(
            "MatchEventCountTooLarge: match count exceeds {MAX_MATCH_EVENTS}. Fix: split the eval frame into bounded batches."
        ));
    }
    // Same clamp as for programs: reserve only what the remaining input can back.
    let match_capacity =
        match_count.min(input.len().saturating_sub(offset) / MATCH_EVENT_WIRE_BYTES);
    let mut matches = Vec::with_capacity(match_capacity);
    for _ in 0..match_count {
        // Struct fields are evaluated in source order, which is the wire order.
        matches.push(ConformMatchEvent {
            signal_id: read_u32(input, &mut offset)?,
            start: read_u32(input, &mut offset)?,
            end: read_u32(input, &mut offset)?,
        });
    }

    let file_ctx = read_file_ctx(input, &mut offset)?;
    let file_bytes_len = read_u32(input, &mut offset)? as usize;
    let file_bytes = read_bytes(input, &mut offset, file_bytes_len)?;

    evaluate_programs(&programs, plan, file_bytes, &matches, file_ctx)
}

/// Decode one little-endian `u32` at `*offset` and advance the cursor by four bytes.
///
/// Fails without moving the cursor when `*offset + 4` overflows or when fewer than
/// four bytes remain in `input`.
fn read_u32(input: &[u8], offset: &mut usize) -> Result<u32, String> {
    let start = *offset;
    let stop = match start.checked_add(4) {
        Some(stop) => stop,
        None => {
            return Err(String::from(
                "Fix: provide enough bytes to read a u32 without offset overflow.",
            ))
        }
    };
    match input.get(start..stop) {
        Some(word) => {
            // Commit the cursor before decoding, matching the read order callers rely on.
            *offset = stop;
            read_u32_chunk(word)
        }
        None => Err(String::from(
            "Fix: provide enough bytes to read a complete u32.",
        )),
    }
}

/// Borrow the next `len` bytes of `input` starting at `*offset` and advance the cursor.
///
/// Fails without moving the cursor when `*offset + len` overflows or when the
/// requested range runs past the end of `input`.
fn read_bytes<'a>(input: &'a [u8], offset: &mut usize, len: usize) -> Result<&'a [u8], String> {
    let start = *offset;
    let stop = match start.checked_add(len) {
        Some(stop) => stop,
        None => {
            return Err(String::from(
                "Fix: provide a byte length that does not overflow the eval frame offset.",
            ))
        }
    };
    match input.get(start..stop) {
        Some(payload) => {
            *offset = stop;
            Ok(payload)
        }
        None => Err(String::from(
            "Fix: provide enough bytes for the declared eval frame payload.",
        )),
    }
}

fn parse_program(bytes: &[u8]) -> Result<ConformProgram, String> {
    if bytes.len() < 8 || &bytes[0..4] != b"YBC0" {
        return Err("Fix: provide eval bytecode starting with the YBC0 magic.".to_string());
    }
    let count = read_u32_chunk(&bytes[4..8])? as usize;
    if count > MAX_PROGRAM_INSTRUCTIONS {
        return Err(format!(
            "RuntimeInstructionCountTooLarge: instruction count exceeds {MAX_PROGRAM_INSTRUCTIONS}. Fix: split the program into a bounded bytecode block."
        ));
    }
    let expected = 8usize.checked_add(count.checked_mul(8).ok_or_else(|| {
        debug_diagnostic(format_args!(
            "signature mismatch: bytecode instruction count multiplication overflowed, count={count}"
        ));
        "Fix: keep bytecode instruction count small enough to compute its payload size."
            .to_string()
    })?)
    .ok_or_else(|| {
        debug_diagnostic(format_args!(
            "signature mismatch: bytecode expected-size addition overflowed, count={count}"
        ));
        "Fix: keep bytecode instruction payload size within usize bounds.".to_string()
    })?;
    if bytes.len() != expected {
        debug_diagnostic(format_args!(
            "signature mismatch: bytecode bytes_len={}, expected={expected}, instruction_count={count}",
            bytes.len()
        ));
        return Err(
            "Fix: provide bytecode whose declared instruction count matches its payload."
                .to_string(),
        );
    }
    let mut instructions = Vec::with_capacity(count);
    for chunk in bytes[8..].chunks_exact(8) {
        instructions.push(ConformInstruction {
            opcode: read_u32_chunk(&chunk[0..4])?,
            operand: read_u32_chunk(&chunk[4..8])?,
        });
    }
    Ok(ConformProgram { instructions })
}

/// Decode the evaluation-plan section of a frame, advancing `offset` past it.
///
/// Wire order matches the field order below: six length-prefixed tables followed
/// by two scalar `u32` words (`max_cached_positions`, `max_fired`).
fn read_plan<'a>(input: &'a [u8], offset: &mut usize) -> Result<ConformEvaluationPlan<'a>, String> {
    // Struct-literal fields are evaluated top to bottom, so the listing order here
    // is the order in which the sections are consumed from the wire.
    Ok(ConformEvaluationPlan {
        program_signal_counts: read_u32_slice_as_usize(input, offset)?,
        signal_to_programs: read_u32_pairs(input, offset)?,
        program_list: read_u32_slice(input, offset)?,
        signal_local_ids: read_u32_slice(input, offset)?,
        sentinel_signal_ids: read_u32_slice(input, offset)?,
        file_boundaries: read_u32_slice(input, offset)?,
        max_cached_positions: read_u32(input, offset)? as usize,
        max_fired: read_u32(input, offset)?,
    })
}

fn read_u32_slice<'a>(input: &'a [u8], offset: &mut usize) -> Result<&'a [u32], String> {
    let len = read_u32(input, offset)? as usize;
    if len > MAX_EVAL_SLICE_WORDS {
        return Err(format!(
            "EvalSliceTooLarge: u32 slice length exceeds {MAX_EVAL_SLICE_WORDS}. Fix: split the eval frame into bounded batches."
        ));
    }
    let byte_len = len
        .checked_mul(4)
        .ok_or("Fix: keep u32 slice length small enough to compute byte length.")?;
    let bytes = read_bytes(input, offset, byte_len)?;
    let count = len;
    let leaked: &'static [u32] = vec_to_leaked_slice(
        bytes
            .chunks_exact(4)
            .map(read_u32_chunk)
            .collect::<Result<Vec<_>, _>>()?,
    );
    Ok(&leaked[..count])
}

/// Decode a length-prefixed table of little-endian `u32` words, widening each to `usize`.
///
/// The prefix is a `u32` entry count, rejected when it exceeds `MAX_RUNTIME_PROGRAMS`.
///
/// NOTE: the decoded values live in a heap buffer leaked through `vec_to_leaked_slice`;
/// the returned slice points at that buffer rather than into `input`, and the buffer
/// is never freed.
fn read_u32_slice_as_usize<'a>(input: &'a [u8], offset: &mut usize) -> Result<&'a [usize], String> {
    let entry_count = read_u32(input, offset)? as usize;
    if entry_count > MAX_RUNTIME_PROGRAMS {
        return Err(format!(
            "RuntimeProgramCountTooLarge: program signal-count entries exceed {MAX_RUNTIME_PROGRAMS}. Fix: split the eval frame into bounded batches."
        ));
    }
    let payload_len = match entry_count.checked_mul(4) {
        Some(n) => n,
        None => {
            return Err(String::from(
                "Fix: keep usize slice length small enough to compute byte length.",
            ))
        }
    };
    let payload = read_bytes(input, offset, payload_len)?;

    let mut entries = Vec::with_capacity(entry_count);
    for raw in payload.chunks_exact(4) {
        let word = read_u32_chunk(raw)?;
        entries.push(word as usize);
    }
    let leaked: &'static [usize] = vec_to_leaked_slice(entries);
    Ok(&leaked[..entry_count])
}

/// Decode a length-prefixed table of `[u32; 2]` pairs (two little-endian words each).
///
/// The prefix is a `u32` pair count, rejected when it exceeds `MAX_EVAL_SLICE_WORDS`.
///
/// NOTE: the decoded pairs live in a heap buffer leaked through `vec_to_leaked_slice`;
/// the returned slice points at that buffer rather than into `input`, and the buffer
/// is never freed.
fn read_u32_pairs<'a>(input: &'a [u8], offset: &mut usize) -> Result<&'a [[u32; 2]], String> {
    let pair_count = read_u32(input, offset)? as usize;
    if pair_count > MAX_EVAL_SLICE_WORDS {
        return Err(format!(
            "EvalSliceTooLarge: u32 pair slice length exceeds {MAX_EVAL_SLICE_WORDS}. Fix: split the eval frame into bounded batches."
        ));
    }
    let payload_len = match pair_count.checked_mul(8) {
        Some(n) => n,
        None => {
            return Err(String::from(
                "Fix: keep u32 pair slice length small enough to compute byte length.",
            ))
        }
    };
    let payload = read_bytes(input, offset, payload_len)?;

    let pairs = payload
        .chunks_exact(8)
        .map(|record| -> Result<[u32; 2], String> {
            let (first, second) = record.split_at(4);
            Ok([read_u32_chunk(first)?, read_u32_chunk(second)?])
        })
        .collect::<Result<Vec<_>, String>>()?;
    let leaked: &'static [[u32; 2]] = vec_to_leaked_slice(pairs);
    Ok(&leaked[..pair_count])
}

/// Decode the file-context section: thirteen consecutive `u32` words, one per field,
/// in the order the fields are listed below.
fn read_file_ctx(input: &[u8], offset: &mut usize) -> Result<ConformFileContext, String> {
    // Each call pulls the next word and advances the shared cursor.
    let mut next_word = || read_u32(input, &mut *offset);

    let ctx = ConformFileContext {
        file_size: next_word()?,
        entropy_bucket: next_word()?,
        magic_u32: next_word()?,
        is_pe: next_word()?,
        is_dll: next_word()?,
        is_64bit: next_word()?,
        has_signature: next_word()?,
        num_sections: next_word()?,
        num_imports: next_word()?,
        entry_point_rva: next_word()?,
        unique_pattern_count: next_word()?,
        total_match_count: next_word()?,
        file_age_seconds: next_word()?,
    };
    Ok(ctx)
}

/// Turn an owned `Vec` into a `'static` slice by leaking its heap allocation.
///
/// The storage is never reclaimed; callers trade a permanent allocation for a
/// reference that has no owner to outlive.
fn vec_to_leaked_slice<T>(v: Vec<T>) -> &'static [T] {
    let boxed: Box<[T]> = v.into_boxed_slice();
    let leaked: &'static mut [T] = Box::leak(boxed);
    leaked
}

/// Decode a little-endian `u32` from a slice that must be exactly four bytes long.
///
/// Any other length yields an error instead of panicking.
fn read_u32_chunk(bytes: &[u8]) -> Result<u32, String> {
    let word: Result<[u8; 4], _> = bytes.try_into();
    match word {
        Ok(le) => Ok(u32::from_le_bytes(le)),
        Err(_) => Err("Fix: decode u32 values from exactly four bytes.".to_string()),
    }
}