vyre-conform 0.1.0

Conformance suite for vyre backends — proves byte-identical output to CPU reference
Documentation
//! Replay append-only witness corpora.

use crate::verify::corpus::witness::{self, WitnessError};
use std::path::PathBuf;

/// Summary handed back when a corpus replay finishes with no mismatches.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReplaySuccess {
    /// ID of the operation whose corpus was replayed.
    pub op_id: String,
    /// Number of witnesses that were run.
    pub witnesses: usize,
}

/// One witness whose replayed output differed from the recorded output.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReplayMismatch {
    /// Position of the witness within the corpus, counted from zero.
    pub witness_index: usize,
    /// Bytes that were fed to the runner for this witness.
    pub input: Vec<u8>,
    /// Output bytes recorded in the corpus for this input.
    pub expected: Vec<u8>,
    /// Output bytes the runner actually returned.
    pub actual: Vec<u8>,
}

/// Reasons a corpus replay can fail.
///
/// The `Display` text of every variant comes from its `#[error(...)]`
/// attribute; the mismatch variants additionally carry the raw bytes needed
/// to diagnose the failure.
#[derive(Debug, thiserror::Error)]
pub enum ReplayError {
    /// Witness file could not be loaded or decoded.
    ///
    /// Wraps a [`WitnessError`] (converted automatically through `#[from]`)
    /// and displays that error's own message unchanged.
    #[error("{0}")]
    Witness(#[from] WitnessError),
    /// Operation ID cannot be used as a corpus path component.
    #[error(
        "invalid corpus op_id. Fix: use lowercase ascii letters, digits, underscore, or dot only."
    )]
    InvalidOpId,
    /// The runner produced bytes different from the stored witness output.
    ///
    /// NOTE(review): the replay functions visible in this file report
    /// mismatches through [`ReplayError::MultipleMismatches`], even when only
    /// one witness fails; confirm whether anything still constructs this
    /// variant.
    #[error("Fix: corpus replay mismatch for {op_id} at witness {witness_index}")]
    Mismatch {
        /// Operation ID replayed.
        op_id: String,
        /// Zero-based witness index.
        witness_index: usize,
        /// Input bytes for the failing witness.
        input: Vec<u8>,
        /// Expected bytes from the corpus.
        expected: Vec<u8>,
        /// Actual bytes produced by the runner.
        actual: Vec<u8>,
    },
    /// One or more witness mismatches, gathered over a whole replay.
    #[error("Fix: corpus replay produced {count} mismatch(es) for {op_id}")]
    MultipleMismatches {
        /// Operation ID replayed.
        op_id: String,
        /// Number of mismatches (the length of `mismatches`).
        count: usize,
        /// Per-witness mismatch details, in corpus order.
        mismatches: Vec<ReplayMismatch>,
    },
    /// Corpus file contains no witnesses.
    #[error("Fix: corpus file is empty for {op_id}")]
    EmptyCorpus {
        /// Operation ID whose corpus was empty.
        op_id: String,
    },
}

/// Result type returned by corpus replay: a [`ReplaySuccess`] summary when
/// every witness matched, otherwise the [`ReplayError`] describing the failure.
pub type ReplayReport = Result<ReplaySuccess, ReplayError>;

/// Replay the witness file found at the default location for `op_id`.
///
/// Resolves the path with [`default_witness_path`] and hands it, together
/// with `op_id` and `op_runner`, to [`replay_corpus_at`].
///
/// # Errors
///
/// Returns the error from [`default_witness_path`] if `op_id` is rejected,
/// otherwise whatever [`replay_corpus_at`] reports.
#[inline]
pub fn replay_corpus(op_id: &str, op_runner: impl Fn(&[u8]) -> Vec<u8>) -> ReplayReport {
    let corpus_file = default_witness_path(op_id)?;
    replay_corpus_at(corpus_file, op_id, op_runner)
}

/// Replay a specific witness file for `op_id` against `op_runner`.
#[inline]
pub fn replay_corpus_at(
    path: PathBuf,
    op_id: &str,
    op_runner: impl Fn(&[u8]) -> Vec<u8>,
) -> ReplayReport {
    let witnesses = witness::read_all(&path)?;
    if witnesses.is_empty() {
        return Err(ReplayError::EmptyCorpus {
            op_id: op_id.to_string(),
        });
    }
    let mut mismatches = Vec::new();
    for (witness_index, item) in witnesses.iter().enumerate() {
        witness::verify_header(&header_bytes(item), op_id)?;
        let actual = op_runner(&item.input);
        if actual != item.expected_output {
            mismatches.push(ReplayMismatch {
                witness_index,
                input: item.input.clone(),
                expected: item.expected_output.clone(),
                actual,
            });
        }
    }
    if !mismatches.is_empty() {
        return Err(ReplayError::MultipleMismatches {
            op_id: op_id.to_string(),
            count: mismatches.len(),
            mismatches,
        });
    }

    Ok(ReplaySuccess {
        op_id: op_id.to_string(),
        witnesses: witnesses.len(),
    })
}

/// Build the default witness file location for `op_id`.
///
/// The ID is validated first. The resulting path is
/// `<CARGO_MANIFEST_DIR>/corpus/<op_id>/witnesses.bin`, where the manifest
/// directory is the value captured by `env!` when this crate was compiled.
///
/// # Errors
///
/// Returns the error from `validate_op_id` when `op_id` is not acceptable as
/// a path component.
#[inline]
pub fn default_witness_path(op_id: &str) -> Result<PathBuf, ReplayError> {
    validate_op_id(op_id)?;

    let mut location = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    location.push("corpus");
    location.push(op_id);
    location.push("witnesses.bin");
    Ok(location)
}

/// Check that `op_id` is acceptable as a single corpus path component.
///
/// An ID passes only if it is non-empty, consists solely of lowercase ASCII
/// letters, ASCII digits, `_` and `.`, does not begin with `.`, and contains
/// no `..` sequence. Anything else yields [`ReplayError::InvalidOpId`].
fn validate_op_id(op_id: &str) -> Result<(), ReplayError> {
    let allowed = |byte: u8| matches!(byte, b'a'..=b'z' | b'0'..=b'9' | b'_' | b'.');

    let well_formed = !op_id.is_empty()
        && op_id.bytes().all(allowed)
        && !op_id.contains("..")
        && !op_id.starts_with('.');

    if well_formed {
        Ok(())
    } else {
        Err(ReplayError::InvalidOpId)
    }
}

/// Re-encode a witness header into a `HEADER_LEN`-byte array.
///
/// The fields are written little-endian at fixed offsets: `version` at 0,
/// `op_id_hash` at 4, `timestamp` at 8 and `input_len` at 12. Bytes past
/// offset 16, if any, stay zero. Each field must encode to exactly four
/// bytes; `copy_from_slice` panics on any other length.
fn header_bytes(item: &witness::CorpusWitness) -> [u8; witness::HEADER_LEN] {
    /// Copy one encoded field into the four bytes starting at `offset`.
    fn put_word(dst: &mut [u8], offset: usize, word: &[u8]) {
        dst[offset..offset + 4].copy_from_slice(word);
    }

    let header = &item.header;
    let mut encoded = [0_u8; witness::HEADER_LEN];
    put_word(&mut encoded, 0, &header.version.to_le_bytes());
    put_word(&mut encoded, 4, &header.op_id_hash.to_le_bytes());
    put_word(&mut encoded, 8, &header.timestamp.to_le_bytes());
    put_word(&mut encoded, 12, &header.input_len.to_le_bytes());
    encoded
}