vyre-conform 0.1.0

Conformance suite for vyre backends — proves byte-identical output to CPU reference
Documentation
//! Binary append-only witness format.

use fs2::FileExt;
use std::fs::{self, OpenOptions};
use std::io::{self, Write};
use std::path::Path;

/// Current witness format version.
///
/// `append_one` writes this value as the first header field, and `parse_header`
/// rejects any header whose version field differs from it.
pub const VERSION: u32 = 1;
/// Header size in bytes: version, op hash, timestamp, input length.
///
/// The header is four little-endian `u32` fields of 4 bytes each, in that order.
pub const HEADER_LEN: usize = 16;

/// Header decoded from one corpus witness.
///
/// On disk the header is `HEADER_LEN` bytes: the four fields below in declaration
/// order, each stored as a little-endian `u32` (byte offsets 0, 4, 8 and 12).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct WitnessHeader {
    /// Format version; a successfully parsed header always carries `VERSION`.
    pub version: u32,
    /// Stable 32-bit hash of the operation ID, as produced by `op_id_hash`.
    pub op_id_hash: u32,
    /// Unix timestamp seconds truncated to 32 bits.
    pub timestamp: u32,
    /// Number of input bytes following the header.
    pub input_len: u32,
}

/// One decoded witness from `witnesses.bin`.
///
/// A record on disk is the header, then `header.input_len` input bytes, then a
/// little-endian `u32` output length, then that many expected-output bytes.
/// The output length is not kept as a field; it equals `expected_output.len()`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CorpusWitness {
    /// Witness header.
    pub header: WitnessHeader,
    /// Input bytes supplied to the operation (owned copy of the file bytes).
    pub input: Vec<u8>,
    /// Expected output bytes recorded when the witness was appended.
    pub expected_output: Vec<u8>,
}

/// Errors produced while loading, appending, or validating witnesses.
///
/// Every message starts with `Fix:`; the numeric limits quoted in the messages
/// (16-byte header, version 1, 1073741824-byte cap) are written out literally
/// rather than derived from the corresponding constants.
#[derive(Debug, thiserror::Error)]
pub enum WitnessError {
    /// Filesystem error while creating directories, opening, locking, reading,
    /// or writing the witness file.
    #[error("Fix: witness file I/O failed for {path}: {source}")]
    Io {
        /// Path being accessed (rendered with `Path::display`).
        path: String,
        /// Underlying I/O error.
        #[source]
        source: io::Error,
    },
    /// Header is shorter than the required 16 bytes.
    ///
    /// Returned by `parse_header` when the slice it is given is too short.
    #[error("Fix: witness header is {actual} bytes; expected 16 bytes")]
    ShortHeader {
        /// Actual byte count.
        actual: usize,
    },
    /// Unsupported witness format version (anything other than `VERSION`).
    #[error("Fix: unsupported witness version {version}; expected version 1")]
    Version {
        /// Version found in the file.
        version: u32,
    },
    /// Header op hash does not match the expected operation.
    ///
    /// Returned by `verify_header`; both values are printed as zero-padded hex.
    #[error("Fix: witness op hash {actual:#010x} does not match {expected:#010x}")]
    OpHash {
        /// Expected op hash.
        expected: u32,
        /// Actual op hash.
        actual: u32,
    },
    /// File ended before a complete witness could be decoded.
    ///
    /// `append_one` also returns this variant (with `offset` 0 and `remaining`
    /// set to `u32::MAX`) when an input or output is too long for a `u32` length.
    #[error("Fix: truncated witness at offset {offset}; needed {needed} bytes, had {remaining}")]
    Truncated {
        /// Byte offset where decoding failed.
        offset: usize,
        /// Number of bytes needed.
        needed: usize,
        /// Number of bytes remaining.
        remaining: usize,
    },
    /// New witness file is not an append-only extension of the previous file.
    #[error("Fix: witness corpus changed before append boundary at byte {offset}")]
    AppendOnly {
        /// First differing byte or truncation boundary.
        offset: usize,
    },
    /// File exceeds the maximum allowed size (1 GiB).
    #[error(
        "Fix: witness file too large for {path} ({size} bytes); max allowed is 1073741824 bytes"
    )]
    FileTooLarge {
        /// Path to the file.
        path: String,
        /// Actual file size.
        size: u64,
    },
}

/// Hash an operation ID into the 32-bit value stored in witness headers.
///
/// This is 32-bit FNV-1a over the UTF-8 bytes of `op_id`: start from the offset
/// basis, then for each byte XOR it in and multiply by the FNV prime with
/// wrapping arithmetic. The empty string hashes to the offset basis itself.
#[inline]
pub fn op_id_hash(op_id: &str) -> u32 {
    const FNV_OFFSET_BASIS: u32 = 0x811C_9DC5;
    const FNV_PRIME: u32 = 0x0100_0193;

    op_id.bytes().fold(FNV_OFFSET_BASIS, |state, byte| {
        (state ^ u32::from(byte)).wrapping_mul(FNV_PRIME)
    })
}

/// Decode a witness header and confirm it belongs to `op_id`.
///
/// # Errors
///
/// Propagates whatever `parse_header` reports (short slice or unsupported
/// version). If the header parses but its stored hash differs from
/// `op_id_hash(op_id)`, returns [`WitnessError::OpHash`] carrying both values.
#[inline]
pub fn verify_header(header: &[u8], op_id: &str) -> Result<WitnessHeader, WitnessError> {
    let decoded = parse_header(header)?;
    match (op_id_hash(op_id), decoded.op_id_hash) {
        (expected, actual) if expected != actual => Err(WitnessError::OpHash { expected, actual }),
        _ => Ok(decoded),
    }
}

/// Read all witnesses from `path`.
#[inline]
pub fn read_all(path: &Path) -> Result<Vec<CorpusWitness>, WitnessError> {
    const MAX_SIZE: u64 = 1024 * 1024 * 1024;

    let metadata = fs::metadata(path).map_err(|source| WitnessError::Io {
        path: path.display().to_string(),
        source,
    })?;

    if metadata.len() > MAX_SIZE {
        return Err(WitnessError::FileTooLarge {
            path: path.display().to_string(),
            size: metadata.len(),
        });
    }

    let bytes = fs::read(path).map_err(|source| WitnessError::Io {
        path: path.display().to_string(),
        source,
    })?;
    decode_all(&bytes)
}

/// Append one witness for `op_id` to `path`.
///
/// The record is `VERSION`, `op_id_hash(op_id)`, `timestamp`, and the input
/// length (each a little-endian `u32`), followed by the input bytes, the
/// little-endian `u32` output length, and the expected-output bytes.
///
/// Lengths are validated before anything touches the filesystem, so a rejected
/// call creates no directory or file. The whole record is assembled in memory
/// and handed to a single `write_all`, which keeps a failure partway through
/// from leaving a half-written record that would break every later decode.
///
/// The parent directory is created if missing. The file is opened in append
/// mode and an exclusive lock is taken through `fs2::FileExt` before writing.
/// The lock is not explicitly released here; presumably it is dropped when the
/// handle closes at the end of the function (confirm against the `fs2` docs).
///
/// # Errors
///
/// * [`WitnessError::Truncated`] (with `offset` 0 and `remaining` set to
///   `u32::MAX`) if `input` or `expected_output` is too long for a `u32` length.
/// * [`WitnessError::Io`] if creating the directory, opening, locking, writing,
///   or syncing the file fails.
#[inline]
pub fn append_one(
    path: &Path,
    op_id: &str,
    input: &[u8],
    expected_output: &[u8],
    timestamp: u32,
) -> Result<(), WitnessError> {
    // Validate first: nothing below should run for a record we cannot encode.
    let input_len = u32::try_from(input.len()).map_err(|_| WitnessError::Truncated {
        offset: 0,
        needed: input.len(),
        remaining: u32::MAX as usize,
    })?;
    let output_len = u32::try_from(expected_output.len()).map_err(|_| WitnessError::Truncated {
        offset: 0,
        needed: expected_output.len(),
        remaining: u32::MAX as usize,
    })?;

    // Assemble the complete record up front so it reaches the file in one write.
    let version_bytes = VERSION.to_le_bytes();
    let op_hash_bytes = op_id_hash(op_id).to_le_bytes();
    let timestamp_bytes = timestamp.to_le_bytes();
    let input_len_bytes = input_len.to_le_bytes();
    let output_len_bytes = output_len.to_le_bytes();
    let record = [
        &version_bytes[..],
        &op_hash_bytes[..],
        &timestamp_bytes[..],
        &input_len_bytes[..],
        input,
        &output_len_bytes[..],
        expected_output,
    ]
    .concat();

    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent).map_err(|source| WitnessError::Io {
            path: parent.display().to_string(),
            source,
        })?;
    }

    let mut file = OpenOptions::new()
        .create(true)
        .append(true)
        .open(path)
        .map_err(|source| WitnessError::Io {
            path: path.display().to_string(),
            source,
        })?;
    file.lock_exclusive().map_err(|source| WitnessError::Io {
        path: path.display().to_string(),
        source,
    })?;

    file.write_all(&record)
        .and_then(|()| file.flush())
        .and_then(|()| file.sync_data())
        .map_err(|source| WitnessError::Io {
            path: path.display().to_string(),
            source,
        })
}

/// Check that `new_bytes` is `previous_bytes` with zero or more bytes appended,
/// and that the resulting corpus still decodes.
///
/// # Errors
///
/// * [`WitnessError::AppendOnly`] with `offset` equal to `new_bytes.len()` when
///   the new corpus is shorter than the previous one.
/// * [`WitnessError::AppendOnly`] with `offset` at the first differing byte when
///   the existing prefix was modified.
/// * Any decoding error from `decode_all` when the prefix is intact but the
///   new corpus as a whole is malformed.
#[inline]
pub fn verify_append_only(previous_bytes: &[u8], new_bytes: &[u8]) -> Result<(), WitnessError> {
    let boundary = previous_bytes.len();

    // `get` yields `None` exactly when the new corpus is shorter than the old one.
    let retained_prefix = match new_bytes.get(..boundary) {
        Some(prefix) => prefix,
        None => {
            return Err(WitnessError::AppendOnly {
                offset: new_bytes.len(),
            })
        }
    };

    if retained_prefix != previous_bytes {
        let offset = previous_bytes
            .iter()
            .zip(new_bytes.iter())
            .position(|(old, new)| old != new)
            .unwrap_or(boundary);
        return Err(WitnessError::AppendOnly { offset });
    }

    decode_all(new_bytes)?;
    Ok(())
}

/// Decode every witness record in `bytes`, front to back.
///
/// Each record is a `HEADER_LEN`-byte header, `input_len` input bytes, a
/// little-endian `u32` output length, and that many output bytes. An empty
/// slice decodes to an empty vector. Decoding stops at the first error, so
/// trailing garbage or a partial record makes the whole call fail.
fn decode_all(bytes: &[u8]) -> Result<Vec<CorpusWitness>, WitnessError> {
    let mut decoded = Vec::new();
    let mut cursor = 0;

    while cursor < bytes.len() {
        // Header: bounds-check first so slicing below cannot panic.
        ensure_available(bytes, cursor, HEADER_LEN)?;
        let input_start = cursor + HEADER_LEN;
        let header = parse_header(&bytes[cursor..input_start])?;

        // Input payload, sized by the header.
        let input_len = header.input_len as usize;
        ensure_available(bytes, input_start, input_len)?;
        let input_end = input_start + input_len;
        let input = Vec::from(&bytes[input_start..input_end]);

        // Output length prefix.
        ensure_available(bytes, input_end, 4)?;
        let output_len = read_u32(bytes, input_end) as usize;
        let output_start = input_end + 4;

        // Output payload.
        ensure_available(bytes, output_start, output_len)?;
        let output_end = output_start + output_len;
        let expected_output = Vec::from(&bytes[output_start..output_end]);

        decoded.push(CorpusWitness {
            header,
            input,
            expected_output,
        });
        cursor = output_end;
    }

    Ok(decoded)
}

/// Decode the four little-endian `u32` header fields from the front of `header`.
///
/// Only the first `HEADER_LEN` bytes are examined; extra trailing bytes are
/// ignored. Fails with `ShortHeader` when fewer than `HEADER_LEN` bytes are
/// supplied and with `Version` when the version field is not `VERSION`.
fn parse_header(header: &[u8]) -> Result<WitnessHeader, WitnessError> {
    let actual = header.len();
    if actual < HEADER_LEN {
        return Err(WitnessError::ShortHeader { actual });
    }

    let version = read_u32(header, 0);
    if version != VERSION {
        return Err(WitnessError::Version { version });
    }

    Ok(WitnessHeader {
        version,
        op_id_hash: read_u32(header, 4),
        timestamp: read_u32(header, 8),
        input_len: read_u32(header, 12),
    })
}

/// Read a little-endian `u32` from the four bytes starting at `offset`.
///
/// Panics if `bytes` does not contain four bytes at `offset`; callers are
/// expected to bounds-check beforehand.
fn read_u32(bytes: &[u8], offset: usize) -> u32 {
    let mut word = [0_u8; 4];
    word.copy_from_slice(&bytes[offset..offset + 4]);
    u32::from_le_bytes(word)
}

/// Confirm that at least `needed` bytes exist in `bytes` from `offset` onward.
///
/// An `offset` past the end counts as zero bytes remaining. On failure the
/// `Truncated` error carries the offset, the requested count, and what was left.
fn ensure_available(bytes: &[u8], offset: usize, needed: usize) -> Result<(), WitnessError> {
    match bytes.len().saturating_sub(offset) {
        remaining if remaining < needed => Err(WitnessError::Truncated {
            offset,
            needed,
            remaining,
        }),
        _ => Ok(()),
    }
}