vyre-reference 0.1.0

Pure-Rust CPU reference interpreter for vyre IR — a byte-identical oracle for backend conformance and for small-data fallback.
Documentation
//! Out-of-bounds rules enforced by the parity engine.
//!
//! GPU drivers differ on what happens when a shader indexes past the end of a
//! buffer: some clamp, some return zero, some crash. The reference interpreter
//! eliminates that ambiguity by defining one deterministic behavior: zero-fill
//! of the declared element type for scalar loads, an empty slice for `Bytes`,
//! and a silent no-op for stores.
//! Any backend that diverges from these rules fails the conform gate.

use vyre::ir::DataType as IrDataType;

use crate::value::Value;
use vyre::ir::DataType;

/// Typed bytes backing one declared IR buffer.
///
/// This struct exists to give the reference interpreter a single place to enforce
/// stride-correct indexing and OOB semantics, independent of how any GPU driver
/// handles buffer bounds.
#[derive(Debug, Clone)]
pub struct Buffer {
    /// Raw backing storage, little-endian (see `read_u32`/`write_u32`). The
    /// length is not required to be a multiple of the element stride; the
    /// access helpers bounds-check every element individually.
    pub(crate) bytes: Vec<u8>,
    /// Declared element type; its `min_bytes()` is the stride used to turn an
    /// element index into a byte offset.
    pub(crate) element: IrDataType,
}

impl Buffer {
    /// Number of addressable elements, saturated to `u32::MAX`.
    ///
    /// The engine indexes buffers with u32 (WGSL-native). Lengths beyond
    /// u32::MAX are unreachable through the IR's index space, so saturate
    /// rather than truncating — anything past u32::MAX maps to the OOB
    /// tail where loads zero-fill and stores no-op. A zero stride counts
    /// raw bytes instead of dividing by zero.
    pub(crate) fn len(&self) -> u32 {
        let byte_len = self.bytes.len();
        let count = match self.element.min_bytes() {
            0 => byte_len,
            stride => byte_len / stride,
        };
        u32::try_from(count).unwrap_or(u32::MAX)
    }
}

pub(crate) fn load(buffer: &Buffer, index: u32) -> Value {
    let stride = buffer.element.min_bytes();
    let ty = ir_to_conform_type(buffer.element.clone());
    if matches!(buffer.element, IrDataType::Bytes) {
        let offset = index as usize;
        if offset > buffer.bytes.len() {
            return Value::Bytes(Vec::new());
        }
        return Value::Bytes(buffer.bytes[offset..].to_vec());
    }
    let Some(offset) = byte_offset(index, stride) else {
        return Value::try_zero_for(ty).unwrap_or(Value::Bytes(Vec::new()));
    };
    if stride == 0 || offset + stride > buffer.bytes.len() {
        return Value::try_zero_for(ty).unwrap_or(Value::Bytes(Vec::new()));
    }
    Value::from_element_bytes(ty.clone(), &buffer.bytes[offset..offset + stride])
        .unwrap_or_else(|_| Value::try_zero_for(ty).unwrap_or(Value::Bytes(Vec::new())))
}

/// Writes `value` into `buffer` at `index`; out-of-bounds stores silently no-op.
///
/// `Bytes` buffers copy as many payload bytes as fit starting at `index` — the
/// backing storage never grows. Typed buffers write exactly one stride-sized
/// element, or nothing when the element would run past the end.
pub(crate) fn store(buffer: &mut Buffer, index: u32, value: &Value) {
    if matches!(buffer.element, IrDataType::Bytes) {
        let start = index as usize;
        if start >= buffer.bytes.len() {
            return; // OOB: silent no-op, matching GPU buffer semantics.
        }
        let payload = value.to_bytes();
        // Clamp to the remaining buffer capacity — GPU buffers cannot grow.
        let room = buffer.bytes.len() - start;
        let count = payload.len().min(room);
        buffer.bytes[start..start + count].copy_from_slice(&payload[..count]);
        return;
    }
    let stride = buffer.element.min_bytes();
    if stride == 0 {
        return;
    }
    let Some(offset) = byte_offset(index, stride) else {
        return;
    };
    let end = offset + stride;
    if end > buffer.bytes.len() {
        return;
    }
    write_element(buffer.element.clone(), &mut buffer.bytes[offset..end], value);
}

pub(crate) fn atomic_load(buffer: &Buffer, index: u32) -> Option<u32> {
    // Kimi audit finding #7: index must be scaled by the buffer's
    // declared element stride, not hardcoded to 4. A U64 buffer at
    // index 1 sits at byte offset 8, not 4. The previous hardcode
    // caused atomic ops on wider-than-u32 elements to overlap every
    // pair of elements and corrupt the reference semantics.
    let stride = buffer.element.min_bytes().max(4);
    let offset = byte_offset(index, stride)?;
    if offset + 4 > buffer.bytes.len() {
        None
    } else {
        Some(read_u32(&buffer.bytes[offset..offset + 4]))
    }
}

/// Stores one `u32` word into the element slot at `index`; OOB is a silent no-op.
///
/// See [`atomic_load`] — the slot offset is scaled by the declared element
/// stride (clamped to at least four bytes), never hardcoded to 4.
pub(crate) fn atomic_store(buffer: &mut Buffer, index: u32, value: u32) {
    let stride = buffer.element.min_bytes().max(4);
    let Some(offset) = byte_offset(index, stride) else {
        return;
    };
    if let Some(word) = buffer.bytes.get_mut(offset..offset + 4) {
        write_u32(word, value);
    }
}

/// Converts an element `index` into a byte offset (`index * stride`).
///
/// Returns `None` on multiplication overflow, which every caller treats as
/// out of bounds (loads zero-fill, stores no-op).
fn byte_offset(index: u32, stride: usize) -> Option<usize> {
    stride.checked_mul(index as usize)
}

/// Encodes `value` into `target`, the byte span of exactly one element.
///
/// `target` must be `element.min_bytes()` long; the fixed-width arms panic if
/// it is shorter. All encodings are little-endian, matching `read_u32` /
/// `write_u32`.
fn write_element(element: IrDataType, target: &mut [u8], value: &Value) {
    match element {
        // U32, I32 and Bool all occupy one 4-byte word (bools are stored as
        // 0/1 words, WGSL-style). The three arms were byte-identical, so
        // they are merged into one.
        IrDataType::U32 | IrDataType::I32 | IrDataType::Bool => {
            target.copy_from_slice(&value.to_bytes_width(4)[..4]);
        }
        IrDataType::U64 => {
            target.copy_from_slice(&value.to_bytes_width(8)[..8]);
        }
        IrDataType::F32 => {
            // Value::Float carries an f64; the GPU buffer is four bytes
            // of f32, so narrow via `as f32` before writing. Dropping the
            // upper four bytes of `v.to_le_bytes()` (what the default
            // to_bytes_width path does) would mangle the f32 bit pattern.
            let narrowed = match value {
                Value::Float(v) => *v as f32,
                Value::U32(bits) => f32::from_bits(*bits),
                _ => 0.0,
            };
            target.copy_from_slice(&narrowed.to_le_bytes());
        }
        // Bytes, Vec2U32, Vec4U32, and any other element type: copy what
        // fits and zero-fill the tail. (The previous explicit
        // Bytes|Vec2U32|Vec4U32 arm was byte-identical to this fallback,
        // so the duplicate arm is removed.)
        _ => {
            let bytes = value.to_bytes_width(target.len());
            let copied = target.len().min(bytes.len());
            target[..copied].copy_from_slice(&bytes[..copied]);
            target[copied..].fill(0);
        }
    }
}

/// Decodes the first four bytes of `bytes` as a little-endian `u32`.
///
/// Panics if `bytes` holds fewer than four bytes; callers bounds-check first.
fn read_u32(bytes: &[u8]) -> u32 {
    let mut word = [0u8; 4];
    word.copy_from_slice(&bytes[..4]);
    u32::from_le_bytes(word)
}

/// Encodes `value` into `bytes` as little-endian.
///
/// Panics unless `bytes` is exactly four bytes long.
fn write_u32(bytes: &mut [u8], value: u32) {
    let encoded = value.to_le_bytes();
    bytes.copy_from_slice(&encoded);
}

/// Maps an IR element type onto the `DataType` the value layer works with.
///
/// `Bool` widens to `U32` (bools live in 4-byte words); every unrecognised
/// element degrades to `Bytes`.
///
/// NOTE(review): `IrDataType` and `DataType` are both imported from
/// `vyre::ir` at the top of this file, so this currently normalises within a
/// single type — confirm whether `DataType` was meant to come from a separate
/// conform module.
fn ir_to_conform_type(element: IrDataType) -> DataType {
    match element {
        IrDataType::Bool => DataType::U32,
        IrDataType::U32 => DataType::U32,
        IrDataType::I32 => DataType::I32,
        IrDataType::F32 => DataType::F32,
        IrDataType::U64 => DataType::U64,
        IrDataType::Vec2U32 => DataType::Vec2U32,
        IrDataType::Vec4U32 => DataType::Vec4U32,
        // `Bytes` and anything else collapse to the raw-bytes type.
        _ => DataType::Bytes,
    }
}