vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
use crate::ir::DataType;
use crate::ops::{AlgebraicLaw, Backend, IntrinsicDescriptor, OpSpec};

/// Input data types declared for `workgroup.string_interner`: two
/// [`DataType::U32`] values.
///
/// NOTE(review): the meaning of each word is not stated in this file —
/// confirm against the WGSL lowering before relying on a particular layout.
pub const INPUTS: &[DataType] = &[DataType::U32, DataType::U32];

/// Output data types declared for `workgroup.string_interner`: two
/// [`DataType::U32`] values.
///
/// NOTE(review): the meaning of each word is not stated in this file —
/// confirm against the WGSL lowering before relying on a particular layout.
pub const OUTPUTS: &[DataType] = &[DataType::U32, DataType::U32];

/// Algebraic laws declared for the op. The list is empty: no laws are
/// declared for `workgroup.string_interner`.
pub const LAWS: &[AlgebraicLaw] = &[];

/// Registered op spec for `workgroup.string_interner`.
///
/// Built with [`OpSpec::intrinsic`] from the signature constants in this
/// module ([`INPUTS`], [`OUTPUTS`], [`LAWS`]), the [`wgsl_only`] backend
/// predicate, and an [`IntrinsicDescriptor`].
pub const SPEC: OpSpec = OpSpec::intrinsic(
    // Op identifier; matches the name used in the doc comment above.
    "workgroup.string_interner",
    INPUTS,
    OUTPUTS,
    LAWS,
    // `wgsl_only` returns `true` only for `Backend::Wgsl`.
    wgsl_only,
    IntrinsicDescriptor::new(
        // NOTE(review): presumably the intrinsic's entry-point name — confirm
        // against `IntrinsicDescriptor::new`.
        "workgroup_string_interner_intern",
        // NOTE(review): presumably a tag for the hardware/algorithm contract —
        // confirm against `IntrinsicDescriptor::new`.
        "workgroup-sram-fnv1a-table",
        // CPU-side function supplied to the descriptor.
        crate::ops::cpu_op::structured_intrinsic_cpu,
    ),
);

/// Stable symbol handle returned by `intern`.
///
/// The wrapped `u32` is an index into the interner's slot table; `lookup`
/// returns the original bytes for any symbol the interner has handed out.
/// Ids are assigned in interning order, starting at 0 and growing by one
/// for every new distinct payload, so a handle can be compared with `==`
/// for identity and used as a dense array key.
///
/// A handle carries no reference to the interner that produced it, so it
/// is only meaningful when passed back to that same interner.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct InternedSymbol(pub u32);

/// Error returned by fallible interner operations.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// shown to users and propagated with `?` into `Box<dyn Error>` or other
/// error-wrapping types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InternError {
    /// The symbol slot table is full.
    OutOfSlots,
    /// The flat byte arena is out of capacity.
    OutOfBytes,
    /// A lookup was issued with an unknown symbol id.
    UnknownSymbol,
}

impl std::fmt::Display for InternError {
    /// Write a short, lowercase, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::OutOfSlots => "symbol slot table is full",
            Self::OutOfBytes => "byte arena is out of capacity",
            Self::UnknownSymbol => "unknown symbol id",
        })
    }
}

impl std::error::Error for InternError {}

/// Bounded interning table used as the CPU reference for
/// `workgroup.string_interner`.
///
/// The interner is parameterized by two capacities: the maximum
/// number of distinct symbols and the total number of bytes across
/// all symbol payloads. The WGSL lowering uses the same layout so
/// CPU and GPU agree on exact state after every command.
///
/// Payloads are stored back to back in one flat byte arena; each symbol
/// is described by an `(offset, len)` pair into that arena, and the
/// symbol id is the pair's index in the slot table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WorkgroupStringInterner {
    /// Upper bound on `slots.len()`; `intern` refuses new symbols beyond it.
    slot_capacity: usize,
    /// Upper bound on `bytes.len()`; `intern` refuses payloads that would
    /// push the arena past it.
    byte_capacity: usize,
    /// Slot table. Each entry is `(byte_offset, byte_len)` into `bytes`,
    /// indexed by symbol id.
    slots: Vec<(u32, u32)>,
    /// Flat arena holding every interned payload, appended in interning
    /// order.
    bytes: Vec<u8>,
}

impl WorkgroupStringInterner {
    /// Create an empty interner with fixed slot and byte capacities.
    #[must_use]
    pub fn new(slot_capacity: usize, byte_capacity: usize) -> Self {
        Self {
            slot_capacity,
            byte_capacity,
            slots: Vec::with_capacity(slot_capacity),
            bytes: Vec::with_capacity(byte_capacity),
        }
    }

    /// Maximum number of distinct symbols this interner can hold.
    #[must_use]
    pub fn slot_capacity(&self) -> usize {
        self.slot_capacity
    }

    /// Total payload byte capacity.
    #[must_use]
    pub fn byte_capacity(&self) -> usize {
        self.byte_capacity
    }

    /// Number of distinct symbols seen so far.
    #[must_use]
    pub fn len(&self) -> usize {
        self.slots.len()
    }

    /// Whether the interner has seen any symbols.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.slots.is_empty()
    }

    /// Intern `bytes` and return the stable symbol id.
    ///
    /// Re-interning the same byte sequence always returns the same
    /// [`InternedSymbol`] — `intern` is deterministic and idempotent.
    ///
    /// # Errors
    ///
    /// - [`InternError::OutOfSlots`] if the slot table is full and
    ///   the input is not already present.
    /// - [`InternError::OutOfBytes`] if the byte arena cannot fit
    ///   `bytes.len()` more bytes.
    pub fn intern(&mut self, bytes: &[u8]) -> Result<InternedSymbol, InternError> {
        if let Some(existing) = self.lookup_by_bytes(bytes) {
            return Ok(existing);
        }
        if self.slots.len() >= self.slot_capacity {
            return Err(InternError::OutOfSlots);
        }
        if self
            .bytes
            .len()
            .checked_add(bytes.len())
            .is_none_or(|total| total > self.byte_capacity)
        {
            return Err(InternError::OutOfBytes);
        }
        let offset = self.bytes.len() as u32;
        let len = bytes.len() as u32;
        self.bytes.extend_from_slice(bytes);
        let id = self.slots.len() as u32;
        self.slots.push((offset, len));
        Ok(InternedSymbol(id))
    }

    /// Return the byte payload associated with `symbol`.
    ///
    /// # Errors
    ///
    /// Returns [`InternError::UnknownSymbol`] when `symbol` does not
    /// correspond to a live entry.
    pub fn lookup(&self, symbol: InternedSymbol) -> Result<&[u8], InternError> {
        let idx = symbol.0 as usize;
        if idx >= self.slots.len() {
            return Err(InternError::UnknownSymbol);
        }
        let (offset, len) = self.slots[idx];
        let start = offset as usize;
        let end = start + len as usize;
        Ok(&self.bytes[start..end])
    }

    /// Linear scan over the slot table for a byte payload. Returns
    /// `None` when the payload has not been interned yet. The WGSL
    /// lowering replaces this with an FNV-1a indexed probe; the CPU
    /// reference stays O(n) for clarity.
    pub(crate) fn lookup_by_bytes(&self, bytes: &[u8]) -> Option<InternedSymbol> {
        for (idx, (offset, len)) in self.slots.iter().enumerate() {
            let start = *offset as usize;
            let end = start + *len as usize;
            if &self.bytes[start..end] == bytes {
                return Some(InternedSymbol(idx as u32));
            }
        }
        None
    }
}

/// Backend predicate for this op: returns `true` only when `backend` is
/// [`Backend::Wgsl`], and `false` for every other backend.
///
/// Passed to [`OpSpec::intrinsic`] when building [`SPEC`].
pub fn wgsl_only(backend: &Backend) -> bool {
    matches!(*backend, Backend::Wgsl)
}