vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
//! String interner — a deterministic workgroup-local symbol table.
//!
//! Lexers and parsers need stable ids for identifiers, keywords, and literals.
//! The string interner provides that without heap allocation: a fixed slot
//! table with linear probing and a shared byte pool, both living in
//! workgroup-local SRAM.  The id-0 sentinel is reserved for the empty string.
//! The CPU reference uses the exact same FNV-1a hash, probe sequence, and
//! capacity limits as the WGSL kernel, so conform can prove the tables are
//! byte-identical across host and device.

use crate::ir::DataType;
use crate::ir::transform::compiler::{U32X4_INPUTS, U32_OUTPUTS};
use crate::lower::wgsl::compiler::wgsl_backend;
use crate::ops::{AlgebraicLaw, IntrinsicDescriptor, OpSpec};
use thiserror::Error;

/// Portable WGSL source for the string interner primitive.
#[must_use]
pub const fn source() -> &'static str {
    include_str!("../../../lower/wgsl/compiler/string_interner.wgsl")
}

 const _: &[DataType] = U32X4_INPUTS;

/// Slot entry describing an interned string slice.
///
/// Entries store a 32-bit FNV-1a hash, the byte offset and length inside the
/// shared byte pool, and the stable non-zero intern id assigned on first
/// insertion.  The fields are `pub(crate)` because the public API is through
/// `StringInterner::intern` and `StringInterner::lookup`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Entry {
    pub(crate) hash: u32,
    pub(crate) offset: usize,
    pub(crate) len: usize,
    pub(crate) id: u32,
}

/// FNV-1a 32-bit hash used by CPU and WGSL references.
#[must_use]
pub fn fnv1a32(bytes: &[u8]) -> u32 {
    let mut hash = 0x811c_9dc5u32;
    for &byte in bytes {
        hash ^= u32::from(byte);
        hash = hash.wrapping_mul(0x0100_0193);
    }
    hash
}

impl StringInterner {
    /// Create an interner with a fixed slot capacity and a fixed byte
    /// storage capacity. Both are bounded so the entire interner fits
    /// in workgroup SRAM on the GPU side; the CPU reference mirrors
    /// that bound exactly.
    #[must_use]
    pub fn new(slot_capacity: usize, byte_capacity: usize) -> Self {
        Self {
            slots: vec![None; slot_capacity],
            bytes: Vec::with_capacity(byte_capacity),
            byte_capacity,
            next_id: 1,
        }
    }

    /// Intern `bytes` and return a stable non-zero intern id. Empty
    /// input always resolves to id `0` (the sentinel) without
    /// consuming a slot.
    ///
    /// # Errors
    ///
    /// Returns a `Fix: ...` error when the slot table is full, the
    /// byte pool is exhausted, or a slot index cannot fit in `u32`.
    pub fn intern(&mut self, input: &[u8]) -> Result<u32, StringInternerError> {
        if input.is_empty() {
            return Ok(0);
        }
        if self.slots.is_empty() {
            return Err(StringInternerError::TableFull);
        }
        let hash = fnv1a32(input);
        let start = usize::try_from(hash).map_err(|_| StringInternerError::IndexOverflow)?
            % self.slots.len();
        for probe in 0..self.slots.len() {
            let slot = (start + probe) % self.slots.len();
            match &self.slots[slot] {
                Some(entry) if entry.hash == hash && self.entry_bytes(entry) == input => {
                    return Ok(entry.id)
                }
                Some(_) => {}
                None => {
                    if self
                        .bytes
                        .len()
                        .checked_add(input.len())
                        .is_none_or(|total| total > self.byte_capacity)
                    {
                        return Err(StringInternerError::BytePoolFull);
                    }
                    let offset = self.bytes.len();
                    self.bytes.extend_from_slice(input);
                    let id = self.next_id;
                    self.next_id = self
                        .next_id
                        .checked_add(1)
                        .ok_or(StringInternerError::IndexOverflow)?;
                    self.slots[slot] = Some(Entry {
                        hash,
                        offset,
                        len: input.len(),
                        id,
                    });
                    return Ok(id);
                }
            }
        }
        Err(StringInternerError::TableFull)
    }

    /// Reverse lookup: given an id from a prior `intern` call on this
    /// interner, return the bytes that produced it. Returns `None` if
    /// the id is unknown; `Some(&[])` if the id is the empty-string
    /// sentinel `0`.
    #[must_use]
    pub fn lookup(&self, id: u32) -> Option<&[u8]> {
        if id == 0 {
            return Some(&[]);
        }
        for entry in self.slots.iter().flatten() {
            if entry.id == id {
                return Some(self.entry_bytes(entry));
            }
        }
        None
    }

    pub(crate) fn entry_bytes(&self, entry: &Entry) -> &[u8] {
        &self.bytes[entry.offset..entry.offset + entry.len]
    }
}

impl StringInternerOp {
    /// Declarative operation specification.
    pub const SPEC: OpSpec = OpSpec::intrinsic(
        "compiler_primitives.string_interner",
        U32X4_INPUTS,
        U32_OUTPUTS,
        LAWS,
        wgsl_backend,
        IntrinsicDescriptor::new(
            "compiler_primitives_string_interner",
            "workgroup_atomic_sram",
            crate::ops::cpu_op::structured_intrinsic_cpu,
        ),
    );
}

/// Intern all byte strings into a fresh fixed-capacity table.
///
/// Byte capacity is derived as `slot_capacity * 64` which is the
/// default upper bound assumed by the WGSL kernel's workgroup SRAM
/// layout. Callers that need a different budget should instantiate
/// `StringInterner::new(slot_capacity, byte_capacity)` directly.
///
/// # Errors
///
/// Returns `Fix: ...` when insertion cannot complete with linear probing.
pub fn intern_all(inputs: &[&[u8]], slot_capacity: usize) -> Result<Vec<u32>, StringInternerError> {
    let byte_capacity = slot_capacity.saturating_mul(64);
    let mut interner = StringInterner::new(slot_capacity, byte_capacity);
    inputs.iter().map(|bytes| interner.intern(bytes)).collect()
}

/// Algebraic laws declared by the string-interner primitive.
pub const LAWS: &[AlgebraicLaw] = &[AlgebraicLaw::Bounded {
    lo: 0,
    hi: u32::MAX,
}];

/// Deterministic workgroup-local intern table.
///
/// Models a workgroup-SRAM interner that holds up to `slot_capacity`
/// unique strings sharing a bounded byte storage pool of
/// `byte_capacity` bytes. This exactly matches the WGSL lowering which
/// allocates a fixed slot table and a fixed byte arena. The id-0
/// sentinel is reserved for "empty string" so callers can map missing
/// entries to a well-known value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StringInterner {
    pub(crate) slots: Vec<Option<Entry>>,
    pub(crate) bytes: Vec<u8>,
    pub(crate) byte_capacity: usize,
    pub(crate) next_id: u32,
}

/// String interner validation errors.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum StringInternerError {
    /// Linear probing exhausted the bounded slot table.
    #[error("InternerTableFull: no SRAM slot accepted the string. Fix: increase table slots or split the lexing batch.")]
    TableFull,
    /// The byte storage pool is full — the total bytes of interned
    /// strings would exceed the `byte_capacity` declared at
    /// construction time.
    #[error("InternerBytePoolFull: byte storage is exhausted. Fix: raise byte_capacity or split the lexing batch.")]
    BytePoolFull,
    /// Intern id or table index cannot fit in `u32`.
    #[error("InternerIndexOverflow: table slot cannot fit u32 intern id. Fix: lower the workgroup interner capacity.")]
    IndexOverflow,
}

/// Category C string interner intrinsic.
#[derive(Debug, Default, Clone, Copy)]
pub struct StringInternerOp;

/// Workgroup size used by the reference WGSL lowering.
pub const WORKGROUP_SIZE: [u32; 3] = [64, 1, 1];