vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
use crate::ops::string_similarity::validation::{to_u32, validate_dp_product, validate_pair, SimilarityError};

// CPU reference kernel for `string_similarity.damerau_levenshtein`.


/// Optimal-string-alignment (restricted Damerau-Levenshtein) distance between
/// two byte strings.
///
/// Insertions, deletions, substitutions, and swaps of two adjacent bytes each
/// count as a single edit. The computation fills a complete
/// `(a.len() + 1) x (b.len() + 1)` table stored row-major in one `Vec`; the
/// table is only allocated after both validation helpers have accepted the
/// input sizes.
///
/// # Errors
///
/// Propagates the `Fix: ...` error produced by `validate_pair` or
/// `validate_dp_product` when the inputs are rejected (size caps, DP cell
/// product overflow, or the documented OOM guard), and the error from
/// `to_u32` if the final distance cannot be converted to `u32`.
pub fn damerau_levenshtein(a: &[u8], b: &[u8]) -> Result<u32, SimilarityError> {
    validate_pair(a, b)?;
    validate_dp_product(a.len(), b.len())?;

    let height = a.len() + 1;
    let width = b.len() + 1;
    // Row-major addressing: cell (row, col) lives at `row * width + col`.
    let at = |row: usize, col: usize| row * width + col;

    let mut table = vec![0usize; height * width];
    // Column 0: turning the first `row` bytes of `a` into "" takes `row` deletions.
    for (row, cell) in table.iter_mut().step_by(width).enumerate() {
        *cell = row;
    }
    // Row 0: turning "" into the first `col` bytes of `b` takes `col` insertions.
    for (col, cell) in table[..width].iter_mut().enumerate() {
        *cell = col;
    }

    // `r` / `c` are zero-based byte positions; the matching table cell is
    // one row down and one column right of them.
    for (r, &byte_a) in a.iter().enumerate() {
        for (c, &byte_b) in b.iter().enumerate() {
            let deletion = table[at(r, c + 1)] + 1;
            let insertion = table[at(r + 1, c)] + 1;
            let substitution = table[at(r, c)] + usize::from(byte_a != byte_b);
            let mut cheapest = deletion.min(insertion).min(substitution);

            // Adjacent transposition: the current pair of bytes in `a` is the
            // previous pair of `b` swapped. Short-circuiting keeps the
            // `- 1` index arithmetic from underflowing on the first row/column.
            let is_swap = r > 0 && c > 0 && byte_a == b[c - 1] && a[r - 1] == byte_b;
            if is_swap {
                cheapest = cheapest.min(table[at(r - 1, c - 1)] + 1);
            }

            table[at(r + 1, c + 1)] = cheapest;
        }
    }

    to_u32(table[at(a.len(), b.len())], "damerau levenshtein distance")
}

// Backend-specific lowering marker.

// WGSL lowering unavailable for `string_similarity.damerau_levenshtein`.
//
// Exact CPU parity requires caller-provided dynamic-programming rows with
// adjacent-transposition history, but the current intrinsic ABI exposes only
// `Bytes, Bytes -> U32` and no scratch arena. Fix: keep this op CPU-only until
// the spec defines bounded DP scratch buffers.