vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation

/// Upper bound on the number of dynamic-programming table cells that
/// edit-distance operations accept: 16 Mi cells.
pub const MAX_DP_CELLS: usize = 16 << 20;

/// Upper bound, in bytes, on any single input to a similarity operation: 4 MiB.
pub const MAX_INPUT_LEN: usize = 4 << 20;

/// Upper bound on the n-gram width accepted by n-gram operations: 4 Ki.
pub const MAX_NGRAM_LEN: usize = 4 << 10;

/// Error type returned by the bounded string similarity kernels.
///
/// This is a plain `String` carrying a human-readable message.
pub type SimilarityError = String;

/// Narrow a `usize` into the `u32` used for public distance values.
///
/// `label` names the quantity being converted and is echoed in the error
/// message together with the offending `value`.
///
/// # Errors
///
/// Returns an actionable `Fix: ...` error when `value` cannot be represented
/// as a `u32`.
pub fn to_u32(value: usize, label: &str) -> Result<u32, SimilarityError> {
    match u32::try_from(value) {
        Ok(narrowed) => Ok(narrowed),
        Err(_) => Err(format!("Fix: reduce inputs so {label}={value} fits in U32")),
    }
}

/// Check that a `(len(a)+1) x (len(b)+1)` dynamic-programming table is within
/// budget, using only arithmetic (nothing is allocated).
///
/// # Errors
///
/// Returns an actionable `Fix: ...` error when either dimension or their
/// product overflows `usize`, or when the resulting cell count is greater
/// than [`MAX_DP_CELLS`].
pub fn validate_dp_product(a_len: usize, b_len: usize) -> Result<(), SimilarityError> {
    // Each table dimension is the corresponding input length plus one.
    let rows = match a_len.checked_add(1) {
        Some(rows) => rows,
        None => {
            return Err(format!(
                "Fix: reduce len(a); len(a)+1 overflowed for len(a)={a_len}"
            ));
        }
    };
    let cols = match b_len.checked_add(1) {
        Some(cols) => cols,
        None => {
            return Err(format!(
                "Fix: reduce len(b); len(b)+1 overflowed for len(b)={b_len}"
            ));
        }
    };

    // Checked multiply so an oversized table is reported instead of wrapping.
    let cells = match rows.checked_mul(cols) {
        Some(cells) => cells,
        None => {
            return Err(format!(
                "Fix: reduce input lengths; DP cell product overflowed for {rows}x{cols}"
            ));
        }
    };

    if cells <= MAX_DP_CELLS {
        Ok(())
    } else {
        Err(format!(
            "Fix: reduce edit-distance inputs so (len(a)+1)*(len(b)+1) <= {MAX_DP_CELLS}, got {cells}"
        ))
    }
}

/// Check a single similarity input against the [`MAX_INPUT_LEN`] byte cap.
///
/// `name` is only used to identify the input in the error message.
///
/// # Errors
///
/// Returns an actionable `Fix: ...` error when `input` is longer than
/// [`MAX_INPUT_LEN`] bytes.
pub fn validate_input(name: &str, input: &[u8]) -> Result<(), SimilarityError> {
    let len = input.len();
    if len <= MAX_INPUT_LEN {
        return Ok(());
    }
    Err(format!(
        "Fix: reduce len({name}) to <= {MAX_INPUT_LEN} bytes, got {}",
        len
    ))
}

/// Check an n-gram width and hand it back as a `usize`.
///
/// # Errors
///
/// Returns an actionable `Fix: ...` error when `n` is zero, when it is greater
/// than [`MAX_NGRAM_LEN`], or when it cannot be converted to `usize`.
pub fn validate_ngram_len(n: u32) -> Result<usize, SimilarityError> {
    match usize::try_from(n) {
        Ok(0) => Err("Fix: ngram length n must be greater than zero".to_string()),
        Ok(width) if width <= MAX_NGRAM_LEN => Ok(width),
        // Over the cap, or not convertible to `usize`: both cases report the
        // same message with the caller-supplied value.
        _ => Err(format!("Fix: choose n <= {MAX_NGRAM_LEN}, got {n}")),
    }
}

/// Check both inputs of a similarity operation against the length cap.
///
/// `a` is checked first; `b` is only checked when `a` passes.
///
/// # Errors
///
/// Returns the actionable `Fix: ...` error from [`validate_input`] for the
/// first input that is too large.
pub fn validate_pair(a: &[u8], b: &[u8]) -> Result<(), SimilarityError> {
    validate_input("a", a).and_then(|()| validate_input("b", b))
}