vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
use crate::ops::string_similarity::validation::{to_u32, validate_input, validate_ngram_len, SimilarityError};
use std::collections::BTreeMap;

// CPU reference kernel for `string_similarity.ngram_histogram`.



/// Build a histogram of every overlapping `n`-byte window of `input`.
///
/// Each distinct n-gram appears exactly once in the output, paired with the
/// number of windows that matched it. Entries come out in ascending
/// lexicographic order of the gram bytes, because the tally is kept in a
/// `BTreeMap` keyed by `Vec<u8>`. When the validated window length is larger
/// than `input.len()` the result is an empty vector.
///
/// # Errors
///
/// Propagates the `SimilarityError` produced by any of the validation helpers:
/// `validate_input` (checked first, on `input`), `validate_ngram_len` (on `n`),
/// or `to_u32` when an occurrence count cannot be represented as `u32`.
pub fn ngram_histogram(input: &[u8], n: u32) -> Result<Vec<(Vec<u8>, u32)>, SimilarityError> {
    validate_input("input", input)?;
    let width = validate_ngram_len(n)?;

    // Ordered map so the final listing is sorted by gram bytes without a
    // separate sort pass.
    let mut tally: BTreeMap<Vec<u8>, usize> = BTreeMap::new();
    if width <= input.len() {
        for window in input.windows(width) {
            *tally.entry(window.to_vec()).or_default() += 1;
        }
    }

    // Narrow each count to `u32`, stopping at the first one that does not fit.
    let mut histogram = Vec::with_capacity(tally.len());
    for (gram, occurrences) in tally {
        let narrowed = to_u32(occurrences, "ngram count")?;
        histogram.push((gram, narrowed));
    }
    Ok(histogram)
}

// Backend-specific lowering marker.

// WGSL lowering source for `string_similarity.ngram_histogram`.

/// Lexicographic n-gram histogram kernel used by GPU parity tests and intrinsic dispatch.
///
/// The string is assembled at compile time from three pieces, in order: the
/// contents of `../wgsl/common_params.wgsl`, a single `"\n"`, and the contents
/// of `wgsl/ngram_histogram.wgsl`. Both paths are resolved by `include_str!`
/// relative to this source file.
pub const WGSL: &str = concat!(
    include_str!("../wgsl/common_params.wgsl"),
    // Explicit separator so the end of the first file cannot run into the
    // first line of the second if the first lacks a trailing newline.
    "\n",
    include_str!("wgsl/ngram_histogram.wgsl"),
);