use crate::ops::string_similarity::validation::{to_u32, validate_input, validate_ngram_len, SimilarityError};
use std::collections::BTreeMap;
/// Builds a histogram of every contiguous `n`-byte window (n-gram) of `input`.
///
/// The result holds one `(gram, count)` pair per distinct n-gram, ordered by
/// ascending lexicographic byte order of the gram (the iteration order of the
/// underlying `BTreeMap`). When `n` is larger than `input.len()` there are no
/// windows and an empty vector is returned.
///
/// # Errors
///
/// Propagates any error returned by `validate_input` for `input`, by
/// `validate_ngram_len` for `n`, or by `to_u32` when converting a count.
///
/// # Panics
///
/// `slice::windows` panics on a window size of zero.
/// NOTE(review): `validate_ngram_len` presumably rejects `n == 0`; confirm.
pub fn ngram_histogram(input: &[u8], n: u32) -> Result<Vec<(Vec<u8>, u32)>, SimilarityError> {
    validate_input("input", input)?;
    let n = validate_ngram_len(n)?;
    if n > input.len() {
        return Ok(Vec::new());
    }

    // Key the map by the borrowed window instead of an owned copy: this avoids
    // one heap allocation per window. `&[u8]` and `Vec<u8>` share the same
    // lexicographic ordering, so the output order is unaffected.
    let mut counts: BTreeMap<&[u8], usize> = BTreeMap::new();
    for gram in input.windows(n) {
        *counts.entry(gram).or_insert(0) += 1;
    }

    // Each distinct gram is copied into an owned `Vec` exactly once here.
    counts
        .into_iter()
        .map(|(gram, count)| Ok((gram.to_vec(), to_u32(count, "ngram count")?)))
        .collect()
}
/// WGSL shader source assembled at compile time.
///
/// The string is the contents of `../wgsl/common_params.wgsl`, followed by a
/// single newline, followed by the contents of `wgsl/ngram_histogram.wgsl`
/// (both paths are resolved relative to this source file by `include_str!`).
/// NOTE(review): presumably the GPU counterpart of `ngram_histogram`; confirm
/// against the shader file.
pub const WGSL: &str = concat!(
include_str!("../wgsl/common_params.wgsl"),
"\n",
include_str!("wgsl/ngram_histogram.wgsl"),
);