/// Upper bound on the dynamic-programming table size `(len(a)+1) * (len(b)+1)`
/// accepted by `validate_dp_product` (16 * 1024 * 1024 = 16_777_216 cells).
pub const MAX_DP_CELLS: usize = 16 * 1024 * 1024;
/// Largest input length, in bytes, accepted by `validate_input`
/// (4 * 1024 * 1024 = 4_194_304 bytes).
pub const MAX_INPUT_LEN: usize = 4 * 1024 * 1024;
/// Largest n-gram length accepted by `validate_ngram_len`.
pub const MAX_NGRAM_LEN: usize = 4096;
/// Error type returned by the validation helpers in this file: a plain
/// `String` holding a human-readable message (the messages built here all
/// start with `Fix:`).
pub type SimilarityError = String;
/// Narrows a `usize` to a `u32`, producing a descriptive error when it does not fit.
///
/// `label` names the quantity being converted and is embedded in the error
/// message together with the offending `value`.
///
/// # Errors
/// Returns a `Fix: ...` message when `value` cannot be represented as a `u32`.
pub fn to_u32(value: usize, label: &str) -> Result<u32, SimilarityError> {
    match u32::try_from(value) {
        Ok(narrowed) => Ok(narrowed),
        Err(_) => Err(format!(
            "Fix: reduce inputs so {label}={value} fits in U32"
        )),
    }
}
/// Checks that an edit-distance DP table of `(a_len + 1) x (b_len + 1)` cells
/// stays within `MAX_DP_CELLS`.
///
/// # Errors
/// Returns a `Fix: ...` message when either `len + 1` overflows `usize`, when
/// the row/column product overflows `usize`, or when the cell count exceeds
/// `MAX_DP_CELLS`.
pub fn validate_dp_product(a_len: usize, b_len: usize) -> Result<(), SimilarityError> {
    // One extra row/column for the empty-prefix border of the table.
    let row_count = match a_len.checked_add(1) {
        Some(count) => count,
        None => {
            return Err(format!(
                "Fix: reduce len(a); len(a)+1 overflowed for len(a)={a_len}"
            ))
        }
    };
    let col_count = match b_len.checked_add(1) {
        Some(count) => count,
        None => {
            return Err(format!(
                "Fix: reduce len(b); len(b)+1 overflowed for len(b)={b_len}"
            ))
        }
    };

    match row_count.checked_mul(col_count) {
        None => Err(format!(
            "Fix: reduce input lengths; DP cell product overflowed for {row_count}x{col_count}"
        )),
        Some(cells) if cells > MAX_DP_CELLS => Err(format!(
            "Fix: reduce edit-distance inputs so (len(a)+1)*(len(b)+1) <= {MAX_DP_CELLS}, got {cells}"
        )),
        Some(_) => Ok(()),
    }
}
/// Verifies that `input` is no longer than `MAX_INPUT_LEN` bytes.
///
/// `name` identifies the input in the error message.
///
/// # Errors
/// Returns a `Fix: ...` message containing `name`, the limit and the actual
/// length when `input.len()` exceeds `MAX_INPUT_LEN`.
pub fn validate_input(name: &str, input: &[u8]) -> Result<(), SimilarityError> {
    let byte_count = input.len();
    if byte_count <= MAX_INPUT_LEN {
        return Ok(());
    }
    Err(format!(
        "Fix: reduce len({name}) to <= {MAX_INPUT_LEN} bytes, got {byte_count}"
    ))
}
/// Validates an n-gram length and returns it as a `usize`.
///
/// # Errors
/// Returns a `Fix: ...` message when `n` is zero, when `n` exceeds
/// `MAX_NGRAM_LEN`, or when `n` cannot be converted to `usize`.
pub fn validate_ngram_len(n: u32) -> Result<usize, SimilarityError> {
    let width = match usize::try_from(n) {
        Ok(width) => width,
        // The conversion can only fail where usize is narrower than u32.
        Err(_) => return Err(format!("Fix: choose n <= {MAX_NGRAM_LEN}, got {n}")),
    };

    match width {
        0 => Err("Fix: ngram length n must be greater than zero".to_string()),
        oversized if oversized > MAX_NGRAM_LEN => {
            Err(format!("Fix: choose n <= {MAX_NGRAM_LEN}, got {oversized}"))
        }
        accepted => Ok(accepted),
    }
}
/// Validates both inputs of a pair with `validate_input`, labelling them
/// `"a"` and `"b"` in any error message.
///
/// # Errors
/// Returns the first failure, checking `a` before `b`.
pub fn validate_pair(a: &[u8], b: &[u8]) -> Result<(), SimilarityError> {
    let labelled = [("a", a), ("b", b)];
    for &(name, bytes) in labelled.iter() {
        validate_input(name, bytes)?;
    }
    Ok(())
}