use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
/// Builds a temporary chunk identifier of the form `tmp_` followed by 16
/// lowercase hexadecimal digits.
///
/// The digits are a 64-bit hash fed, in this order, with:
///
/// 1. `path`
/// 2. `rank`
/// 3. at most the first 64 characters of `snippet_prefix`
///
/// The snippet is cut on a character boundary rather than a byte offset, so
/// multi-byte UTF-8 text never triggers a slicing panic. Anything after the
/// 64th character has no influence on the result.
///
/// The hash is produced by `DefaultHasher`, whose algorithm the standard
/// library documents as unspecified across releases; treat the identifier as
/// stable within one build only.
#[must_use]
pub fn derive_chunk_id(path: &str, rank: usize, snippet_prefix: &str) -> String {
    // `nth(64)` is the 65th character: its byte offset is where the kept
    // prefix ends. Snippets with 64 characters or fewer are used whole.
    let capped = match snippet_prefix.char_indices().nth(64) {
        Some((byte_offset, _)) => &snippet_prefix[..byte_offset],
        None => snippet_prefix,
    };

    let mut hasher = DefaultHasher::new();
    path.hash(&mut hasher);
    rank.hash(&mut hasher);
    capped.hash(&mut hasher);

    let digest = hasher.finish();
    format!("tmp_{:016x}", digest)
}
#[cfg(test)]
mod tests {
    use super::derive_chunk_id;

    /// The snippet is 63 ASCII bytes followed by a three-byte em dash, so a
    /// plain byte slice at offset 64 would land inside the dash and panic.
    #[test]
    fn multibyte_at_truncation_boundary_does_not_panic() {
        let mut snippet = "a".repeat(63);
        snippet.push_str("— tail");

        assert!(derive_chunk_id("wiki/x.md", 0, &snippet).starts_with("tmp_"));
    }

    /// A snippet well under the 64-character cap must yield the same id on
    /// repeated calls with identical arguments.
    #[test]
    fn shorter_than_cap_is_stable() {
        let make_id = || derive_chunk_id("wiki/x.md", 1, "short snippet");

        let first = make_id();
        let second = make_id();
        assert_eq!(first, second);
    }
}