use crate::index::Sha256;
/// Hashes annotation text after whitespace normalization.
///
/// The input is first passed through `normalize_text`, and the UTF-8 bytes of
/// the normalized string are then handed to `Sha256::from_bytes`. Two inputs
/// that normalize to the same string therefore yield the same hash.
#[aristo::intent(
    "Whitespace differences in annotation text — leading, trailing, or \
     runs collapsed to one space — do not change the text hash. \
     Reformatting prose is not drift.",
    verify = "test",
    id = "text_hash_normalizes_whitespace"
)]
pub fn text_hash(text: &str) -> Sha256 {
    // The normalized temporary lives until the end of this expression, which
    // is long enough for `from_bytes` to read the borrowed bytes.
    Sha256::from_bytes(normalize_text(text).as_bytes())
}
/// Hashes a covered region exactly as given.
///
/// The UTF-8 bytes of `body` are passed to `Sha256::from_bytes` untouched:
/// no trimming and no whitespace collapsing happens here, in contrast to
/// `text_hash`.
#[aristo::intent(
    "Every byte inside the covered region is significant to the body \
     hash. Identical hash means byte-identical region; any difference, \
     including whitespace, is drift.",
    verify = "test",
    id = "body_hash_is_verbatim"
)]
pub fn body_hash(body: &str) -> Sha256 {
    let verbatim_bytes = body.as_bytes();
    Sha256::from_bytes(verbatim_bytes)
}
/// Canonicalizes whitespace in `text` before hashing.
///
/// Leading and trailing whitespace is removed with `str::trim`, and every
/// interior run of ASCII whitespace (space, tab, line feed, form feed,
/// carriage return) becomes a single `' '`.
///
/// Note the asymmetry: `str::trim` strips Unicode whitespace at the edges,
/// but only ASCII whitespace is collapsed in the interior. A non-ASCII
/// whitespace character (for example U+00A0) inside the text is kept as-is.
fn normalize_text(text: &str) -> String {
    // The result can never be longer than the input, so one allocation suffices.
    let mut normalized = String::with_capacity(text.len());
    for word in text.trim().split_ascii_whitespace() {
        // Words are never empty, so a non-empty buffer means at least one word
        // was already written and a separator is due.
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
#[cfg(test)]
mod tests {
    //! Unit tests for the two hashing policies: `text_hash` is insensitive to
    //! whitespace layout, `body_hash` is sensitive to every byte.
    //!
    //! Several inputs below deliberately contain runs of multiple spaces.
    //! Those runs are the thing under test; do not "tidy" them.

    use super::*;

    #[test]
    fn text_hash_is_deterministic() {
        let a = text_hash("hello world");
        let b = text_hash("hello world");
        assert_eq!(a, b);
    }

    #[test]
    fn text_hash_distinguishes_different_inputs() {
        assert_ne!(text_hash("hello"), text_hash("world"));
    }

    #[test]
    fn text_hash_returns_canonical_sha256_form() {
        let h = text_hash("anything").to_string();
        assert!(h.starts_with("sha256:"), "got: {h}");
        assert_eq!(h.len(), "sha256:".len() + 64);
        Sha256::parse(&h).expect("from_bytes output must round-trip through parse");
    }

    #[test]
    fn text_hash_ignores_leading_and_trailing_whitespace() {
        let a = text_hash("hello world");
        let b = text_hash("  hello world  ");
        let c = text_hash("\nhello world\n");
        assert_eq!(a, b);
        assert_eq!(a, c);
    }

    #[test]
    fn text_hash_collapses_internal_whitespace_runs() {
        let a = text_hash("hello world");
        // A run of three spaces must hash like a single space.
        let b = text_hash("hello   world");
        let c = text_hash("hello\tworld");
        // Newline followed by indentation, as produced by re-wrapping prose.
        let d = text_hash("hello\n    world");
        assert_eq!(a, b);
        assert_eq!(a, c);
        assert_eq!(a, d, "lint-induced re-wrapping must not change text_hash");
    }

    #[test]
    fn text_hash_distinguishes_word_order() {
        assert_ne!(text_hash("hello world"), text_hash("world hello"));
    }

    #[test]
    fn body_hash_is_deterministic() {
        let body = "fn x() -> i32 { 42 }";
        assert_eq!(body_hash(body), body_hash(body));
    }

    #[test]
    fn body_hash_is_verbatim_no_normalization() {
        // The second input differs from the first only by doubled spaces.
        assert_ne!(
            body_hash("fn x() -> i32 { 42 }"),
            body_hash("fn x()  ->  i32 {  42 }"),
            "extra spaces inside body must change body_hash"
        );
        assert_ne!(
            body_hash("fn x() {}"),
            body_hash(" fn x() {} "),
            "leading/trailing whitespace must change body_hash"
        );
    }

    #[test]
    fn body_hash_and_text_hash_differ_when_input_has_collapsible_whitespace() {
        // Doubled spaces are collapsed by `text_hash` but kept by `body_hash`,
        // so the two functions end up hashing different byte strings.
        let s = "fn x()  ->  i32 {  42 }";
        assert_ne!(
            text_hash(s),
            body_hash(s),
            "doubled internal whitespace must hash differently under text vs body"
        );
    }
}