use crate::index::{AnnotationId, AnnotationKind};
const READABLE_TARGET_LEN: usize = 32;
const OPAQUE_BODY_LEN: usize = 8;
const OPAQUE_ALPHABET: &[u8] = b"abcdefghjkmnpqrstuvwxyz23456789";
pub fn snake_case_from_text(text: &str) -> Option<String> {
let mut out = String::new();
let mut last_was_underscore = true;
for ch in text.chars() {
if out.len() >= READABLE_TARGET_LEN {
break;
}
if ch.is_ascii_alphanumeric() {
out.push(ch.to_ascii_lowercase());
last_was_underscore = false;
} else if !last_was_underscore {
out.push('_');
last_was_underscore = true;
}
}
while out.ends_with('_') {
out.pop();
}
if out.is_empty() {
return None;
}
if !out.starts_with(|c: char| c.is_ascii_lowercase() || c == '_') {
out.insert_str(0, "n_");
}
Some(out)
}
fn kind_tag(kind: AnnotationKind) -> &'static [u8] {
match kind {
AnnotationKind::Intent => b"intent",
AnnotationKind::Assume => b"assume",
}
}
pub fn id_bucket_key(
kind: AnnotationKind,
text: &str,
site_label: &str,
) -> (AnnotationKind, String, String) {
(
kind,
crate::hash::normalize_text(text),
site_label.to_string(),
)
}
#[aristo::intent(
"A stamp-assigned id is a pure function of the annotation's identity — \
its kind, its whitespace-normalized text, and its enclosing site label \
(plus a source-order ordinal only when those three collide). The same \
annotation therefore mints the same id on every `aristo stamp`, which \
is what lets the index re-associate its prior status and proof instead \
of treating it as removed-then-new. Editing the covered CODE does not \
change the id — that is body-hash drift, tracked separately; only \
rewording the claim or renaming/moving the enclosing item does. The id \
stays inside the `aret_` namespace charset so it always parses.",
verify = "test",
id = "deterministic_id_is_pure_function_of_identity"
)]
pub fn deterministic_id(
kind: AnnotationKind,
text: &str,
site_label: &str,
ordinal: usize,
) -> AnnotationId {
use sha2::{Digest, Sha256};
let mut hasher = Sha256::new();
hasher.update(kind_tag(kind));
hasher.update([0u8]);
hasher.update(crate::hash::normalize_text(text).as_bytes());
hasher.update([0u8]);
hasher.update(site_label.as_bytes());
if ordinal > 0 {
hasher.update([0u8]);
hasher.update((ordinal as u64).to_le_bytes());
}
let digest = hasher.finalize();
let mut s = String::with_capacity("aret_".len() + OPAQUE_BODY_LEN);
s.push_str("aret_");
for &byte in digest.iter().take(OPAQUE_BODY_LEN) {
let idx = (byte as usize) % OPAQUE_ALPHABET.len();
s.push(OPAQUE_ALPHABET[idx] as char);
}
AnnotationId::parse(&s).expect("deterministic-id alphabet stays inside aret_ namespace charset")
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn extracts_readable_id_from_typical_intent_text() {
let id = snake_case_from_text("the function returns the input plus one")
.expect("readable id should derive from normal English");
assert!(id.starts_with("the_function_returns_the"), "got: {id}");
assert!(id.len() <= READABLE_TARGET_LEN);
}
#[test]
fn collapses_runs_of_non_alphanumeric_into_single_underscore() {
let id = snake_case_from_text("hello, world!! how are you?").unwrap();
assert_eq!(id, "hello_world_how_are_you");
}
#[test]
fn lowercases_uppercase_input() {
let id = snake_case_from_text("Balance NoDuplicateCells").unwrap();
assert_eq!(id, "balance_noduplicatecells");
}
#[test]
fn caps_at_target_length() {
let long = "a".repeat(200);
let id = snake_case_from_text(&long).unwrap();
assert!(id.len() <= READABLE_TARGET_LEN, "got len {}", id.len());
}
#[test]
fn returns_none_for_empty_input() {
assert_eq!(snake_case_from_text(""), None);
}
#[test]
fn returns_none_for_punctuation_only() {
assert_eq!(snake_case_from_text("!!! ??? ..."), None);
}
#[test]
fn returns_none_for_non_ascii_only() {
assert_eq!(snake_case_from_text("你好世界"), None);
}
#[test]
fn prepends_n_underscore_when_text_starts_with_digit() {
let id = snake_case_from_text("42 is the answer").unwrap();
assert!(id.starts_with("n_"), "got: {id}");
AnnotationId::parse(&id).expect("output should be a valid local AnnotationId");
}
#[test]
fn output_is_a_valid_annotation_id() {
for input in [
"the function returns x",
"Balance no duplicate cells",
"BTreeMap maintains sort order",
" whitespace trimmed ",
"weird,,,,,punctuation:::",
"42 starts with digit",
] {
let id = snake_case_from_text(input).unwrap();
AnnotationId::parse(&id)
.unwrap_or_else(|e| panic!("input {input:?} → {id:?} failed parse: {e}"));
}
}
fn det(kind: AnnotationKind, text: &str, site: &str, ord: usize) -> String {
deterministic_id(kind, text, site, ord).as_str().to_owned()
}
#[test]
fn deterministic_id_has_aret_prefix_and_length() {
let id = deterministic_id(AnnotationKind::Intent, "some claim", "fn foo", 0);
assert!(id.as_str().starts_with("aret_"), "got: {id}");
assert_eq!(id.as_str().len(), "aret_".len() + OPAQUE_BODY_LEN);
}
#[test]
fn deterministic_id_uses_safe_alphabet_only() {
let id = deterministic_id(AnnotationKind::Assume, "another claim", "struct Bar", 3);
let body = id.as_str().strip_prefix("aret_").unwrap();
for ch in body.chars() {
assert!(
OPAQUE_ALPHABET.contains(&(ch as u8)),
"deterministic id contains char `{ch}` not in alphabet"
);
}
}
#[test]
fn deterministic_id_output_always_parses() {
for (kind, text, site, ord) in [
(
AnnotationKind::Intent,
"the function returns x",
"fn f",
0usize,
),
(AnnotationKind::Assume, "caller holds the lock", "fn g", 5),
(AnnotationKind::Intent, "短い説明", "fn h", 1),
(AnnotationKind::Intent, "", "fn empty_text", 0),
] {
let id = deterministic_id(kind, text, site, ord);
AnnotationId::parse(id.as_str())
.unwrap_or_else(|e| panic!("({kind:?},{text:?},{site:?},{ord}) → {id}: {e}"));
}
}
#[test]
fn deterministic_id_is_stable_for_same_inputs() {
let a = det(
AnnotationKind::Intent,
"balance is preserved",
"fn balance",
0,
);
let b = det(
AnnotationKind::Intent,
"balance is preserved",
"fn balance",
0,
);
assert_eq!(a, b);
}
#[test]
fn deterministic_id_changes_with_text() {
let a = det(AnnotationKind::Intent, "claim one", "fn f", 0);
let b = det(AnnotationKind::Intent, "claim two", "fn f", 0);
assert_ne!(a, b, "rewording the claim must change the id");
}
#[test]
fn deterministic_id_changes_with_site() {
let a = det(AnnotationKind::Intent, "same claim", "fn old_name", 0);
let b = det(AnnotationKind::Intent, "same claim", "fn new_name", 0);
assert_ne!(a, b, "moving to a new site must change the id");
}
#[test]
fn deterministic_id_changes_with_kind() {
let i = det(AnnotationKind::Intent, "same words", "fn f", 0);
let a = det(AnnotationKind::Assume, "same words", "fn f", 0);
assert_ne!(i, a, "kind is part of the identity");
}
#[test]
fn deterministic_id_ignores_text_whitespace() {
let a = det(AnnotationKind::Intent, "hello world", "fn f", 0);
let b = det(AnnotationKind::Intent, " hello world ", "fn f", 0);
let c = det(AnnotationKind::Intent, "hello\n\tworld", "fn f", 0);
assert_eq!(a, b);
assert_eq!(a, c);
}
#[test]
fn deterministic_id_distinct_ordinals_differ() {
let zero = det(AnnotationKind::Intent, "dup", "fn f", 0);
let one = det(AnnotationKind::Intent, "dup", "fn f", 1);
let two = det(AnnotationKind::Intent, "dup", "fn f", 2);
assert_ne!(zero, one);
assert_ne!(one, two);
assert_ne!(zero, two);
}
#[test]
fn id_bucket_key_collapses_whitespace_and_includes_kind() {
let k1 = id_bucket_key(AnnotationKind::Intent, "a b", "fn f");
let k2 = id_bucket_key(AnnotationKind::Intent, "a b", "fn f");
assert_eq!(k1, k2);
let k3 = id_bucket_key(AnnotationKind::Assume, "a b", "fn f");
assert_ne!(k1, k3, "kind is part of the bucket");
}
}