1#![forbid(unsafe_code)]
10
11mod canon;
12
13use blake3::Hasher;
14use canon::to_canon_vec;
15use serde::{Deserialize, Serialize};
16use serde_json::Value;
17use std::collections::BTreeMap;
18
19#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
21pub struct SemanticUnit {
22 pub kind: String,
24 pub slots: BTreeMap<String, Value>,
26 pub source_hash: [u8; 32],
28}
29
30impl SemanticUnit {
31 #[must_use]
33 pub fn from_intent(text: &str) -> Self {
34 let norm = normalize(text);
35 let mut slots = BTreeMap::new();
36 slots.insert("utterance".to_string(), Value::String(norm.clone()));
37 let source_hash = blake3::hash(norm.as_bytes()).into();
38 Self {
39 kind: "freeform.intent".to_string(),
40 slots,
41 source_hash,
42 }
43 }
44
45 #[must_use]
47 pub fn canonical_bytes(&self) -> Vec<u8> {
48 to_canon_vec(self)
49 }
50
51 #[must_use]
53 pub fn cid_blake3(&self) -> [u8; 32] {
54 let mut h = Hasher::new();
55 h.update(&self.canonical_bytes());
56 h.finalize().into()
57 }
58}
59
/// Produces the normalized form of `s` used for hashing and comparison.
///
/// Leading and trailing whitespace is dropped, every interior run of whitespace
/// becomes a single ASCII space, and ASCII letters are lowercased. Non-ASCII
/// characters are passed through unchanged.
fn normalize(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // `split_whitespace` skips leading/trailing whitespace and yields the
    // non-whitespace runs, so joining them with one space collapses every gap.
    for (idx, word) in s.split_whitespace().enumerate() {
        if idx > 0 {
            out.push(' ');
        }
        out.push_str(word);
    }
    // Only ASCII letters are affected; other code points keep their case.
    out.make_ascii_lowercase();
    out
}
76
#[cfg(test)]
mod tests {
    use super::*;

    /// Letter case and surrounding whitespace must not change the bytes or the CID.
    #[test]
    fn determinism_basic() {
        let padded_mixed_case = SemanticUnit::from_intent(" Hello WORLD ");
        let plain = SemanticUnit::from_intent("hello world");
        assert_eq!(padded_mixed_case.canonical_bytes(), plain.canonical_bytes());
        assert_eq!(padded_mixed_case.cid_blake3(), plain.cid_blake3());
    }

    /// Extra surrounding whitespace must yield the same canonical bytes.
    #[test]
    fn determinism_whitespace_insensitive() {
        let tight = SemanticUnit::from_intent("grant access to alice");
        let padded = SemanticUnit::from_intent(" grant access to alice ");
        assert_eq!(tight.canonical_bytes(), padded.canonical_bytes());
    }

    /// The CID must equal a one-shot BLAKE3 hash of the canonical bytes.
    #[test]
    fn cid_is_blake3_of_canonical() {
        let unit = SemanticUnit::from_intent("test intent");
        let digest = blake3::hash(&unit.canonical_bytes());
        let expected: [u8; 32] = digest.into();
        assert_eq!(unit.cid_blake3(), expected);
    }
}