tdln_ast/
lib.rs

//! TDLN — Canonical AST for deterministic translation of NL/DSL into Logical Atoms.
//!
//! Invariants:
//! - Canonical bytes are deterministic for the same semantic content
//! - CID = `BLAKE3(canonical_bytes)`
//!
//! Canonicalization delegates to `json_atomic` as the single source of truth.
8
9#![forbid(unsafe_code)]
10
11mod canon;
12
13use blake3::Hasher;
14use canon::to_canon_vec;
15use serde::{Deserialize, Serialize};
16use serde_json::Value;
17use std::collections::BTreeMap;
18
19/// Minimal AST node representing a canonical semantic intent.
20#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
21pub struct SemanticUnit {
22    /// Kind of intent (e.g., "policy.allow", "freeform.intent").
23    pub kind: String,
24    /// Named slots after resolution (normalized, deterministic order in canon).
25    pub slots: BTreeMap<String, Value>,
26    /// Hash of the *source* text (not the canonical form). BLAKE3-32.
27    pub source_hash: [u8; 32],
28}
29
30impl SemanticUnit {
31    /// Naive builder from raw input (normalize whitespace to single spaces).
32    #[must_use]
33    pub fn from_intent(text: &str) -> Self {
34        let norm = normalize(text);
35        let mut slots = BTreeMap::new();
36        slots.insert("utterance".to_string(), Value::String(norm.clone()));
37        let source_hash = blake3::hash(norm.as_bytes()).into();
38        Self {
39            kind: "freeform.intent".to_string(),
40            slots,
41            source_hash,
42        }
43    }
44
45    /// Canonical JSON bytes via `json_atomic` — single source of truth.
46    #[must_use]
47    pub fn canonical_bytes(&self) -> Vec<u8> {
48        to_canon_vec(self)
49    }
50
51    /// CID of canonical bytes = BLAKE3-32.
52    #[must_use]
53    pub fn cid_blake3(&self) -> [u8; 32] {
54        let mut h = Hasher::new();
55        h.update(&self.canonical_bytes());
56        h.finalize().into()
57    }
58}
59
/// Normalizes free-form text into a stable comparison form.
///
/// Leading and trailing whitespace is dropped, every internal run of
/// whitespace (as defined by `char::is_whitespace`) becomes a single ASCII
/// space, and ASCII letters are lowercased. Non-ASCII characters are left
/// untouched; lowercasing is deliberately ASCII-only so the output, and any
/// hash derived from it, stays stable.
///
/// Returns an empty string when `s` is empty or contains only whitespace.
fn normalize(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words yielded by `split_whitespace` are never empty, so `out` is
        // empty only before the first word: separators go between words.
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }
    out.make_ascii_lowercase();
    out
}
76
#[cfg(test)]
mod tests {
    use super::*;

    /// Inputs differing only in whitespace and ASCII case must yield the same
    /// canonical bytes and the same CID.
    #[test]
    fn determinism_basic() {
        let a = SemanticUnit::from_intent("  Hello   WORLD ");
        let b = SemanticUnit::from_intent("hello world");
        assert_eq!(a.canonical_bytes(), b.canonical_bytes());
        assert_eq!(a.cid_blake3(), b.cid_blake3());
    }

    /// Same semantic content, different whitespace → identical canonical bytes.
    #[test]
    fn determinism_whitespace_insensitive() {
        let a = SemanticUnit::from_intent("grant access to alice");
        let b = SemanticUnit::from_intent("  grant   access   to   alice  ");
        assert_eq!(a.canonical_bytes(), b.canonical_bytes());
    }

    /// The CID is exactly the BLAKE3 hash of the canonical bytes.
    #[test]
    fn cid_is_blake3_of_canonical() {
        let unit = SemanticUnit::from_intent("test intent");
        let expected_cid: [u8; 32] = blake3::hash(&unit.canonical_bytes()).into();
        assert_eq!(unit.cid_blake3(), expected_cid);
    }

    /// Pins the normalization contract directly: trim, collapse whitespace
    /// runs, lowercase ASCII only.
    #[test]
    fn normalize_trims_collapses_and_lowercases_ascii() {
        assert_eq!(normalize("  Hello\t\n WORLD "), "hello world");
        assert_eq!(normalize("   "), "");
        // Non-ASCII uppercase letters pass through unchanged.
        assert_eq!(normalize("É"), "É");
    }
}
103}