// terminals_core/substrate/sematon.rs

//! `Sematon<T>` — The Smallest Meaning-Bearing Unit (Rust port).
//!
//! S = (T, W, A, H, c, σ) where:
//!   T = typed payload (centroid embedding of converged cluster)
//!   W = ConvergenceWitness { R, entropy, converged, step }
//!   A = p-adic address
//!   H = Shannon entropy of payload
//!   c = constructive flag (Deutsch-Marletto invariant)
//!   σ = FNV-1a shape hash

use std::sync::atomic::{AtomicU64, Ordering};

use serde::{Deserialize, Serialize};

use super::graph::PadicAddr;
use super::witness::ConvergenceWitness;

15/// The smallest meaning-bearing unit with operational consequence.
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct Sematon<T: Clone> {
18    /// Unique identifier.
19    pub id: String,
20    /// The meaning-bearing payload.
21    pub payload: T,
22    /// Convergence witness at extraction time.
23    pub witness: ConvergenceWitness,
24    /// P-adic hierarchical address.
25    pub address: PadicAddr,
26    /// Shannon entropy of payload (bits).
27    pub entropy: f32,
28    /// Semantic density (bits per token).
29    pub density: f32,
30    /// Impedance: Z = H / (ρ × R). Resistance to constructive transformation.
31    pub impedance: f32,
32    /// Deterministic shape hash σ (FNV-1a).
33    pub shape_hash: u32,
34    /// Constructor flag: can participate in further transformations?
35    /// True iff: witness.converged AND entropy > 0 AND payload is non-empty.
36    pub constructive: bool,
37    /// Source surface that produced this sematon.
38    pub source: String,
39}
40
/// Process-wide counter used to generate unique sematon IDs.
///
/// Starts at zero; each constructed sematon takes the current value and
/// advances the counter by one.
static SEMATON_COUNTER: AtomicU64 = AtomicU64::new(0);

44impl<T: Clone + Serialize> Sematon<T> {
45    /// Create a new sematon from its components.
46    pub fn new(
47        payload: T,
48        witness: ConvergenceWitness,
49        address: PadicAddr,
50        source: &str,
51    ) -> Self {
52        let counter = SEMATON_COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
53        let id = format!("sem_rust_{}", counter);
54
55        // Compute entropy from serialized payload
56        let json = serde_json::to_string(&payload).unwrap_or_default();
57        let entropy = Self::payload_entropy(&json);
58        let density = Self::payload_density(&json, entropy);
59
60        // Impedance: Z = H / (ρ × R)
61        let impedance = if density > 0.0 && witness.r > 0.0 {
62            entropy / (density * witness.r)
63        } else {
64            f32::INFINITY
65        };
66
67        // Shape hash from canonical representation
68        let hash_input = format!(
69            "{}|{}|{:.6}|{}|{}",
70            source,
71            json,
72            witness.r,
73            witness.converged,
74            address.to_string()
75        );
76        let shape_hash = fnv1a_str(&hash_input);
77
78        // Constructor flag: Deutsch-Marletto invariant
79        let constructive = witness.converged
80            && entropy.is_finite()
81            && entropy > 0.0
82            && !json.is_empty()
83            && json != "null";
84
85        Self {
86            id,
87            payload,
88            witness,
89            address,
90            entropy,
91            density,
92            impedance,
93            shape_hash,
94            constructive,
95            source: source.to_string(),
96        }
97    }
98
99    /// Check if this sematon can participate in further transformations.
100    pub fn is_realizable(&self) -> bool {
101        self.constructive
102    }
103
104    fn payload_entropy(json: &str) -> f32 {
105        if json.is_empty() {
106            return 0.0;
107        }
108        let mut freq = [0u32; 256];
109        for &byte in json.as_bytes() {
110            freq[byte as usize] += 1;
111        }
112        let total = json.len() as f32;
113        let mut h = 0.0f32;
114        for &count in &freq {
115            if count > 0 {
116                let p = count as f32 / total;
117                h -= p * p.log2();
118            }
119        }
120        h
121    }
122
123    fn payload_density(json: &str, entropy: f32) -> f32 {
124        // Approximate token count by splitting on non-word boundaries
125        let token_count = json
126            .split(|c: char| c.is_whitespace() || "{}[]\":,".contains(c))
127            .filter(|s| !s.is_empty())
128            .count();
129        if token_count > 0 {
130            entropy / token_count as f32
131        } else {
132            0.0
133        }
134    }
135}
136
/// Computes the 32-bit FNV-1a hash of the UTF-8 bytes of `input`.
///
/// Starts from the FNV offset basis and, for every byte, XORs the byte into
/// the hash and then multiplies by the FNV prime with wrapping arithmetic.
/// The empty string therefore hashes to the offset basis itself.
fn fnv1a_str(input: &str) -> u32 {
    const FNV_OFFSET_BASIS: u32 = 0x811c_9dc5;
    const FNV_PRIME: u32 = 0x0100_0193;

    input.bytes().fold(FNV_OFFSET_BASIS, |hash, byte| {
        (hash ^ u32::from(byte)).wrapping_mul(FNV_PRIME)
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A converged witness with a non-empty payload yields a constructive
    /// sematon with positive entropy and finite impedance.
    #[test]
    fn test_sematon_constructive_when_converged() {
        let w = ConvergenceWitness {
            r: 0.95,
            entropy: 0.3,
            converged: true,
            step: 10,
        };
        let s = Sematon::new(
            vec![1.0f32, 2.0, 3.0],
            w,
            PadicAddr { base: 0, coeff0: 0, coeff1: 1 },
            "test",
        );
        assert!(s.constructive);
        assert!(s.is_realizable());
        assert!(s.entropy > 0.0);
        assert!(s.impedance.is_finite());
    }

    /// An unconverged witness never produces a constructive sematon.
    #[test]
    fn test_sematon_not_constructive_when_unconverged() {
        let w = ConvergenceWitness {
            r: 0.3,
            entropy: 0.8,
            converged: false,
            step: 5,
        };
        let s = Sematon::new("hello".to_string(), w, PadicAddr::default(), "test");
        assert!(!s.constructive);
        assert!(!s.is_realizable());
    }

    /// Two sematons built from identical inputs still get distinct ids.
    #[test]
    fn test_sematon_unique_ids() {
        // A fresh witness per call: `new` takes the witness by value, so
        // this does not depend on `ConvergenceWitness` being `Copy`.
        let witness = || ConvergenceWitness { r: 0.9, entropy: 0.1, converged: true, step: 1 };
        let a = Sematon::new(42u32, witness(), PadicAddr::default(), "test");
        let b = Sematon::new(42u32, witness(), PadicAddr::default(), "test");
        assert_ne!(a.id, b.id);
    }

    /// A zero `r` makes the impedance infinite.
    #[test]
    fn test_sematon_impedance_infinite_when_zero_r() {
        let w = ConvergenceWitness { r: 0.0, entropy: 0.5, converged: false, step: 0 };
        let s = Sematon::new(vec![1.0f32], w, PadicAddr::default(), "test");
        assert!(s.impedance.is_infinite());
    }

    /// Different payloads produce different shape hashes.
    #[test]
    fn test_sematon_shape_hash_varies() {
        let witness = || ConvergenceWitness { r: 0.9, entropy: 0.1, converged: true, step: 1 };
        let a = Sematon::new(1u32, witness(), PadicAddr::default(), "test");
        let b = Sematon::new(2u32, witness(), PadicAddr::default(), "test");
        assert_ne!(a.shape_hash, b.shape_hash);
    }

    /// Identical inputs produce identical shape hashes: the id is not part
    /// of the hashed representation.
    #[test]
    fn test_sematon_shape_hash_deterministic() {
        let witness = || ConvergenceWitness { r: 0.9, entropy: 0.1, converged: true, step: 1 };
        let a = Sematon::new(7u32, witness(), PadicAddr::default(), "test");
        let b = Sematon::new(7u32, witness(), PadicAddr::default(), "test");
        assert_eq!(a.shape_hash, b.shape_hash);
    }
}