Skip to main content

harn_vm/flow/
atom.rs

1//! Harn Flow `Atom` primitive.
2//!
3//! An `Atom` is the smallest invertible, CRDT-clean change in Harn Flow.
4//! Atoms are content-addressed: their `AtomId` is the SHA-256 of a canonical
5//! binary encoding of every field except the id and signature themselves.
6//! Every atom carries signed provenance (`(principal, persona, ...)`) and may
7//! point at the atom it constructively reverses via `inverse_of`.
8//!
9//! Two encodings are supported and round-trip 1:1 with the in-memory struct:
10//! * JSON, via `serde` — human-readable interchange / event-log payloads.
11//! * Canonical binary — deterministic length-prefixed bytes used for hashing,
12//!   signing, and on-disk storage.
13//!
14//! See issue #573 ("[flow] Atom type + signed-provenance schema") and parent
15//! epic #571.
16
17use std::fmt;
18
19use base64::Engine as _;
20use ed25519_dalek::{Signature as Ed25519Signature, Signer, SigningKey, Verifier, VerifyingKey};
21use serde::{Deserialize, Deserializer, Serialize, Serializer};
22use sha2::{Digest, Sha256};
23use time::format_description::well_known::Rfc3339;
24use time::OffsetDateTime;
25
/// Magic prefix written at the start of every canonical binary encoding.
const ATOM_BINARY_MAGIC: &[u8; 4] = b"FATM";
/// Format version byte written right after the magic; the decoder rejects
/// any other value.
const ATOM_BINARY_VERSION: u8 = 1;
/// Width in bytes of an `AtomId` (a SHA-256 digest).
const ATOM_ID_BYTES: usize = 32;
/// Width in bytes of a raw Ed25519 public key as stored in `AtomSignature`.
const ED25519_PUBLIC_KEY_BYTES: usize = 32;
/// Width in bytes of a raw Ed25519 signature as stored in `AtomSignature`.
const ED25519_SIGNATURE_BYTES: usize = 64;
31
32/// Errors produced when constructing, encoding, decoding, signing, or
33/// verifying an `Atom`.
34#[derive(Debug)]
35pub enum AtomError {
36    /// JSON (de)serialization failure.
37    Json(String),
38    /// Canonical binary decode failure (truncation, bad tag, oversize length).
39    Binary(String),
40    /// Atom id did not match the recomputed content hash.
41    ContentHashMismatch { expected: AtomId, actual: AtomId },
42    /// `verify` failed because the principal or persona signature is invalid.
43    InvalidSignature(&'static str),
44    /// Apply/invert error (offset out of range or content mismatch).
45    Apply(String),
46    /// Misc validation error (negative timestamp, malformed key bytes, …).
47    Invalid(String),
48}
49
50impl fmt::Display for AtomError {
51    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
52        match self {
53            AtomError::Json(message) => write!(f, "atom json error: {message}"),
54            AtomError::Binary(message) => write!(f, "atom binary error: {message}"),
55            AtomError::ContentHashMismatch { expected, actual } => write!(
56                f,
57                "atom id mismatch: expected {expected}, recomputed {actual}",
58            ),
59            AtomError::InvalidSignature(role) => write!(f, "{role} signature failed verification"),
60            AtomError::Apply(message) => write!(f, "atom apply/invert error: {message}"),
61            AtomError::Invalid(message) => write!(f, "atom invalid: {message}"),
62        }
63    }
64}
65
66impl std::error::Error for AtomError {}
67
68/// 32-byte SHA-256 content address of an `Atom`.
69#[derive(Clone, Copy, PartialEq, Eq, Hash)]
70pub struct AtomId(pub [u8; ATOM_ID_BYTES]);
71
72impl AtomId {
73    /// Produce a hex-encoded representation suitable for logs and JSON.
74    pub fn to_hex(&self) -> String {
75        hex::encode(self.0)
76    }
77
78    /// Parse a 64-character hex string into an `AtomId`.
79    pub fn from_hex(raw: &str) -> Result<Self, AtomError> {
80        let bytes = hex::decode(raw)
81            .map_err(|error| AtomError::Invalid(format!("invalid AtomId hex: {error}")))?;
82        if bytes.len() != ATOM_ID_BYTES {
83            return Err(AtomError::Invalid(format!(
84                "AtomId must be {ATOM_ID_BYTES} bytes, got {}",
85                bytes.len()
86            )));
87        }
88        let mut out = [0u8; ATOM_ID_BYTES];
89        out.copy_from_slice(&bytes);
90        Ok(Self(out))
91    }
92}
93
94impl fmt::Debug for AtomId {
95    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
96        write!(f, "AtomId({})", self.to_hex())
97    }
98}
99
100impl fmt::Display for AtomId {
101    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102        write!(f, "{}", self.to_hex())
103    }
104}
105
106impl Serialize for AtomId {
107    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
108        serializer.serialize_str(&self.to_hex())
109    }
110}
111
112impl<'de> Deserialize<'de> for AtomId {
113    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
114        let raw = String::deserialize(deserializer)?;
115        AtomId::from_hex(&raw).map_err(serde::de::Error::custom)
116    }
117}
118
119/// A single text edit. Atoms hold an ordered list of these.
120///
121/// `Delete` carries the bytes it removes so the inverse operation
122/// (`Insert` of the same content at the same offset) reconstructs the
123/// pre-image without consulting the document.
124#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
125#[serde(tag = "kind", rename_all = "snake_case")]
126pub enum TextOp {
127    /// Insert `content` at byte `offset`.
128    Insert { offset: u64, content: String },
129    /// Delete `content` at byte `offset`. `content` is the removed bytes.
130    Delete { offset: u64, content: String },
131}
132
133impl TextOp {
134    /// Return the constructive inverse: `Insert` ↔ `Delete` with the same
135    /// offset and content.
136    pub fn invert(&self) -> TextOp {
137        match self {
138            TextOp::Insert { offset, content } => TextOp::Delete {
139                offset: *offset,
140                content: content.clone(),
141            },
142            TextOp::Delete { offset, content } => TextOp::Insert {
143                offset: *offset,
144                content: content.clone(),
145            },
146        }
147    }
148
149    /// Apply this op to `document` (as a UTF-8 byte buffer). Returns an error
150    /// if the offset is out of range or if a `Delete`'s recorded content does
151    /// not match what is in the document at that offset.
152    pub fn apply(&self, document: &mut Vec<u8>) -> Result<(), AtomError> {
153        match self {
154            TextOp::Insert { offset, content } => {
155                let offset_usize = usize::try_from(*offset).map_err(|_| {
156                    AtomError::Apply(format!("insert offset {offset} exceeds usize::MAX"))
157                })?;
158                if offset_usize > document.len() {
159                    return Err(AtomError::Apply(format!(
160                        "insert offset {offset_usize} > document length {}",
161                        document.len()
162                    )));
163                }
164                document.splice(offset_usize..offset_usize, content.bytes());
165                Ok(())
166            }
167            TextOp::Delete { offset, content } => {
168                let offset_usize = usize::try_from(*offset).map_err(|_| {
169                    AtomError::Apply(format!("delete offset {offset} exceeds usize::MAX"))
170                })?;
171                let end = offset_usize.checked_add(content.len()).ok_or_else(|| {
172                    AtomError::Apply(format!(
173                        "delete range overflows: offset {offset_usize} + len {}",
174                        content.len()
175                    ))
176                })?;
177                if end > document.len() {
178                    return Err(AtomError::Apply(format!(
179                        "delete range {offset_usize}..{end} exceeds document length {}",
180                        document.len()
181                    )));
182                }
183                if &document[offset_usize..end] != content.as_bytes() {
184                    return Err(AtomError::Apply(format!(
185                        "delete content mismatch at offset {offset_usize}",
186                    )));
187                }
188                document.drain(offset_usize..end);
189                Ok(())
190            }
191        }
192    }
193}
194
195/// Signed provenance of an atom: who emitted it, in what context, and when.
196///
197/// Field shapes intentionally mirror the strings used elsewhere in the runtime
198/// (trust graph, observability spans, persona ledger) so atoms can be joined
199/// against existing event streams without translation.
200#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
201pub struct Provenance {
202    /// Stable principal id (e.g. user account, service identity).
203    pub principal: String,
204    /// Persona name acting on behalf of the principal (e.g. `ship-captain`).
205    pub persona: String,
206    /// Run id of the agent invocation that produced this atom.
207    pub agent_run_id: String,
208    /// Tool-call id within that run, if the atom came from a specific call.
209    #[serde(default, skip_serializing_if = "Option::is_none")]
210    pub tool_call_id: Option<String>,
211    /// Trace id binding this atom to a distributed trace.
212    pub trace_id: String,
213    /// Reference to the transcript that contextualizes this change.
214    pub transcript_ref: String,
215    /// Wall-clock timestamp the atom was created.
216    #[serde(with = "time::serde::rfc3339")]
217    pub timestamp: OffsetDateTime,
218}
219
220impl Provenance {
221    /// Convenience constructor with a default `OffsetDateTime::now_utc()`
222    /// timestamp.
223    pub fn new(
224        principal: impl Into<String>,
225        persona: impl Into<String>,
226        agent_run_id: impl Into<String>,
227        trace_id: impl Into<String>,
228        transcript_ref: impl Into<String>,
229    ) -> Self {
230        Self {
231            principal: principal.into(),
232            persona: persona.into(),
233            agent_run_id: agent_run_id.into(),
234            tool_call_id: None,
235            trace_id: trace_id.into(),
236            transcript_ref: transcript_ref.into(),
237            timestamp: OffsetDateTime::now_utc(),
238        }
239    }
240}
241
242/// Detached Ed25519 signatures over the `AtomId`.
243///
244/// Both keys sign the same payload (the 32-byte AtomId). The principal key
245/// represents the trust-graph identity that is ultimately accountable for the
246/// change; the persona key represents the agent persona that emitted it.
247/// Verification requires *both* signatures to be valid, which lets the trust
248/// graph attribute revocations (per-persona rotation) without invalidating
249/// atoms whose principal is still trusted.
250#[derive(Clone, Copy, PartialEq, Eq)]
251pub struct AtomSignature {
252    pub principal_key: [u8; ED25519_PUBLIC_KEY_BYTES],
253    pub principal_sig: [u8; ED25519_SIGNATURE_BYTES],
254    pub persona_key: [u8; ED25519_PUBLIC_KEY_BYTES],
255    pub persona_sig: [u8; ED25519_SIGNATURE_BYTES],
256}
257
258impl fmt::Debug for AtomSignature {
259    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
260        f.debug_struct("AtomSignature")
261            .field("principal_key", &hex::encode(self.principal_key))
262            .field("persona_key", &hex::encode(self.persona_key))
263            .finish_non_exhaustive()
264    }
265}
266
267#[derive(Serialize, Deserialize)]
268struct AtomSignatureWire {
269    principal_key: String,
270    principal_sig: String,
271    persona_key: String,
272    persona_sig: String,
273}
274
275impl Serialize for AtomSignature {
276    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
277        let b64 = base64::engine::general_purpose::STANDARD;
278        AtomSignatureWire {
279            principal_key: b64.encode(self.principal_key),
280            principal_sig: b64.encode(self.principal_sig),
281            persona_key: b64.encode(self.persona_key),
282            persona_sig: b64.encode(self.persona_sig),
283        }
284        .serialize(serializer)
285    }
286}
287
288impl<'de> Deserialize<'de> for AtomSignature {
289    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
290        let wire = AtomSignatureWire::deserialize(deserializer)?;
291        let b64 = base64::engine::general_purpose::STANDARD;
292        fn copy_into<const N: usize, E: serde::de::Error>(
293            label: &str,
294            raw: Vec<u8>,
295        ) -> Result<[u8; N], E> {
296            if raw.len() != N {
297                return Err(serde::de::Error::custom(format!(
298                    "{label} must be {N} bytes, got {}",
299                    raw.len()
300                )));
301            }
302            let mut out = [0u8; N];
303            out.copy_from_slice(&raw);
304            Ok(out)
305        }
306        let principal_key_bytes = b64
307            .decode(wire.principal_key.as_bytes())
308            .map_err(serde::de::Error::custom)?;
309        let principal_sig_bytes = b64
310            .decode(wire.principal_sig.as_bytes())
311            .map_err(serde::de::Error::custom)?;
312        let persona_key_bytes = b64
313            .decode(wire.persona_key.as_bytes())
314            .map_err(serde::de::Error::custom)?;
315        let persona_sig_bytes = b64
316            .decode(wire.persona_sig.as_bytes())
317            .map_err(serde::de::Error::custom)?;
318        Ok(AtomSignature {
319            principal_key: copy_into::<ED25519_PUBLIC_KEY_BYTES, D::Error>(
320                "principal_key",
321                principal_key_bytes,
322            )?,
323            principal_sig: copy_into::<ED25519_SIGNATURE_BYTES, D::Error>(
324                "principal_sig",
325                principal_sig_bytes,
326            )?,
327            persona_key: copy_into::<ED25519_PUBLIC_KEY_BYTES, D::Error>(
328                "persona_key",
329                persona_key_bytes,
330            )?,
331            persona_sig: copy_into::<ED25519_SIGNATURE_BYTES, D::Error>(
332                "persona_sig",
333                persona_sig_bytes,
334            )?,
335        })
336    }
337}
338
339/// The core flow primitive.
340///
341/// `id` is derived from the rest of the content; constructors and decoders
342/// recompute and validate it. Two atoms that encode the same ops, parents,
343/// provenance, and inverse_of always have the same id, regardless of which
344/// encoding they were materialized from.
345#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
346pub struct Atom {
347    pub id: AtomId,
348    pub ops: Vec<TextOp>,
349    pub parents: Vec<AtomId>,
350    pub provenance: Provenance,
351    pub signature: AtomSignature,
352    #[serde(default, skip_serializing_if = "Option::is_none")]
353    pub inverse_of: Option<AtomId>,
354}
355
356impl Atom {
357    /// Sign and assemble an atom from its content.
358    ///
359    /// Steps: encode the body canonically, derive `AtomId` as `SHA-256(body)`,
360    /// sign that 32-byte id with both keys, return the assembled atom.
361    pub fn sign(
362        ops: Vec<TextOp>,
363        parents: Vec<AtomId>,
364        provenance: Provenance,
365        inverse_of: Option<AtomId>,
366        principal_key: &SigningKey,
367        persona_key: &SigningKey,
368    ) -> Result<Self, AtomError> {
369        let body_bytes = encode_body_canonical(&ops, &parents, &provenance, &inverse_of)?;
370        let id = AtomId(Sha256::digest(&body_bytes).into());
371        let principal_sig = principal_key.sign(&id.0);
372        let persona_sig = persona_key.sign(&id.0);
373        Ok(Atom {
374            id,
375            ops,
376            parents,
377            provenance,
378            inverse_of,
379            signature: AtomSignature {
380                principal_key: principal_key.verifying_key().to_bytes(),
381                principal_sig: principal_sig.to_bytes(),
382                persona_key: persona_key.verifying_key().to_bytes(),
383                persona_sig: persona_sig.to_bytes(),
384            },
385        })
386    }
387
388    /// Build the inverse of `target`. The new atom's ops are the reverse
389    /// list of `target.ops`, each individually inverted, with `inverse_of`
390    /// pointing at `target.id`. Useful for "undo" — the user-facing model
391    /// stacks an inverse atom rather than mutating history.
392    pub fn invert(
393        target: &Atom,
394        provenance: Provenance,
395        principal_key: &SigningKey,
396        persona_key: &SigningKey,
397    ) -> Result<Self, AtomError> {
398        let ops: Vec<TextOp> = target.ops.iter().rev().map(TextOp::invert).collect();
399        Atom::sign(
400            ops,
401            vec![target.id],
402            provenance,
403            Some(target.id),
404            principal_key,
405            persona_key,
406        )
407    }
408
409    /// Recompute `id` from the body and check that it matches the stored
410    /// `id`. Returns `Err(ContentHashMismatch)` on drift.
411    pub fn verify_content_hash(&self) -> Result<(), AtomError> {
412        let body_bytes =
413            encode_body_canonical(&self.ops, &self.parents, &self.provenance, &self.inverse_of)?;
414        let recomputed = AtomId(Sha256::digest(&body_bytes).into());
415        if recomputed != self.id {
416            return Err(AtomError::ContentHashMismatch {
417                expected: self.id,
418                actual: recomputed,
419            });
420        }
421        Ok(())
422    }
423
424    /// Verify both signatures against the atom's id. Does *not* consult the
425    /// trust graph — that's the caller's responsibility (the trust graph
426    /// decides whether the keys themselves are currently trusted).
427    pub fn verify_signatures(&self) -> Result<(), AtomError> {
428        let signature_payload = self.id.0;
429        let principal_key = VerifyingKey::from_bytes(&self.signature.principal_key)
430            .map_err(|error| AtomError::Invalid(format!("principal key: {error}")))?;
431        let persona_key = VerifyingKey::from_bytes(&self.signature.persona_key)
432            .map_err(|error| AtomError::Invalid(format!("persona key: {error}")))?;
433        let principal_sig = Ed25519Signature::from_bytes(&self.signature.principal_sig);
434        let persona_sig = Ed25519Signature::from_bytes(&self.signature.persona_sig);
435        principal_key
436            .verify(&signature_payload, &principal_sig)
437            .map_err(|_| AtomError::InvalidSignature("principal"))?;
438        persona_key
439            .verify(&signature_payload, &persona_sig)
440            .map_err(|_| AtomError::InvalidSignature("persona"))?;
441        Ok(())
442    }
443
444    /// Combined `verify_content_hash` + `verify_signatures`. Most callers
445    /// want this.
446    pub fn verify(&self) -> Result<(), AtomError> {
447        self.verify_content_hash()?;
448        self.verify_signatures()
449    }
450
451    /// Apply this atom's ops to `document` in order. Returns `Err` if any
452    /// op fails (offset out of range or `Delete` content mismatch).
453    pub fn apply(&self, document: &mut Vec<u8>) -> Result<(), AtomError> {
454        for op in &self.ops {
455            op.apply(document)?;
456        }
457        Ok(())
458    }
459
460    /// Encode as JSON. Round-trips with [`Atom::from_json_slice`].
461    pub fn to_json(&self) -> Result<String, AtomError> {
462        serde_json::to_string(self).map_err(|error| AtomError::Json(error.to_string()))
463    }
464
465    /// Decode from JSON, then verify the content hash (not signatures).
466    pub fn from_json_slice(bytes: &[u8]) -> Result<Self, AtomError> {
467        let atom: Atom =
468            serde_json::from_slice(bytes).map_err(|error| AtomError::Json(error.to_string()))?;
469        atom.verify_content_hash()?;
470        Ok(atom)
471    }
472
473    /// Encode as canonical binary bytes. Round-trips with
474    /// [`Atom::from_binary_slice`] and is byte-stable across processes.
475    pub fn to_binary(&self) -> Result<Vec<u8>, AtomError> {
476        encode_atom_binary(self)
477    }
478
479    /// Decode from canonical binary bytes, then verify the content hash
480    /// (not signatures).
481    pub fn from_binary_slice(bytes: &[u8]) -> Result<Self, AtomError> {
482        let atom = decode_atom_binary(bytes)?;
483        atom.verify_content_hash()?;
484        Ok(atom)
485    }
486}
487
488// ---------------------------------------------------------------------------
489// Canonical binary encoding (deterministic, version-tagged).
490// ---------------------------------------------------------------------------
491
492fn encode_body_canonical(
493    ops: &[TextOp],
494    parents: &[AtomId],
495    provenance: &Provenance,
496    inverse_of: &Option<AtomId>,
497) -> Result<Vec<u8>, AtomError> {
498    let mut out = Vec::new();
499    out.extend_from_slice(ATOM_BINARY_MAGIC);
500    out.push(ATOM_BINARY_VERSION);
501    write_ops(&mut out, ops);
502    write_parents(&mut out, parents);
503    write_provenance(&mut out, provenance)?;
504    write_optional_atom_id(&mut out, inverse_of);
505    Ok(out)
506}
507
508fn encode_atom_binary(atom: &Atom) -> Result<Vec<u8>, AtomError> {
509    let mut out =
510        encode_body_canonical(&atom.ops, &atom.parents, &atom.provenance, &atom.inverse_of)?;
511    out.extend_from_slice(&atom.id.0);
512    out.extend_from_slice(&atom.signature.principal_key);
513    out.extend_from_slice(&atom.signature.principal_sig);
514    out.extend_from_slice(&atom.signature.persona_key);
515    out.extend_from_slice(&atom.signature.persona_sig);
516    Ok(out)
517}
518
519fn write_ops(out: &mut Vec<u8>, ops: &[TextOp]) {
520    write_u32(out, ops.len() as u32);
521    for op in ops {
522        match op {
523            TextOp::Insert { offset, content } => {
524                out.push(0);
525                write_u64(out, *offset);
526                write_bytes(out, content.as_bytes());
527            }
528            TextOp::Delete { offset, content } => {
529                out.push(1);
530                write_u64(out, *offset);
531                write_bytes(out, content.as_bytes());
532            }
533        }
534    }
535}
536
537fn write_parents(out: &mut Vec<u8>, parents: &[AtomId]) {
538    write_u32(out, parents.len() as u32);
539    for parent in parents {
540        out.extend_from_slice(&parent.0);
541    }
542}
543
544fn write_provenance(out: &mut Vec<u8>, provenance: &Provenance) -> Result<(), AtomError> {
545    write_str(out, &provenance.principal);
546    write_str(out, &provenance.persona);
547    write_str(out, &provenance.agent_run_id);
548    match &provenance.tool_call_id {
549        Some(id) => {
550            out.push(1);
551            write_str(out, id);
552        }
553        None => out.push(0),
554    }
555    write_str(out, &provenance.trace_id);
556    write_str(out, &provenance.transcript_ref);
557    let formatted = provenance
558        .timestamp
559        .format(&Rfc3339)
560        .map_err(|error| AtomError::Invalid(format!("timestamp format: {error}")))?;
561    write_str(out, &formatted);
562    Ok(())
563}
564
565fn write_optional_atom_id(out: &mut Vec<u8>, value: &Option<AtomId>) {
566    match value {
567        Some(id) => {
568            out.push(1);
569            out.extend_from_slice(&id.0);
570        }
571        None => out.push(0),
572    }
573}
574
575fn write_bytes(out: &mut Vec<u8>, bytes: &[u8]) {
576    write_u32(out, bytes.len() as u32);
577    out.extend_from_slice(bytes);
578}
579
580fn write_str(out: &mut Vec<u8>, value: &str) {
581    write_bytes(out, value.as_bytes());
582}
583
/// Append `value` as four little-endian bytes.
fn write_u32(out: &mut Vec<u8>, value: u32) {
    out.extend(value.to_le_bytes());
}
587
/// Append `value` as eight little-endian bytes.
fn write_u64(out: &mut Vec<u8>, value: u64) {
    out.extend(value.to_le_bytes());
}
591
592// ---------------------------------------------------------------------------
593// Canonical binary decoding.
594// ---------------------------------------------------------------------------
595
596struct Cursor<'a> {
597    bytes: &'a [u8],
598    offset: usize,
599}
600
601impl<'a> Cursor<'a> {
602    fn new(bytes: &'a [u8]) -> Self {
603        Self { bytes, offset: 0 }
604    }
605
606    fn remaining(&self) -> usize {
607        self.bytes.len() - self.offset
608    }
609
610    fn take(&mut self, n: usize) -> Result<&'a [u8], AtomError> {
611        if self.remaining() < n {
612            return Err(AtomError::Binary(format!(
613                "truncated: need {n} bytes, have {}",
614                self.remaining()
615            )));
616        }
617        let slice = &self.bytes[self.offset..self.offset + n];
618        self.offset += n;
619        Ok(slice)
620    }
621
622    fn take_u8(&mut self) -> Result<u8, AtomError> {
623        Ok(self.take(1)?[0])
624    }
625
626    fn take_u32(&mut self) -> Result<u32, AtomError> {
627        let bytes = self.take(4)?;
628        Ok(u32::from_le_bytes(bytes.try_into().unwrap()))
629    }
630
631    fn take_u64(&mut self) -> Result<u64, AtomError> {
632        let bytes = self.take(8)?;
633        Ok(u64::from_le_bytes(bytes.try_into().unwrap()))
634    }
635
636    fn take_bytes(&mut self) -> Result<Vec<u8>, AtomError> {
637        let len = self.take_u32()? as usize;
638        Ok(self.take(len)?.to_vec())
639    }
640
641    fn take_string(&mut self) -> Result<String, AtomError> {
642        let bytes = self.take_bytes()?;
643        String::from_utf8(bytes).map_err(|error| AtomError::Binary(format!("utf8: {error}")))
644    }
645}
646
647fn decode_atom_binary(bytes: &[u8]) -> Result<Atom, AtomError> {
648    let mut cursor = Cursor::new(bytes);
649    let magic = cursor.take(ATOM_BINARY_MAGIC.len())?;
650    if magic != ATOM_BINARY_MAGIC {
651        return Err(AtomError::Binary("magic mismatch".to_string()));
652    }
653    let version = cursor.take_u8()?;
654    if version != ATOM_BINARY_VERSION {
655        return Err(AtomError::Binary(format!(
656            "unsupported version {version}, expected {ATOM_BINARY_VERSION}"
657        )));
658    }
659
660    let ops_len = cursor.take_u32()? as usize;
661    let mut ops = Vec::with_capacity(ops_len);
662    for _ in 0..ops_len {
663        let tag = cursor.take_u8()?;
664        let offset = cursor.take_u64()?;
665        let content = cursor.take_string()?;
666        ops.push(match tag {
667            0 => TextOp::Insert { offset, content },
668            1 => TextOp::Delete { offset, content },
669            other => return Err(AtomError::Binary(format!("unknown op tag {other}"))),
670        });
671    }
672
673    let parents_len = cursor.take_u32()? as usize;
674    let mut parents = Vec::with_capacity(parents_len);
675    for _ in 0..parents_len {
676        let parent_bytes = cursor.take(ATOM_ID_BYTES)?;
677        let mut id = [0u8; ATOM_ID_BYTES];
678        id.copy_from_slice(parent_bytes);
679        parents.push(AtomId(id));
680    }
681
682    let principal = cursor.take_string()?;
683    let persona = cursor.take_string()?;
684    let agent_run_id = cursor.take_string()?;
685    let tool_call_id = match cursor.take_u8()? {
686        0 => None,
687        1 => Some(cursor.take_string()?),
688        other => {
689            return Err(AtomError::Binary(format!(
690                "invalid tool_call_id tag {other}"
691            )))
692        }
693    };
694    let trace_id = cursor.take_string()?;
695    let transcript_ref = cursor.take_string()?;
696    let timestamp_str = cursor.take_string()?;
697    let timestamp = OffsetDateTime::parse(&timestamp_str, &Rfc3339)
698        .map_err(|error| AtomError::Binary(format!("timestamp parse: {error}")))?;
699    let provenance = Provenance {
700        principal,
701        persona,
702        agent_run_id,
703        tool_call_id,
704        trace_id,
705        transcript_ref,
706        timestamp,
707    };
708
709    let inverse_of = match cursor.take_u8()? {
710        0 => None,
711        1 => {
712            let id_bytes = cursor.take(ATOM_ID_BYTES)?;
713            let mut id = [0u8; ATOM_ID_BYTES];
714            id.copy_from_slice(id_bytes);
715            Some(AtomId(id))
716        }
717        other => return Err(AtomError::Binary(format!("invalid inverse_of tag {other}"))),
718    };
719
720    let id_bytes = cursor.take(ATOM_ID_BYTES)?;
721    let mut id = [0u8; ATOM_ID_BYTES];
722    id.copy_from_slice(id_bytes);
723    let id = AtomId(id);
724
725    let principal_key_bytes = cursor.take(ED25519_PUBLIC_KEY_BYTES)?;
726    let mut principal_key = [0u8; ED25519_PUBLIC_KEY_BYTES];
727    principal_key.copy_from_slice(principal_key_bytes);
728    let principal_sig_bytes = cursor.take(ED25519_SIGNATURE_BYTES)?;
729    let mut principal_sig = [0u8; ED25519_SIGNATURE_BYTES];
730    principal_sig.copy_from_slice(principal_sig_bytes);
731    let persona_key_bytes = cursor.take(ED25519_PUBLIC_KEY_BYTES)?;
732    let mut persona_key = [0u8; ED25519_PUBLIC_KEY_BYTES];
733    persona_key.copy_from_slice(persona_key_bytes);
734    let persona_sig_bytes = cursor.take(ED25519_SIGNATURE_BYTES)?;
735    let mut persona_sig = [0u8; ED25519_SIGNATURE_BYTES];
736    persona_sig.copy_from_slice(persona_sig_bytes);
737
738    if cursor.remaining() != 0 {
739        return Err(AtomError::Binary(format!(
740            "trailing bytes after atom: {} bytes left",
741            cursor.remaining()
742        )));
743    }
744
745    Ok(Atom {
746        id,
747        ops,
748        parents,
749        provenance,
750        signature: AtomSignature {
751            principal_key,
752            principal_sig,
753            persona_key,
754            persona_sig,
755        },
756        inverse_of,
757    })
758}
759
760// ---------------------------------------------------------------------------
761// Tests.
762// ---------------------------------------------------------------------------
763
764#[cfg(test)]
765mod tests {
766    use super::*;
767    use ed25519_dalek::SigningKey;
768
769    fn deterministic_signing_key(seed: u8) -> SigningKey {
770        let mut bytes = [0u8; 32];
771        for slot in bytes.iter_mut() {
772            *slot = seed;
773        }
774        SigningKey::from_bytes(&bytes)
775    }
776
777    fn fixed_provenance() -> Provenance {
778        Provenance {
779            principal: "user:alice".to_string(),
780            persona: "ship-captain".to_string(),
781            agent_run_id: "run-0001".to_string(),
782            tool_call_id: Some("tc-42".to_string()),
783            trace_id: "trace-abcd".to_string(),
784            transcript_ref: "transcript:0001#turn-3".to_string(),
785            timestamp: OffsetDateTime::parse("2026-04-24T12:34:56Z", &Rfc3339).unwrap(),
786        }
787    }
788
789    fn sample_ops() -> Vec<TextOp> {
790        vec![
791            TextOp::Insert {
792                offset: 0,
793                content: "Hello, ".to_string(),
794            },
795            TextOp::Insert {
796                offset: 7,
797                content: "world!".to_string(),
798            },
799        ]
800    }
801
802    fn make_atom() -> Atom {
803        let principal = deterministic_signing_key(1);
804        let persona = deterministic_signing_key(2);
805        Atom::sign(
806            sample_ops(),
807            Vec::new(),
808            fixed_provenance(),
809            None,
810            &principal,
811            &persona,
812        )
813        .unwrap()
814    }
815
816    #[test]
817    fn signing_produces_atom_with_verifiable_signatures() {
818        let atom = make_atom();
819        atom.verify().expect("freshly-signed atom must verify");
820    }
821
/// Appending an op after signing must make `verify_content_hash` fail with
/// `AtomError::ContentHashMismatch`.
#[test]
fn tampering_with_ops_invalidates_content_hash() {
    let mut tampered = make_atom();
    let extra_op = TextOp::Insert {
        offset: 13,
        content: "?!".to_string(),
    };
    tampered.ops.push(extra_op);

    let error = tampered.verify_content_hash().unwrap_err();
    // Any other error variant is a test failure; report it verbatim.
    if !matches!(error, AtomError::ContentHashMismatch { .. }) {
        panic!("unexpected error: {error}");
    }
}
835
/// Flipping bits in the first byte of the principal signature must make
/// `verify_signatures` report `InvalidSignature("principal")`.
#[test]
fn tampering_with_signature_fails_verification() {
    let mut forged = make_atom();
    // Corrupt the signature in place; the rest of the atom is left untouched.
    forged.signature.principal_sig[0] ^= 0xff;

    let failure = forged.verify_signatures().unwrap_err();
    assert!(matches!(failure, AtomError::InvalidSignature("principal")));
}
843
/// Applying an atom and then its inverse must leave the document as it began.
///
/// Also checks that the inverse verifies, names the original in `inverse_of`,
/// and lists the original as its only parent.
#[test]
fn inverse_atom_undoes_apply() {
    let original = make_atom();
    let mut buffer = Vec::<u8>::new();
    original.apply(&mut buffer).unwrap();
    assert_eq!(std::str::from_utf8(&buffer).unwrap(), "Hello, world!");

    // Same keys as `make_atom`; the provenance differs only in `tool_call_id`.
    let principal_key = deterministic_signing_key(1);
    let persona_key = deterministic_signing_key(2);
    let undo_provenance = Provenance {
        tool_call_id: None,
        ..fixed_provenance()
    };
    let undo = Atom::invert(&original, undo_provenance, &principal_key, &persona_key).unwrap();

    undo.verify().unwrap();
    assert_eq!(undo.inverse_of, Some(original.id));
    assert_eq!(undo.parents, vec![original.id]);

    undo.apply(&mut buffer).unwrap();
    assert!(buffer.is_empty(), "inverse must restore pre-image");
}
871
/// Inverting an atom twice must yield the ops of the original atom.
#[test]
fn inverse_of_inverse_returns_to_original() {
    let principal_key = deterministic_signing_key(1);
    let persona_key = deterministic_signing_key(2);
    let original = make_atom();

    // Both inversions share keys and provenance, so factor the call out.
    let invert = |target: &Atom| {
        Atom::invert(target, fixed_provenance(), &principal_key, &persona_key).unwrap()
    };
    let undo = invert(&original);
    let redo = invert(&undo);

    assert_eq!(redo.ops, original.ops);
}
881
/// Encoding to JSON and decoding again must give back an equal atom with the
/// same id that still verifies.
#[test]
fn json_roundtrip_preserves_atom_id() {
    let original = make_atom();
    let encoded = original.to_json().unwrap();

    let restored = Atom::from_json_slice(encoded.as_bytes()).unwrap();

    assert_eq!(restored, original);
    assert_eq!(restored.id, original.id);
    restored.verify().unwrap();
}
891
/// Encoding to the binary form and decoding again must give back an equal
/// atom with the same id that still verifies.
#[test]
fn binary_roundtrip_preserves_atom_id() {
    let original = make_atom();
    let encoded = original.to_binary().unwrap();

    let restored = Atom::from_binary_slice(&encoded).unwrap();

    assert_eq!(restored, original);
    assert_eq!(restored.id, original.id);
    restored.verify().unwrap();
}
901
/// The id must be the same whether the atom travelled through JSON or through
/// the binary encoding.
#[test]
fn cross_encoding_atom_id_is_stable() {
    let original = make_atom();

    let json_text = original.to_json().unwrap();
    let via_json = Atom::from_json_slice(json_text.as_bytes()).unwrap();

    let binary_blob = original.to_binary().unwrap();
    let via_binary = Atom::from_binary_slice(&binary_blob).unwrap();

    assert_eq!(via_json.id, original.id);
    assert_eq!(via_binary.id, original.id);
    assert_eq!(via_json.id, via_binary.id);
}
911
/// Signing identical content with two unrelated key pairs must give the same
/// id but different signatures.
#[test]
fn atom_id_is_deterministic_across_signers() {
    // The AtomId is content-only: re-signing with different keys must
    // produce the same id.
    let sign_with = |principal_seed: u8, persona_seed: u8| {
        let principal_key = deterministic_signing_key(principal_seed);
        let persona_key = deterministic_signing_key(persona_seed);
        Atom::sign(
            sample_ops(),
            Vec::new(),
            fixed_provenance(),
            None,
            &principal_key,
            &persona_key,
        )
        .unwrap()
    };

    let atom_a = sign_with(11, 22);
    let atom_b = sign_with(33, 44);

    assert_eq!(atom_a.id, atom_b.id);
    assert_ne!(atom_a.signature, atom_b.signature);
}
941
/// Dropping the final byte of a valid binary encoding must produce
/// `AtomError::Binary` on decode.
#[test]
fn binary_decode_rejects_truncated_input() {
    let original = make_atom();
    let encoded = original.to_binary().unwrap();
    let cut = encoded.len() - 1;

    let failure = Atom::from_binary_slice(&encoded[..cut]).unwrap_err();

    assert!(matches!(failure, AtomError::Binary(_)));
}
950
/// Appending one extra byte to a valid binary encoding must produce
/// `AtomError::Binary` on decode.
#[test]
fn binary_decode_rejects_trailing_bytes() {
    let original = make_atom();
    let mut padded = original.to_binary().unwrap();
    padded.push(0xff);

    let failure = Atom::from_binary_slice(&padded).unwrap_err();

    assert!(matches!(failure, AtomError::Binary(_)));
}
959
/// Replacing the `id` field of an atom's JSON with a different id must make
/// decoding fail with `AtomError::ContentHashMismatch`.
#[test]
fn json_decode_rejects_mismatched_id() {
    let original = make_atom();
    let json_text = original.to_json().unwrap();
    let mut document: serde_json::Value = serde_json::from_str(&json_text).unwrap();

    // Swap in an id made of a repeated filler byte.
    let bogus_id = AtomId([0xaau8; ATOM_ID_BYTES]);
    document["id"] = serde_json::Value::String(bogus_id.to_hex());

    let payload = serde_json::to_vec(&document).unwrap();
    let failure = Atom::from_json_slice(&payload).unwrap_err();
    assert!(matches!(failure, AtomError::ContentHashMismatch { .. }));
}
970
/// A delete atom removes the named bytes, and its inverse puts them back.
#[test]
fn delete_op_round_trips_apply_and_invert() {
    let principal_key = deterministic_signing_key(1);
    let persona_key = deterministic_signing_key(2);
    let mut buffer = b"abcdef".to_vec();

    let removal = TextOp::Delete {
        offset: 1,
        content: "bcd".to_string(),
    };
    let forward = Atom::sign(
        vec![removal],
        Vec::new(),
        fixed_provenance(),
        None,
        &principal_key,
        &persona_key,
    )
    .unwrap();

    forward.apply(&mut buffer).unwrap();
    assert_eq!(buffer, b"aef");

    let backward =
        Atom::invert(&forward, fixed_provenance(), &principal_key, &persona_key).unwrap();
    backward.apply(&mut buffer).unwrap();
    assert_eq!(buffer, b"abcdef");
}
994
/// A delete whose expected content does not match the document must fail
/// with `AtomError::Apply`.
#[test]
fn delete_op_rejects_content_mismatch() {
    let principal_key = deterministic_signing_key(1);
    let persona_key = deterministic_signing_key(2);
    let mut buffer = b"abcdef".to_vec();

    // The document starts with "abcde", not "wrong".
    let bad_removal = TextOp::Delete {
        offset: 0,
        content: "wrong".to_string(),
    };
    let atom = Atom::sign(
        vec![bad_removal],
        Vec::new(),
        fixed_provenance(),
        None,
        &principal_key,
        &persona_key,
    )
    .unwrap();

    let failure = atom.apply(&mut buffer).unwrap_err();
    assert!(matches!(failure, AtomError::Apply(_)));
}
1015
/// `inverse_of` must name the inverted atom and survive both encodings.
#[test]
fn provenance_inverse_of_propagation() {
    let principal_key = deterministic_signing_key(1);
    let persona_key = deterministic_signing_key(2);
    let original = make_atom();
    let undo = Atom::invert(&original, fixed_provenance(), &principal_key, &persona_key).unwrap();
    let expected = Some(original.id);

    // The inverse atom must reference the exact id of the original.
    assert_eq!(undo.inverse_of, expected);

    // Round-tripping through both encodings preserves the inverse_of.
    let json_text = undo.to_json().unwrap();
    let via_json = Atom::from_json_slice(json_text.as_bytes()).unwrap();
    let binary_blob = undo.to_binary().unwrap();
    let via_binary = Atom::from_binary_slice(&binary_blob).unwrap();
    assert_eq!(via_json.inverse_of, expected);
    assert_eq!(via_binary.inverse_of, expected);
}
1030}