Skip to main content

cortex_core/
canonical.rs

1//! Canonical, deterministic, length-prefixed binary encoding for attestation
2//! preimages (T-3.D.0, ADR 0010 §1b, ADR 0014 §"Signed preimage").
3//!
4//! ## Why a custom binary encoder
5//!
6//! ADR 0010 §1b says: *"Verifiers MUST fail closed on unknown or malformed
7//! `schema_version` (no partial verify, no 'best effort' decode)."* That
8//! requires the bytes that go into the Ed25519 signature to be **identical**
9//! across operating systems, serde versions, and language implementations.
10//! `serde_json` does not give that property: object key ordering depends on
11//! the `preserve_order` feature, integer-vs-float coercion can drift, and
12//! whitespace policy differs.
13//!
14//! We therefore use the same framing pattern as
15//! `cortex-ledger::hash::event_hash` (T-1.B.6): a 1-byte domain tag followed
16//! by length-prefixed fields in a **fixed order**. The encoder lives in
17//! `cortex-core` so every crate that needs to verify a signature gets the
18//! same bytes.
19//!
20//! ## Framing
21//!
22//! ```text
23//! signing_input = DOMAIN_TAG_ATTESTATION_PREIMAGE          // 1 byte: 0x10
24//!              || schema_version (u16, BE)                  // 2 bytes
25//!              || lp(event_source_variant_tag)              // 8 + N
26//!              || lp(source_field_1) || lp(source_field_2)  // each 8 + N
27//!                 ...                                       // (variant-specific)
28//!              || lp(event_id)                              // 8 + N
29//!              || lp(payload_hash)                          // 8 + N
30//!              || lp(session_id)                            // 8 + N
31//!              || lp(ledger_id)                             // 8 + N
32//!              || lineage_tag (u8: 0x01 | 0x02)             // 1 byte
32//!              || lp(chain_position OR previous_hash)       // 8 + N
33//!              || lp(signed_at_iso8601)                     // 8 + N
34//!              || lp(key_id)                                // 8 + N
35//! ```
36//!
37//! where `lp(x) = (x.len() as u64).to_be_bytes() || x.bytes`. Big-endian is
38//! used for both the `schema_version` and length prefixes; this is
39//! architecture-independent on every CI target.
40//!
41//! ## Domain tag allocation
42//!
43//! `cortex-ledger::hash` reserves 0x01 for `event_hash`. The header comment
44//! in that module also reserved 0x02 for "audit" and 0x03 for "trace seal"
45//! as future-use slots; **none of those reservations have shipped**. To
46//! avoid any chance of collision with that documented reservation block we
47//! allocate the attestation preimage tag at **0x10** (decimal 16), opening
48//! a fresh domain for cryptographic attestations. Future attestation
49//! variants (e.g. rotation envelope) take subsequent values in this block.
50
51use chrono::{DateTime, Utc};
52
/// Domain tag for attestation preimage framing. Reserved: `0x10`.
///
/// This is the first byte written by [`canonical_signing_input`], ahead of
/// the schema version and every length-prefixed field.
///
/// MUST NOT be re-used for any other hash / signature input domain. See
/// module docs for the rationale and reservation table.
pub const DOMAIN_TAG_ATTESTATION_PREIMAGE: u8 = 0x10;

/// Domain tag for identity-rotation envelope framing. Reserved: `0x11`.
///
/// This is the first byte written by [`canonical_rotation_input`]. It is
/// the value immediately after [`DOMAIN_TAG_ATTESTATION_PREIMAGE`] in the
/// attestation tag block described in the module docs.
pub const DOMAIN_TAG_ROTATION_ENVELOPE: u8 = 0x11;

/// Schema version for the attestation preimage encoding.
///
/// Per ADR 0010 §1b this is **independent** of [`crate::SCHEMA_VERSION`]:
/// it governs the bytes that go into the Ed25519 signature, not the wire
/// shape of the surrounding `Event`. Bump this and write an ADR if any
/// change to the framing or the field set lands.
///
/// The encoders serialize the version as two big-endian bytes directly
/// after the domain tag.
pub const SCHEMA_VERSION_ATTESTATION: u16 = 1;
69
/// Identity of whoever (or whatever) produced an event, as far as the
/// signed preimage is concerned (ADR 0014 §"Signed preimage":
/// *"`EventSource` variant tag + **all** source-specific fields"*).
///
/// Every variant contributes its [`Self::variant_tag`] string to the
/// canonical bytes; variants that carry fields contribute those fields as
/// well, in declaration order. Adding a variant requires a
/// [`SCHEMA_VERSION_ATTESTATION`] bump.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SourceIdentity {
    /// A human operator. Contributes only the variant tag.
    User,
    /// An ephemeral child agent. Per ADR 0014 it carries `agent_id`,
    /// `parent_session_id`, `delegation_id`, and `model`.
    ChildAgent {
        /// Stable identifier of this agent instance.
        agent_id: String,
        /// Session in which the parent delegated work to this child.
        parent_session_id: String,
        /// Identifier of the delegation grant.
        delegation_id: String,
        /// Model identifier (free-form; matches the runtime registry).
        model: String,
    },
    /// A tool invocation, identified by the tool `name`.
    Tool {
        /// Tool name (free-form; matches the runtime registry).
        name: String,
    },
    /// The Cortex runtime itself. Contributes only the variant tag.
    Runtime,
    /// An externally-observed outcome. Contributes only the variant tag.
    ExternalOutcome,
    /// An explicit operator correction. Contributes only the variant tag.
    ManualCorrection,
}

impl SourceIdentity {
    /// Returns the fixed wire string that identifies this variant.
    ///
    /// The string is written into the canonical preimage, so it is part of
    /// what gets signed. Renaming any of these requires a
    /// [`SCHEMA_VERSION_ATTESTATION`] bump.
    #[must_use]
    pub const fn variant_tag(&self) -> &'static str {
        // Exhaustive on purpose: a new variant must fail to compile here
        // until it has been given a wire string.
        match *self {
            // Variants that carry source-specific fields.
            Self::ChildAgent { .. } => "child_agent",
            Self::Tool { .. } => "tool",
            // Field-less variants.
            Self::User => "user",
            Self::Runtime => "runtime",
            Self::ExternalOutcome => "external_outcome",
            Self::ManualCorrection => "manual_correction",
        }
    }
}
121
/// How a preimage is tied to its place in the ledger (ADR 0014 §Replay:
/// *"preimage MUST include `chain_position` or `previous_hash`"*).
///
/// JSONL appends prefer [`Self::PreviousHash`] (Option A from ADR 0010 §2);
/// CLI attestation records that have nothing from the previous row on hand
/// at sign time use [`Self::ChainPosition`] (Option B-style monotonic
/// counter).
// NOTE(review): the previous wording of this comment called the first
// option `PreviousSignature`, which is not a variant of this enum. It is
// assumed to mean `PreviousHash`; confirm against ADR 0010 §2.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LineageBinding {
    /// Monotonic chain position counter (u64).
    ChainPosition(u64),
    /// Hex-encoded previous-row hash (32-byte BLAKE3 → 64 hex chars).
    PreviousHash(String),
}

impl LineageBinding {
    /// Returns the one-byte discriminator written ahead of the lineage body
    /// in the canonical preimage: `0x01` for `ChainPosition`, `0x02` for
    /// `PreviousHash`.
    ///
    /// Because the tag is signed, a signature captured for
    /// `ChainPosition(10)` cannot be re-purposed under `PreviousHash("0a…")`
    /// even if the two bodies happened to serialize to the same bytes.
    #[must_use]
    pub const fn tag(&self) -> u8 {
        match *self {
            Self::PreviousHash(_) => 0x02,
            Self::ChainPosition(_) => 0x01,
        }
    }
}
150
/// All material that goes into the Ed25519 signature for an attestation.
///
/// Field order in this struct mirrors the canonical byte order of
/// [`canonical_signing_input`]; do not reorder without bumping
/// [`SCHEMA_VERSION_ATTESTATION`].
///
/// All `String` fields are signed as their raw UTF-8 bytes behind a
/// length prefix; the encoder performs no normalization or validation of
/// their contents.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AttestationPreimage {
    /// Attestation schema version (governs the canonical encoding only).
    /// Verifiers MUST fail closed on values they do not understand.
    /// Encoded as two big-endian bytes right after the domain tag.
    pub schema_version: u16,
    /// Source identity (variant + source-specific fields, **excluding**
    /// any nested attestation blob — see ADR 0014 §"Signed preimage").
    /// Encoded as the variant tag string followed by the variant's fields.
    pub source: SourceIdentity,
    /// Stable event identifier (`evt_…`).
    pub event_id: String,
    /// Hex-encoded BLAKE3 hash of the canonical event payload.
    /// The hex text itself is what gets signed, not the decoded bytes.
    pub payload_hash: String,
    /// Session identifier (free-form). Stops cross-session replay.
    pub session_id: String,
    /// Ledger identifier / session-scoped ledger namespace. Stops
    /// cross-ledger replay.
    pub ledger_id: String,
    /// Lineage binding (chain position OR previous hash). See
    /// [`LineageBinding`]. Encoded as a one-byte variant tag followed by a
    /// length-prefixed body.
    pub lineage: LineageBinding,
    /// When the signature was produced (UTC). Encoded as RFC 3339 /
    /// ISO 8601 — the format is fixed by [`canonical_signing_input`]
    /// (microsecond precision, `Z` suffix).
    // NOTE(review): chrono's `SecondsFormat::Micros` is documented as
    // truncating nanoseconds, so precision finer than one microsecond
    // presumably does not reach the signed bytes — confirm if callers
    // ever compare timestamps at nanosecond resolution.
    pub signed_at: DateTime<Utc>,
    /// Public-key fingerprint of the signing key. Binds the signature to
    /// the declared key.
    pub key_id: String,
}
183
184/// Encode an [`AttestationPreimage`] as a deterministic byte string.
185///
186/// **Output is byte-identical across operating systems and serde versions.**
187/// See module docs for the framing.
188#[must_use]
189pub fn canonical_signing_input(p: &AttestationPreimage) -> Vec<u8> {
190    let mut out = Vec::with_capacity(256);
191    out.push(DOMAIN_TAG_ATTESTATION_PREIMAGE);
192    out.extend_from_slice(&p.schema_version.to_be_bytes());
193
194    // Source: variant tag, then variant-specific fields in fixed order.
195    write_lp(&mut out, p.source.variant_tag().as_bytes());
196    match &p.source {
197        SourceIdentity::User
198        | SourceIdentity::Runtime
199        | SourceIdentity::ExternalOutcome
200        | SourceIdentity::ManualCorrection => {}
201        SourceIdentity::ChildAgent {
202            agent_id,
203            parent_session_id,
204            delegation_id,
205            model,
206        } => {
207            write_lp(&mut out, agent_id.as_bytes());
208            write_lp(&mut out, parent_session_id.as_bytes());
209            write_lp(&mut out, delegation_id.as_bytes());
210            write_lp(&mut out, model.as_bytes());
211        }
212        SourceIdentity::Tool { name } => {
213            write_lp(&mut out, name.as_bytes());
214        }
215    }
216
217    write_lp(&mut out, p.event_id.as_bytes());
218    write_lp(&mut out, p.payload_hash.as_bytes());
219    write_lp(&mut out, p.session_id.as_bytes());
220    write_lp(&mut out, p.ledger_id.as_bytes());
221
222    // Lineage: 1-byte tag + length-prefixed body.
223    out.push(p.lineage.tag());
224    match &p.lineage {
225        LineageBinding::ChainPosition(n) => {
226            // Length prefix is fixed at 8 (u64 BE) so the encoder is uniform.
227            out.extend_from_slice(&8u64.to_be_bytes());
228            out.extend_from_slice(&n.to_be_bytes());
229        }
230        LineageBinding::PreviousHash(hex) => {
231            write_lp(&mut out, hex.as_bytes());
232        }
233    }
234
235    // signed_at: ISO 8601 / RFC 3339 with UTC offset and microsecond precision.
236    // chrono's `to_rfc3339_opts` is deterministic across platforms.
237    let signed_at_str = p
238        .signed_at
239        .to_rfc3339_opts(chrono::SecondsFormat::Micros, true);
240    write_lp(&mut out, signed_at_str.as_bytes());
241
242    write_lp(&mut out, p.key_id.as_bytes());
243
244    out
245}
246
247/// Encode an identity-rotation envelope as a deterministic byte string
248/// (separate domain tag — see [`DOMAIN_TAG_ROTATION_ENVELOPE`]).
249///
250/// ```text
251/// rotation_input = DOMAIN_TAG_ROTATION_ENVELOPE              // 1 byte: 0x11
252///               || schema_version (u16, BE)                   // 2 bytes
253///               || lp(old_pubkey_bytes)                       // 8 + 32
254///               || lp(new_pubkey_bytes)                       // 8 + 32
255///               || lp(signed_at_iso8601)                      // 8 + N
256/// ```
257#[must_use]
258pub fn canonical_rotation_input(
259    schema_version: u16,
260    old_pubkey: &[u8; 32],
261    new_pubkey: &[u8; 32],
262    signed_at: DateTime<Utc>,
263) -> Vec<u8> {
264    let mut out = Vec::with_capacity(128);
265    out.push(DOMAIN_TAG_ROTATION_ENVELOPE);
266    out.extend_from_slice(&schema_version.to_be_bytes());
267    write_lp(&mut out, old_pubkey);
268    write_lp(&mut out, new_pubkey);
269    let signed_at_str = signed_at.to_rfc3339_opts(chrono::SecondsFormat::Micros, true);
270    write_lp(&mut out, signed_at_str.as_bytes());
271    out
272}
273
/// Appends one length-prefixed field to `out`: the byte count of `bytes`
/// as a big-endian `u64` (8 bytes), immediately followed by `bytes`.
///
/// An empty slice still produces the 8-byte zero prefix.
fn write_lp(out: &mut Vec<u8>, bytes: &[u8]) {
    let prefix = u64::to_be_bytes(bytes.len() as u64);
    out.extend_from_slice(&prefix);
    out.extend_from_slice(bytes);
}
279
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::TimeZone;

    /// Baseline preimage shared by the tests: `User` source, chain
    /// position 10, whole-second timestamp.
    fn fixture_preimage() -> AttestationPreimage {
        AttestationPreimage {
            schema_version: SCHEMA_VERSION_ATTESTATION,
            source: SourceIdentity::User,
            event_id: "evt_01ARZ3NDEKTSV4RRFFQ69G5FAV".into(),
            payload_hash: "deadbeef".into(),
            session_id: "session-001".into(),
            ledger_id: "ledger-main".into(),
            lineage: LineageBinding::ChainPosition(10),
            signed_at: Utc.with_ymd_and_hms(2026, 5, 2, 12, 0, 0).unwrap(),
            key_id: "fp:abc123".into(),
        }
    }

    /// Cross-platform determinism: the canonical bytes for a fixed fixture
    /// MUST equal a hard-coded hex constant. If this test fails, the
    /// encoder drifted; either revert the change or bump
    /// [`SCHEMA_VERSION_ATTESTATION`] **and** an ADR.
    #[test]
    fn canonical_bytes_match_golden_hex_fixture() {
        let bytes = canonical_signing_input(&fixture_preimage());
        let hex = hex_encode(&bytes);

        // Hand-verified golden vector. Generated once on macOS, asserted
        // identical on Linux + Windows in CI. To regenerate (after a
        // deliberate framing change + ADR + schema bump): print `hex` and
        // paste it back here.
        //
        // Note: `chrono::DateTime::to_rfc3339_opts(SecondsFormat::Micros,
        // /* use_z = */ true)` emits `2026-05-02T12:00:00.000000Z` (27
        // bytes), NOT the `+00:00` long form (32 bytes). The `Z` suffix
        // is part of the canonical contract.
        let expected = concat!(
            "10",               // domain tag
            "0001",             // schema_version u16 BE
            "0000000000000004", // lp len 4 (variant tag "user")
            "75736572",         // "user"
            "000000000000001e", // lp len 30 (event_id)
            "6576745f303141525a334e44454b545356345252464651363947354641",
            "56",                     // event_id tail
            "0000000000000008",       // lp len 8 (payload_hash)
            "6465616462656566",       // "deadbeef"
            "000000000000000b",       // lp len 11 (session_id)
            "73657373696f6e2d303031", // "session-001"
            "000000000000000b",       // lp len 11 (ledger_id)
            "6c65646765722d6d61696e", // "ledger-main"
            "01",                     // lineage tag ChainPosition
            "0000000000000008",       // u64 fixed length
            "000000000000000a",       // chain_position = 10
            "000000000000001b",       // lp len 27 (signed_at)
            "323032362d30352d30325431323a30303a30302e3030303030305a",
            "0000000000000009",   // lp len 9 (key_id)
            "66703a616263313233", // "fp:abc123"
        );

        assert_eq!(
            hex, expected,
            "canonical encoder drift — bytes must be byte-identical across platforms"
        );
    }

    /// Two independent code paths produce the same canonical bytes:
    /// (a) the `canonical_signing_input` function;
    /// (b) a hand-built byte vector via the public framing rules.
    #[test]
    fn canonical_byte_identical_across_serdes() {
        let p = fixture_preimage();
        let from_encoder = canonical_signing_input(&p);

        // Hand-built path mirroring the documented framing. The fixture's
        // source is `User`, which has no source-specific fields, so only
        // the variant tag is written for the source section.
        let mut manual: Vec<u8> = Vec::new();
        manual.push(DOMAIN_TAG_ATTESTATION_PREIMAGE);
        manual.extend_from_slice(&p.schema_version.to_be_bytes());
        push_lp(&mut manual, p.source.variant_tag().as_bytes());
        push_lp(&mut manual, p.event_id.as_bytes());
        push_lp(&mut manual, p.payload_hash.as_bytes());
        push_lp(&mut manual, p.session_id.as_bytes());
        push_lp(&mut manual, p.ledger_id.as_bytes());
        manual.push(p.lineage.tag());
        // Exhaustive so that changing the fixture's lineage variant cannot
        // silently drop the lineage body from the hand-built bytes.
        match &p.lineage {
            LineageBinding::ChainPosition(n) => {
                manual.extend_from_slice(&8u64.to_be_bytes());
                manual.extend_from_slice(&n.to_be_bytes());
            }
            LineageBinding::PreviousHash(hex) => push_lp(&mut manual, hex.as_bytes()),
        }
        let signed_at_str = p
            .signed_at
            .to_rfc3339_opts(chrono::SecondsFormat::Micros, true);
        push_lp(&mut manual, signed_at_str.as_bytes());
        push_lp(&mut manual, p.key_id.as_bytes());

        assert_eq!(
            from_encoder, manual,
            "encoder output must match the hand-built canonical bytes"
        );
    }

    /// Property: rearranging the Rust struct literal does not change the
    /// signed bytes — only the canonical byte sequence is what is signed.
    /// (This is implicitly proved by the above tests; we add an explicit
    /// case for documentation.)
    #[test]
    fn field_reorder_does_not_change_signed_semantics() {
        let p1 = fixture_preimage();

        // Construct the same logical preimage with the field expressions
        // written in a different source order. Rust struct initialization
        // order does not affect the in-memory layout, but this makes the
        // intent of the test explicit.
        let p2 = AttestationPreimage {
            key_id: "fp:abc123".into(),
            signed_at: chrono::Utc.with_ymd_and_hms(2026, 5, 2, 12, 0, 0).unwrap(),
            lineage: LineageBinding::ChainPosition(10),
            ledger_id: "ledger-main".into(),
            session_id: "session-001".into(),
            payload_hash: "deadbeef".into(),
            event_id: "evt_01ARZ3NDEKTSV4RRFFQ69G5FAV".into(),
            source: SourceIdentity::User,
            schema_version: SCHEMA_VERSION_ATTESTATION,
        };

        assert_eq!(canonical_signing_input(&p1), canonical_signing_input(&p2));
    }

    /// Sanity: distinct logical preimages produce distinct canonical bytes.
    #[test]
    fn distinct_preimages_produce_distinct_bytes() {
        let mut p2 = fixture_preimage();
        p2.event_id = "evt_01ARZ3NDEKTSV4RRFFQ69G5FAW".into();
        assert_ne!(
            canonical_signing_input(&fixture_preimage()),
            canonical_signing_input(&p2)
        );
    }

    /// Lineage tag prevents cross-variant collision: a `ChainPosition(0)`
    /// preimage must not encode to the same bytes as a `PreviousHash("")`
    /// preimage.
    #[test]
    fn lineage_variant_tag_prevents_cross_variant_collision() {
        let mut a = fixture_preimage();
        a.lineage = LineageBinding::ChainPosition(0);
        let mut b = fixture_preimage();
        b.lineage = LineageBinding::PreviousHash(String::new());
        assert_ne!(canonical_signing_input(&a), canonical_signing_input(&b));
    }

    /// Variant-specific source fields are written right after the variant
    /// tag, in declaration order, and before `event_id`.
    #[test]
    fn source_specific_fields_follow_variant_tag_in_declared_order() {
        let mut p = fixture_preimage();
        p.source = SourceIdentity::ChildAgent {
            agent_id: "agent-1".into(),
            parent_session_id: "session-000".into(),
            delegation_id: "dlg-7".into(),
            model: "model-x".into(),
        };

        let mut prefix: Vec<u8> = Vec::new();
        prefix.push(DOMAIN_TAG_ATTESTATION_PREIMAGE);
        prefix.extend_from_slice(&p.schema_version.to_be_bytes());
        push_lp(&mut prefix, b"child_agent");
        push_lp(&mut prefix, b"agent-1");
        push_lp(&mut prefix, b"session-000");
        push_lp(&mut prefix, b"dlg-7");
        push_lp(&mut prefix, b"model-x");
        push_lp(&mut prefix, p.event_id.as_bytes());

        let encoded = canonical_signing_input(&p);
        assert!(
            encoded.starts_with(&prefix),
            "child-agent fields must follow the variant tag in declared order"
        );

        // A `Tool` source with the same trailing fields must differ.
        let mut tool = fixture_preimage();
        tool.source = SourceIdentity::Tool {
            name: "agent-1".into(),
        };
        assert_ne!(encoded, canonical_signing_input(&tool));
    }

    /// `PreviousHash` lineage is encoded as tag `0x02` plus the
    /// length-prefixed hex text, directly after `ledger_id`.
    #[test]
    fn previous_hash_lineage_is_tagged_and_length_prefixed() {
        let mut p = fixture_preimage();
        p.lineage = LineageBinding::PreviousHash("0a".into());

        let mut prefix: Vec<u8> = Vec::new();
        prefix.push(DOMAIN_TAG_ATTESTATION_PREIMAGE);
        prefix.extend_from_slice(&p.schema_version.to_be_bytes());
        push_lp(&mut prefix, b"user");
        push_lp(&mut prefix, p.event_id.as_bytes());
        push_lp(&mut prefix, p.payload_hash.as_bytes());
        push_lp(&mut prefix, p.session_id.as_bytes());
        push_lp(&mut prefix, p.ledger_id.as_bytes());
        prefix.push(0x02);
        push_lp(&mut prefix, b"0a");

        assert!(canonical_signing_input(&p).starts_with(&prefix));
    }

    /// The rotation envelope follows its documented framing and uses its
    /// own domain tag.
    #[test]
    fn rotation_input_matches_documented_framing() {
        let old_pubkey = [0x11u8; 32];
        let new_pubkey = [0x22u8; 32];
        let signed_at = Utc.with_ymd_and_hms(2026, 5, 2, 12, 0, 0).unwrap();

        let mut manual: Vec<u8> = Vec::new();
        manual.push(DOMAIN_TAG_ROTATION_ENVELOPE);
        manual.extend_from_slice(&SCHEMA_VERSION_ATTESTATION.to_be_bytes());
        push_lp(&mut manual, &old_pubkey);
        push_lp(&mut manual, &new_pubkey);
        push_lp(&mut manual, b"2026-05-02T12:00:00.000000Z");

        let encoded = canonical_rotation_input(
            SCHEMA_VERSION_ATTESTATION,
            &old_pubkey,
            &new_pubkey,
            signed_at,
        );
        assert_eq!(encoded, manual);

        // Swapping the keys must change the signed bytes.
        let swapped = canonical_rotation_input(
            SCHEMA_VERSION_ATTESTATION,
            &new_pubkey,
            &old_pubkey,
            signed_at,
        );
        assert_ne!(encoded, swapped);
    }

    /// Test-local re-implementation of the length-prefix rule, kept
    /// separate from `write_lp` so the hand-built expectations do not
    /// depend on the code under test.
    fn push_lp(out: &mut Vec<u8>, bytes: &[u8]) {
        out.extend_from_slice(&(bytes.len() as u64).to_be_bytes());
        out.extend_from_slice(bytes);
    }

    /// Lowercase hex rendering of `bytes`, two characters per byte.
    fn hex_encode(bytes: &[u8]) -> String {
        use std::fmt::Write as _;

        let mut s = String::with_capacity(bytes.len() * 2);
        for b in bytes {
            // Formats straight into `s`; no temporary `String` per byte.
            write!(s, "{b:02x}").expect("writing to a String cannot fail");
        }
        s
    }
}
443}