cortex_core/canonical.rs
1//! Canonical, deterministic, length-prefixed binary encoding for attestation
2//! preimages (T-3.D.0, ADR 0010 §1b, ADR 0014 §"Signed preimage").
3//!
4//! ## Why a custom binary encoder
5//!
6//! ADR 0010 §1b says: *"Verifiers MUST fail closed on unknown or malformed
7//! `schema_version` (no partial verify, no 'best effort' decode)."* That
8//! requires the bytes that go into the Ed25519 signature to be **identical**
9//! across operating systems, serde versions, and language implementations.
10//! `serde_json` does not give that property: object key ordering depends on
11//! the `preserve_order` feature, integer-vs-float coercion can drift, and
12//! whitespace policy differs.
13//!
14//! We therefore use the same framing pattern as
15//! `cortex-ledger::hash::event_hash` (T-1.B.6): a 1-byte domain tag followed
16//! by length-prefixed fields in a **fixed order**. The encoder lives in
17//! `cortex-core` so every crate that needs to verify a signature gets the
18//! same bytes.
19//!
20//! ## Framing
21//!
22//! ```text
23//! signing_input = DOMAIN_TAG_ATTESTATION_PREIMAGE // 1 byte: 0x10
24//! || schema_version (u16, BE) // 2 bytes
25//! || lp(event_source_variant_tag) // 8 + N
26//! || lp(source_field_1) || lp(source_field_2) // each 8 + N
27//! ... // (variant-specific)
28//! || lp(event_id) // 8 + N
29//! || lp(payload_hash) // 8 + N
30//! || lp(session_id) // 8 + N
31//! || lp(ledger_id) // 8 + N
32//! || lineage_tag (u8: 0x01 | 0x02) || lp(chain_position OR previous_hash) // 1 + 8 + N
33//! || lp(signed_at_iso8601) // 8 + N
34//! || lp(key_id) // 8 + N
35//! ```
36//!
37//! where `lp(x) = (x.len() as u64).to_be_bytes() || x.bytes`. Big-endian is
38//! used for both the `schema_version` and length prefixes; this is
39//! architecture-independent on every CI target.
40//!
41//! ## Domain tag allocation
42//!
43//! `cortex-ledger::hash` reserves 0x01 for `event_hash`. The header comment
44//! in that module also reserved 0x02 for "audit" and 0x03 for "trace seal"
45//! as future-use slots; **none of those reservations have shipped**. To
46//! avoid any chance of collision with that documented reservation block we
47//! allocate the attestation preimage tag at **0x10** (decimal 16), opening
48//! a fresh domain for cryptographic attestations. Future attestation
49//! variants (e.g. rotation envelope) take subsequent values in this block.
50
51use chrono::{DateTime, Utc};
52
/// One-byte domain-separation tag that opens every attestation preimage
/// (`0x10`).
///
/// This value MUST NOT be shared with any other hash or signature input
/// domain. The module docs explain why the attestation block starts at
/// `0x10` and list the other reservations.
pub const DOMAIN_TAG_ATTESTATION_PREIMAGE: u8 = 0x10;

/// One-byte domain-separation tag that opens every identity-rotation
/// envelope (`0x11`).
pub const DOMAIN_TAG_ROTATION_ENVELOPE: u8 = 0x11;

/// Version number of the attestation preimage encoding.
///
/// Per ADR 0010 §1b this number is **independent** of
/// [`crate::SCHEMA_VERSION`]: it versions the bytes fed to the Ed25519
/// signature, not the wire shape of the surrounding `Event`. Any change to
/// the framing or to the set of encoded fields requires bumping this value
/// and writing an ADR.
pub const SCHEMA_VERSION_ATTESTATION: u16 = 1;
69
/// Identity of the event's source as it appears in the signed preimage
/// (ADR 0014 §"Signed preimage": *"`EventSource` variant tag + **all**
/// source-specific fields"*).
///
/// Both the variant's wire string (see [`Self::variant_tag`]) and every
/// field carried by the variant are written into the canonical bytes.
/// Introducing a new variant requires a [`SCHEMA_VERSION_ATTESTATION`] bump.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SourceIdentity {
    /// A human operator. Carries no source-specific fields.
    User,
    /// An ephemeral child agent. Per ADR 0014 it carries `agent_id`,
    /// `parent_session_id`, `delegation_id`, and `model`.
    ChildAgent {
        /// Stable identifier of this agent instance.
        agent_id: String,
        /// Session in which the parent delegated to this child.
        parent_session_id: String,
        /// Identifier of the delegation grant.
        delegation_id: String,
        /// Model identifier (free-form; matches the runtime registry).
        model: String,
    },
    /// A tool invocation. Carries the tool `name`.
    Tool {
        /// Tool name (free-form; matches the runtime registry).
        name: String,
    },
    /// The Cortex runtime itself. Carries no source-specific fields.
    Runtime,
    /// An externally-observed outcome. Carries no source-specific fields.
    ExternalOutcome,
    /// An explicit operator correction. Carries no source-specific fields.
    ManualCorrection,
}
105
106impl SourceIdentity {
107 /// Stable wire string for this variant. Part of the canonical preimage
108 /// bytes. Renaming requires a [`SCHEMA_VERSION_ATTESTATION`] bump.
109 #[must_use]
110 pub const fn variant_tag(&self) -> &'static str {
111 match self {
112 Self::User => "user",
113 Self::ChildAgent { .. } => "child_agent",
114 Self::Tool { .. } => "tool",
115 Self::Runtime => "runtime",
116 Self::ExternalOutcome => "external_outcome",
117 Self::ManualCorrection => "manual_correction",
118 }
119 }
120}
121
/// How a preimage is tied to its position in the ledger (ADR 0014 §Replay:
/// *"preimage MUST include `chain_position` or `previous_hash`"*).
///
/// JSONL appends prefer [`Self::PreviousHash`] (Option A from ADR 0010 §2).
/// CLI attestation records that do not have the previous row on hand at
/// sign time use [`Self::ChainPosition`] (Option B-style monotonic counter).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LineageBinding {
    /// Monotonic chain position counter (u64).
    ChainPosition(u64),
    /// Hex-encoded hash of the previous row (32-byte BLAKE3 → 64 hex chars).
    PreviousHash(String),
}
135
136impl LineageBinding {
137 /// Tag byte distinguishing the two variants in the canonical preimage.
138 /// `0x01` for `ChainPosition`, `0x02` for `PreviousHash`. The tag is
139 /// included so a captured signature for `ChainPosition(10)` cannot be
140 /// re-purposed under `PreviousHash("0a…")` even if their canonical
141 /// byte forms happened to coincide.
142 #[must_use]
143 pub const fn tag(&self) -> u8 {
144 match self {
145 Self::ChainPosition(_) => 0x01,
146 Self::PreviousHash(_) => 0x02,
147 }
148 }
149}
150
151/// All material that goes into the Ed25519 signature for an attestation.
152///
153/// Field order in this struct mirrors the canonical byte order of
154/// [`canonical_signing_input`]; do not reorder without bumping
155/// [`SCHEMA_VERSION_ATTESTATION`].
156#[derive(Debug, Clone, PartialEq, Eq)]
157pub struct AttestationPreimage {
158 /// Attestation schema version (governs the canonical encoding only).
159 /// Verifiers MUST fail closed on values they do not understand.
160 pub schema_version: u16,
161 /// Source identity (variant + source-specific fields, **excluding**
162 /// any nested attestation blob — see ADR 0014 §"Signed preimage").
163 pub source: SourceIdentity,
164 /// Stable event identifier (`evt_…`).
165 pub event_id: String,
166 /// Hex-encoded BLAKE3 hash of the canonical event payload.
167 pub payload_hash: String,
168 /// Session identifier (free-form). Stops cross-session replay.
169 pub session_id: String,
170 /// Ledger identifier / session-scoped ledger namespace. Stops
171 /// cross-ledger replay.
172 pub ledger_id: String,
173 /// Lineage binding (chain position OR previous hash). See
174 /// [`LineageBinding`].
175 pub lineage: LineageBinding,
176 /// When the signature was produced (UTC). Encoded as RFC 3339 /
177 /// ISO 8601 — the format is fixed by [`canonical_signing_input`].
178 pub signed_at: DateTime<Utc>,
179 /// Public-key fingerprint of the signing key. Binds the signature to
180 /// the declared key.
181 pub key_id: String,
182}
183
184/// Encode an [`AttestationPreimage`] as a deterministic byte string.
185///
186/// **Output is byte-identical across operating systems and serde versions.**
187/// See module docs for the framing.
188#[must_use]
189pub fn canonical_signing_input(p: &AttestationPreimage) -> Vec<u8> {
190 let mut out = Vec::with_capacity(256);
191 out.push(DOMAIN_TAG_ATTESTATION_PREIMAGE);
192 out.extend_from_slice(&p.schema_version.to_be_bytes());
193
194 // Source: variant tag, then variant-specific fields in fixed order.
195 write_lp(&mut out, p.source.variant_tag().as_bytes());
196 match &p.source {
197 SourceIdentity::User
198 | SourceIdentity::Runtime
199 | SourceIdentity::ExternalOutcome
200 | SourceIdentity::ManualCorrection => {}
201 SourceIdentity::ChildAgent {
202 agent_id,
203 parent_session_id,
204 delegation_id,
205 model,
206 } => {
207 write_lp(&mut out, agent_id.as_bytes());
208 write_lp(&mut out, parent_session_id.as_bytes());
209 write_lp(&mut out, delegation_id.as_bytes());
210 write_lp(&mut out, model.as_bytes());
211 }
212 SourceIdentity::Tool { name } => {
213 write_lp(&mut out, name.as_bytes());
214 }
215 }
216
217 write_lp(&mut out, p.event_id.as_bytes());
218 write_lp(&mut out, p.payload_hash.as_bytes());
219 write_lp(&mut out, p.session_id.as_bytes());
220 write_lp(&mut out, p.ledger_id.as_bytes());
221
222 // Lineage: 1-byte tag + length-prefixed body.
223 out.push(p.lineage.tag());
224 match &p.lineage {
225 LineageBinding::ChainPosition(n) => {
226 // Length prefix is fixed at 8 (u64 BE) so the encoder is uniform.
227 out.extend_from_slice(&8u64.to_be_bytes());
228 out.extend_from_slice(&n.to_be_bytes());
229 }
230 LineageBinding::PreviousHash(hex) => {
231 write_lp(&mut out, hex.as_bytes());
232 }
233 }
234
235 // signed_at: ISO 8601 / RFC 3339 with UTC offset and microsecond precision.
236 // chrono's `to_rfc3339_opts` is deterministic across platforms.
237 let signed_at_str = p
238 .signed_at
239 .to_rfc3339_opts(chrono::SecondsFormat::Micros, true);
240 write_lp(&mut out, signed_at_str.as_bytes());
241
242 write_lp(&mut out, p.key_id.as_bytes());
243
244 out
245}
246
247/// Encode an identity-rotation envelope as a deterministic byte string
248/// (separate domain tag — see [`DOMAIN_TAG_ROTATION_ENVELOPE`]).
249///
250/// ```text
251/// rotation_input = DOMAIN_TAG_ROTATION_ENVELOPE // 1 byte: 0x11
252/// || schema_version (u16, BE) // 2 bytes
253/// || lp(old_pubkey_bytes) // 8 + 32
254/// || lp(new_pubkey_bytes) // 8 + 32
255/// || lp(signed_at_iso8601) // 8 + N
256/// ```
257#[must_use]
258pub fn canonical_rotation_input(
259 schema_version: u16,
260 old_pubkey: &[u8; 32],
261 new_pubkey: &[u8; 32],
262 signed_at: DateTime<Utc>,
263) -> Vec<u8> {
264 let mut out = Vec::with_capacity(128);
265 out.push(DOMAIN_TAG_ROTATION_ENVELOPE);
266 out.extend_from_slice(&schema_version.to_be_bytes());
267 write_lp(&mut out, old_pubkey);
268 write_lp(&mut out, new_pubkey);
269 let signed_at_str = signed_at.to_rfc3339_opts(chrono::SecondsFormat::Micros, true);
270 write_lp(&mut out, signed_at_str.as_bytes());
271 out
272}
273
/// Appends `bytes` to `out` behind a length prefix: the length as a
/// big-endian `u64` (8 bytes), then the bytes themselves.
fn write_lp(out: &mut Vec<u8>, bytes: &[u8]) {
    let len_prefix: [u8; 8] = (bytes.len() as u64).to_be_bytes();
    out.extend(len_prefix);
    out.extend(bytes);
}
279
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::TimeZone;

    /// Fixed preimage shared by the tests below. The golden-vector test
    /// depends on every value here, so changing any field invalidates the
    /// hard-coded hex.
    fn fixture_preimage() -> AttestationPreimage {
        AttestationPreimage {
            schema_version: SCHEMA_VERSION_ATTESTATION,
            source: SourceIdentity::User,
            event_id: "evt_01ARZ3NDEKTSV4RRFFQ69G5FAV".into(),
            payload_hash: "deadbeef".into(),
            session_id: "session-001".into(),
            ledger_id: "ledger-main".into(),
            lineage: LineageBinding::ChainPosition(10),
            signed_at: Utc.with_ymd_and_hms(2026, 5, 2, 12, 0, 0).unwrap(),
            key_id: "fp:abc123".into(),
        }
    }

    /// Cross-platform determinism: the canonical bytes for a fixed fixture
    /// MUST equal a hard-coded hex constant. If this test fails, the
    /// encoder drifted; either revert the change or bump
    /// [`SCHEMA_VERSION_ATTESTATION`] **and** an ADR.
    #[test]
    fn canonical_bytes_match_golden_hex_fixture() {
        let bytes = canonical_signing_input(&fixture_preimage());
        let hex = hex_encode(&bytes);

        // Hand-verified golden vector. Generated once on macOS, asserted
        // identical on Linux + Windows in CI. To regenerate (after a
        // deliberate framing change + ADR + schema bump): print `hex` and
        // paste it back here.
        //
        // Note: `chrono::DateTime::to_rfc3339_opts(SecondsFormat::Micros,
        // /* use_z = */ true)` emits `2026-05-02T12:00:00.000000Z` (27
        // bytes), NOT the `+00:00` long form (32 bytes). The `Z` suffix
        // is part of the canonical contract.
        let expected = concat!(
            "10",               // domain tag
            "0001",             // schema_version u16 BE
            "0000000000000004", // lp len 4 (variant tag "user")
            "75736572",         // "user"
            "000000000000001e", // lp len 30 (event_id)
            "6576745f303141525a334e44454b545356345252464651363947354641",
            "56",                     // event_id tail
            "0000000000000008",       // lp len 8 (payload_hash)
            "6465616462656566",       // "deadbeef"
            "000000000000000b",       // lp len 11 (session_id)
            "73657373696f6e2d303031", // "session-001"
            "000000000000000b",       // lp len 11 (ledger_id)
            "6c65646765722d6d61696e", // "ledger-main"
            "01",                     // lineage tag ChainPosition
            "0000000000000008",       // u64 fixed length
            "000000000000000a",       // chain_position = 10
            "000000000000001b",       // lp len 27 (signed_at)
            "323032362d30352d30325431323a30303a30302e3030303030305a",
            "0000000000000009",   // lp len 9 (key_id)
            "66703a616263313233", // "fp:abc123"
        );

        assert_eq!(
            hex, expected,
            "canonical encoder drift — bytes must be byte-identical across platforms"
        );
    }

    /// Two independent code paths produce the same canonical bytes:
    /// (a) the `canonical_signing_input` function;
    /// (b) a hand-built byte vector via the public framing rules.
    #[test]
    fn canonical_byte_identical_across_serdes() {
        let p = fixture_preimage();
        let from_encoder = canonical_signing_input(&p);

        // The hand-built path below writes no source-specific fields, which
        // is only correct for a field-less variant. Fail loudly if the
        // fixture ever changes instead of comparing against wrong bytes.
        assert_eq!(
            p.source,
            SourceIdentity::User,
            "hand-built framing below assumes a source variant without fields"
        );

        // Hand-built path mirroring the documented framing.
        let mut manual: Vec<u8> = Vec::new();
        manual.push(DOMAIN_TAG_ATTESTATION_PREIMAGE);
        manual.extend_from_slice(&p.schema_version.to_be_bytes());
        push_lp(&mut manual, p.source.variant_tag().as_bytes());
        push_lp(&mut manual, p.event_id.as_bytes());
        push_lp(&mut manual, p.payload_hash.as_bytes());
        push_lp(&mut manual, p.session_id.as_bytes());
        push_lp(&mut manual, p.ledger_id.as_bytes());
        manual.push(p.lineage.tag());
        // Exhaustive so that neither lineage variant can silently skip its
        // body in the hand-built bytes.
        match &p.lineage {
            LineageBinding::ChainPosition(n) => {
                manual.extend_from_slice(&8u64.to_be_bytes());
                manual.extend_from_slice(&n.to_be_bytes());
            }
            LineageBinding::PreviousHash(hex) => push_lp(&mut manual, hex.as_bytes()),
        }
        let signed_at_str = p
            .signed_at
            .to_rfc3339_opts(chrono::SecondsFormat::Micros, true);
        push_lp(&mut manual, signed_at_str.as_bytes());
        push_lp(&mut manual, p.key_id.as_bytes());

        assert_eq!(
            from_encoder, manual,
            "encoder output must match the hand-built canonical bytes"
        );
    }

    /// Property: rearranging the Rust struct literal does not change the
    /// signed bytes — only the canonical byte sequence is what is signed.
    /// (This is implicitly proved by the above tests; we add an explicit
    /// case for documentation.)
    #[test]
    fn field_reorder_does_not_change_signed_semantics() {
        let p1 = fixture_preimage();

        // Construct the same logical preimage with the field expressions
        // written in a different source order. Rust struct initialization
        // order does not affect the in-memory layout, but this makes the
        // intent of the test explicit.
        let p2 = AttestationPreimage {
            key_id: "fp:abc123".into(),
            signed_at: chrono::Utc.with_ymd_and_hms(2026, 5, 2, 12, 0, 0).unwrap(),
            lineage: LineageBinding::ChainPosition(10),
            ledger_id: "ledger-main".into(),
            session_id: "session-001".into(),
            payload_hash: "deadbeef".into(),
            event_id: "evt_01ARZ3NDEKTSV4RRFFQ69G5FAV".into(),
            source: SourceIdentity::User,
            schema_version: SCHEMA_VERSION_ATTESTATION,
        };

        assert_eq!(canonical_signing_input(&p1), canonical_signing_input(&p2));
    }

    /// Sanity: distinct logical preimages produce distinct canonical bytes.
    #[test]
    fn distinct_preimages_produce_distinct_bytes() {
        let mut p2 = fixture_preimage();
        p2.event_id = "evt_01ARZ3NDEKTSV4RRFFQ69G5FAW".into();
        assert_ne!(
            canonical_signing_input(&fixture_preimage()),
            canonical_signing_input(&p2)
        );
    }

    /// Lineage tag prevents cross-variant collision: a `ChainPosition(0)`
    /// preimage must not encode to the same bytes as a `PreviousHash("")`
    /// preimage.
    #[test]
    fn lineage_variant_tag_prevents_cross_variant_collision() {
        let mut a = fixture_preimage();
        a.lineage = LineageBinding::ChainPosition(0);
        let mut b = fixture_preimage();
        b.lineage = LineageBinding::PreviousHash(String::new());
        assert_ne!(canonical_signing_input(&a), canonical_signing_input(&b));
    }

    /// Source-specific fields are written right after the variant tag, so
    /// they are covered by the signature.
    #[test]
    fn source_specific_fields_are_part_of_the_preimage() {
        let mut p = fixture_preimage();
        p.source = SourceIdentity::Tool {
            name: "grep".into(),
        };
        let bytes = canonical_signing_input(&p);

        let mut expected_prefix = vec![DOMAIN_TAG_ATTESTATION_PREIMAGE];
        expected_prefix.extend_from_slice(&SCHEMA_VERSION_ATTESTATION.to_be_bytes());
        push_lp(&mut expected_prefix, b"tool");
        push_lp(&mut expected_prefix, b"grep");
        push_lp(&mut expected_prefix, p.event_id.as_bytes());
        assert!(
            bytes.starts_with(&expected_prefix),
            "tool name must follow the variant tag and precede event_id"
        );

        let mut other = p.clone();
        other.source = SourceIdentity::Tool { name: "rg".into() };
        assert_ne!(bytes, canonical_signing_input(&other));
    }

    /// The rotation envelope follows its documented framing: domain tag,
    /// schema version, both keys, then the timestamp, each length-prefixed.
    #[test]
    fn rotation_input_follows_documented_framing() {
        let old_key = [0x11u8; 32];
        let new_key = [0x22u8; 32];
        let signed_at = Utc.with_ymd_and_hms(2026, 5, 2, 12, 0, 0).unwrap();

        let bytes =
            canonical_rotation_input(SCHEMA_VERSION_ATTESTATION, &old_key, &new_key, signed_at);

        let mut expected = vec![DOMAIN_TAG_ROTATION_ENVELOPE];
        expected.extend_from_slice(&SCHEMA_VERSION_ATTESTATION.to_be_bytes());
        push_lp(&mut expected, &old_key);
        push_lp(&mut expected, &new_key);
        push_lp(&mut expected, b"2026-05-02T12:00:00.000000Z");

        assert_eq!(bytes, expected);
        // 1 (tag) + 2 (version) + 2 * (8 + 32) (keys) + (8 + 27) (timestamp).
        assert_eq!(bytes.len(), 118);
    }

    /// Test-local copy of the length-prefix rule, kept separate from the
    /// encoder's helper so the hand-built bytes do not share its code path.
    fn push_lp(out: &mut Vec<u8>, bytes: &[u8]) {
        out.extend_from_slice(&(bytes.len() as u64).to_be_bytes());
        out.extend_from_slice(bytes);
    }

    /// Lower-case hex rendering of `bytes`, two characters per byte.
    fn hex_encode(bytes: &[u8]) -> String {
        use std::fmt::Write as _;

        let mut s = String::with_capacity(bytes.len() * 2);
        for b in bytes {
            // Format straight into the buffer instead of allocating a
            // temporary `String` per byte.
            write!(s, "{b:02x}").expect("writing to a String cannot fail");
        }
        s
    }
}
443}