pr4xis_runtime/codec.rs
1//! Canonical codec — DAG-CBOR, the deterministic encoding the content address
2//! is computed over.
3//!
4//! Content-addressing REQUIRES a single canonical encoding so that identical
5//! data → identical bytes → the identical [`ContentAddress`], reproducibly
6//! across implementations and toolchains. DAG-CBOR is that form (sorted map
7//! keys, shortest-form integers, no indefinite-length items). `rkyv` — used
8//! elsewhere as a local zero-copy cache — is explicitly NOT the address codec:
9//! its byte layout is version/feature/target-bound, so committing or sharing
10//! rkyv bytes is a cross-toolchain liability. The *address* is over this form.
11//!
12//! Determinism here rests on the encoded values being order-stable by
13//! construction (the runtime's serialized structures use `BTreeMap`/`Vec`, and
14//! the morphism/closure rows are sorted before encoding), so the canonical
15//! bytes do not depend on iteration order.
16//!
17//! Citation: IPLD DAG-CBOR codec specification
18//! (<https://ipld.io/specs/codecs/dag-cbor/>); RFC 8949 (CBOR) §4.2
19//! (deterministically encoded CBOR).
20
21use serde::Serialize;
22use serde::de::DeserializeOwned;
23
24use crate::address::ContentAddress;
25
/// Failure modes of the canonical (DAG-CBOR) codec.
///
/// Each variant carries the underlying codec error rendered as a `String`,
/// which keeps this type `Clone`/`Eq` regardless of what the backing codec's
/// own error types implement.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodecError {
    /// Serializing a value to DAG-CBOR failed; the payload is the encoder's
    /// message.
    Encode(String),
    /// Deserializing DAG-CBOR bytes failed (malformed input, or input that
    /// does not have the expected shape); the payload is the decoder's
    /// message.
    Decode(String),
}

impl core::fmt::Display for CodecError {
    /// Renders as `DAG-CBOR encode: <message>` or `DAG-CBOR decode: <message>`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::Encode(e) => f.write_fmt(format_args!("DAG-CBOR encode: {e}")),
            Self::Decode(e) => f.write_fmt(format_args!("DAG-CBOR decode: {e}")),
        }
    }
}

// No `source()` override: the underlying error is already flattened into the
// message string, so the default (no source) applies.
impl std::error::Error for CodecError {}
46
47/// Encode `value` to its canonical DAG-CBOR bytes — the form the content
48/// address is taken over. Equal (order-stable) values produce equal bytes.
49pub fn canonical_encode<T: Serialize>(value: &T) -> Result<Vec<u8>, CodecError> {
50 serde_ipld_dagcbor::to_vec(value).map_err(|e| CodecError::Encode(e.to_string()))
51}
52
53/// Decode `bytes` (canonical DAG-CBOR) back into a value — the inverse of
54/// [`canonical_encode`].
55pub fn canonical_decode<T: DeserializeOwned>(bytes: &[u8]) -> Result<T, CodecError> {
56 serde_ipld_dagcbor::from_slice(bytes).map_err(|e| CodecError::Decode(e.to_string()))
57}
58
59/// The content address of `value`: hash its canonical DAG-CBOR encoding. This
60/// is the bridge from "a definition" (any serializable value) to its place in
61/// the Merkle-DAG — `address_of(definition)` is the node's identity.
62pub fn address_of<T: Serialize>(value: &T) -> Result<ContentAddress, CodecError> {
63 Ok(ContentAddress::of(&canonical_encode(value)?))
64}
65
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;

    /// Encoding the same map twice yields byte-identical output.
    #[test]
    fn encoding_is_deterministic() {
        let map = BTreeMap::from([("a".to_string(), 1u32), ("b".to_string(), 2u32)]);
        let first = canonical_encode(&map).unwrap();
        let second = canonical_encode(&map).unwrap();
        assert_eq!(first, second);
    }

    /// Addressing the same value twice yields the same address.
    #[test]
    fn equal_values_share_an_address() {
        let definition = ("functor", vec!["A", "B"], 3u8);
        let first = address_of(&definition).unwrap();
        let second = address_of(&definition).unwrap();
        assert_eq!(first, second);
    }

    /// Different values must not collide on the same address.
    #[test]
    fn distinct_values_get_distinct_addresses() {
        let addr_a = address_of(&"A").unwrap();
        let addr_b = address_of(&"B").unwrap();
        assert_ne!(addr_a, addr_b);
    }

    #[test]
    fn structurally_equal_maps_address_equal_regardless_of_build_order() {
        // A BTreeMap keeps its entries sorted no matter the insertion order,
        // so the two maps below are the same value — and DAG-CBOR gives them
        // the same address. (The runtime's structures are sorted before
        // encoding for exactly this reason.)
        let mut inserted_descending = BTreeMap::new();
        for (key, value) in [("z", 1u32), ("a", 2u32)] {
            inserted_descending.insert(key, value);
        }

        let mut inserted_ascending = BTreeMap::new();
        for (key, value) in [("a", 2u32), ("z", 1u32)] {
            inserted_ascending.insert(key, value);
        }

        let descending_addr = address_of(&inserted_descending).unwrap();
        let ascending_addr = address_of(&inserted_ascending).unwrap();
        assert_eq!(descending_addr, ascending_addr);
    }

    /// A value survives an encode → decode round trip unchanged.
    #[test]
    fn decode_inverts_encode() {
        let original = vec![(String::from("a"), 1u32), (String::from("b"), 2u32)];
        let encoded = canonical_encode(&original).unwrap();
        let decoded = canonical_decode::<Vec<(String, u32)>>(&encoded).unwrap();
        assert_eq!(original, decoded);
    }
}
112}