// pr4xis_runtime/codec.rs

//! Canonical codec — DAG-CBOR, the deterministic encoding the content address
//! is computed over.
//!
//! Content-addressing REQUIRES a single canonical encoding so that identical
//! data → identical bytes → the identical [`ContentAddress`], reproducibly
//! across implementations and toolchains. DAG-CBOR is that form (sorted map
//! keys, shortest-form integers, no indefinite-length items). `rkyv` — used
//! elsewhere as a local zero-copy cache — is explicitly NOT the address codec:
//! its byte layout is version/feature/target-bound, so committing or sharing
//! rkyv bytes is a cross-toolchain liability. The *address* is over this form.
//!
//! Determinism here rests on the encoded values being order-stable by
//! construction (the runtime's serialized structures use `BTreeMap`/`Vec`, and
//! the morphism/closure rows are sorted before encoding), so the canonical
//! bytes do not depend on iteration order.
//!
//! Citation: IPLD DAG-CBOR codec specification
//! (<https://ipld.io/specs/codecs/dag-cbor/>); RFC 8949 (CBOR) §4.2
//! (deterministically encoded CBOR).

21use serde::Serialize;
22use serde::de::DeserializeOwned;
23
24use crate::address::ContentAddress;
25
/// Errors from the canonical (DAG-CBOR) codec.
///
/// Each variant carries a human-readable message. The functions in this
/// module fill it with the underlying codec error rendered via `to_string()`,
/// so the original error value is not retained.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodecError {
    /// The value could not be encoded as DAG-CBOR.
    Encode(String),
    /// The bytes could not be decoded as DAG-CBOR (malformed, or not the
    /// expected shape).
    Decode(String),
}

36impl core::fmt::Display for CodecError {
37    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
38        match self {
39            CodecError::Encode(e) => write!(f, "DAG-CBOR encode: {e}"),
40            CodecError::Decode(e) => write!(f, "DAG-CBOR decode: {e}"),
41        }
42    }
43}
44
45impl std::error::Error for CodecError {}
46
47/// Encode `value` to its canonical DAG-CBOR bytes — the form the content
48/// address is taken over. Equal (order-stable) values produce equal bytes.
49pub fn canonical_encode<T: Serialize>(value: &T) -> Result<Vec<u8>, CodecError> {
50    serde_ipld_dagcbor::to_vec(value).map_err(|e| CodecError::Encode(e.to_string()))
51}
52
53/// Decode `bytes` (canonical DAG-CBOR) back into a value — the inverse of
54/// [`canonical_encode`].
55pub fn canonical_decode<T: DeserializeOwned>(bytes: &[u8]) -> Result<T, CodecError> {
56    serde_ipld_dagcbor::from_slice(bytes).map_err(|e| CodecError::Decode(e.to_string()))
57}
58
59/// The content address of `value`: hash its canonical DAG-CBOR encoding. This
60/// is the bridge from "a definition" (any serializable value) to its place in
61/// the Merkle-DAG — `address_of(definition)` is the node's identity.
62pub fn address_of<T: Serialize>(value: &T) -> Result<ContentAddress, CodecError> {
63    Ok(ContentAddress::of(&canonical_encode(value)?))
64}
65
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;

    /// Encoding the same map twice yields the same bytes.
    #[test]
    fn encoding_is_deterministic() {
        let mut m = BTreeMap::new();
        m.insert("a".to_string(), 1u32);
        m.insert("b".to_string(), 2u32);
        assert_eq!(canonical_encode(&m).unwrap(), canonical_encode(&m).unwrap());
    }

    // `canonical_decode` is the substrate's untrusted-input boundary — the `.prx`
    // loader (and the wasm/web demo) deserialize ontologies through it. Honesty
    // at this boundary means totality: an adversarial archive must be REFUSED
    // with an error, never drive an unbounded allocation from an
    // attacker-declared length (the allocation-bomb DoS class). These feed a
    // length prefix claiming 2^64-1 items with no payload; a decoder that
    // pre-allocates would OOM/abort, a robust one reads to EOF and returns Err.

    #[test]
    fn decode_refuses_huge_array_length_without_oom() {
        // DAG-CBOR array header (major type 4), 8-byte length = u64::MAX.
        let adversarial = [0x9b, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff];
        assert!(canonical_decode::<Vec<u32>>(&adversarial).is_err());
    }

    #[test]
    fn decode_refuses_huge_byte_string_length_without_oom() {
        // DAG-CBOR byte-string header (major type 2), 8-byte length = u64::MAX.
        let adversarial = [0x5b, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff];
        assert!(canonical_decode::<Vec<u8>>(&adversarial).is_err());
    }

    #[test]
    fn decode_refuses_huge_map_length_without_oom() {
        // DAG-CBOR map header (major type 5), 8-byte length = u64::MAX.
        let adversarial = [0xbb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff];
        assert!(canonical_decode::<BTreeMap<String, u32>>(&adversarial).is_err());
    }

    /// Addressing the same value twice yields the same address.
    #[test]
    fn equal_values_share_an_address() {
        let v = ("functor", vec!["A", "B"], 3u8);
        assert_eq!(address_of(&v).unwrap(), address_of(&v).unwrap());
    }

    /// Different values must not collide on the same address.
    #[test]
    fn distinct_values_get_distinct_addresses() {
        assert_ne!(address_of(&"A").unwrap(), address_of(&"B").unwrap());
    }

    #[test]
    fn structurally_equal_maps_address_equal_regardless_of_build_order() {
        // BTreeMap is sorted by construction, so two maps with the same
        // entries built in different orders are the same value — and DAG-CBOR
        // gives them the same address. (The runtime's structures are sorted
        // before encoding for exactly this reason.)
        let mut m1 = BTreeMap::new();
        m1.insert("z", 1u32);
        m1.insert("a", 2u32);
        let mut m2 = BTreeMap::new();
        m2.insert("a", 2u32);
        m2.insert("z", 1u32);
        assert_eq!(address_of(&m1).unwrap(), address_of(&m2).unwrap());
    }

    /// Round trip: decoding the encoded bytes gives back the original value.
    #[test]
    fn decode_inverts_encode() {
        let v: Vec<(String, u32)> = vec![("a".into(), 1), ("b".into(), 2)];
        let bytes = canonical_encode(&v).unwrap();
        let back: Vec<(String, u32)> = canonical_decode(&bytes).unwrap();
        assert_eq!(v, back);
    }
}