Skip to main content

pr4xis_runtime/
codec.rs

//! Canonical codec — DAG-CBOR, the deterministic encoding the content address
//! is computed over.
//!
//! Content-addressing REQUIRES a single canonical encoding so that identical
//! data → identical bytes → the identical [`ContentAddress`], reproducibly
//! across implementations and toolchains. DAG-CBOR is that form (sorted map
//! keys, shortest-form integers, no indefinite-length items). `rkyv` — used
//! elsewhere as a local zero-copy cache — is explicitly NOT the address codec:
//! its byte layout is version/feature/target-bound, so committing or sharing
//! rkyv bytes is a cross-toolchain liability. The *address* is over this form.
//!
//! Determinism here rests on the encoded values being order-stable by
//! construction (the runtime's serialized structures use `BTreeMap`/`Vec`, and
//! the morphism/closure rows are sorted before encoding), so the canonical
//! bytes do not depend on iteration order.
//!
//! Citation: IPLD DAG-CBOR codec specification
//! (<https://ipld.io/specs/codecs/dag-cbor/>); RFC 8949 (CBOR) §4.2
//! (deterministically encoded CBOR).
20
21use serde::Serialize;
22use serde::de::DeserializeOwned;
23
24use crate::address::ContentAddress;
25
/// Failure modes of the canonical (DAG-CBOR) codec.
///
/// Each variant carries the underlying codec's error rendered as text, which
/// keeps this type `Clone` + `Eq` and free of the codec crate's error types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodecError {
    /// Serialising a value to DAG-CBOR failed.
    Encode(String),
    /// Deserialising DAG-CBOR bytes failed: the input was malformed, or it
    /// did not have the shape the target type expects.
    Decode(String),
}

impl core::fmt::Display for CodecError {
    /// Renders as `DAG-CBOR encode: <reason>` or `DAG-CBOR decode: <reason>`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::Encode(reason) => {
                f.write_str("DAG-CBOR encode: ")?;
                f.write_str(reason)
            }
            Self::Decode(reason) => {
                f.write_str("DAG-CBOR decode: ")?;
                f.write_str(reason)
            }
        }
    }
}

// No `source()` override: the underlying error is kept only as its message.
impl std::error::Error for CodecError {}
46
47/// Encode `value` to its canonical DAG-CBOR bytes — the form the content
48/// address is taken over. Equal (order-stable) values produce equal bytes.
49pub fn canonical_encode<T: Serialize>(value: &T) -> Result<Vec<u8>, CodecError> {
50    serde_ipld_dagcbor::to_vec(value).map_err(|e| CodecError::Encode(e.to_string()))
51}
52
53/// Decode `bytes` (canonical DAG-CBOR) back into a value — the inverse of
54/// [`canonical_encode`].
55pub fn canonical_decode<T: DeserializeOwned>(bytes: &[u8]) -> Result<T, CodecError> {
56    serde_ipld_dagcbor::from_slice(bytes).map_err(|e| CodecError::Decode(e.to_string()))
57}
58
59/// The content address of `value`: hash its canonical DAG-CBOR encoding. This
60/// is the bridge from "a definition" (any serializable value) to its place in
61/// the Merkle-DAG — `address_of(definition)` is the node's identity.
62pub fn address_of<T: Serialize>(value: &T) -> Result<ContentAddress, CodecError> {
63    Ok(ContentAddress::of(&canonical_encode(value)?))
64}
65
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;

    /// Encoding the same map twice yields byte-identical output.
    #[test]
    fn encoding_is_deterministic() {
        let map = BTreeMap::from([("a".to_string(), 1u32), ("b".to_string(), 2u32)]);
        let first = canonical_encode(&map).unwrap();
        let second = canonical_encode(&map).unwrap();
        assert_eq!(first, second);
    }

    /// Addressing the same value twice yields the same address.
    #[test]
    fn equal_values_share_an_address() {
        let value = ("functor", vec!["A", "B"], 3u8);
        let first = address_of(&value).unwrap();
        let second = address_of(&value).unwrap();
        assert_eq!(first, second);
    }

    /// Different values must not collide on one address.
    #[test]
    fn distinct_values_get_distinct_addresses() {
        let addr_a = address_of(&"A").unwrap();
        let addr_b = address_of(&"B").unwrap();
        assert_ne!(addr_a, addr_b);
    }

    /// A `BTreeMap` keeps its entries sorted, so the order in which entries
    /// are supplied does not change the value — and therefore does not
    /// change its DAG-CBOR address. (The runtime's structures are sorted
    /// before encoding for exactly this reason.)
    #[test]
    fn structurally_equal_maps_address_equal_regardless_of_build_order() {
        let z_first = BTreeMap::from([("z", 1u32), ("a", 2u32)]);
        let a_first = BTreeMap::from([("a", 2u32), ("z", 1u32)]);
        assert_eq!(
            address_of(&z_first).unwrap(),
            address_of(&a_first).unwrap()
        );
    }

    /// Encoding followed by decoding returns the original value.
    #[test]
    fn decode_inverts_encode() {
        let original = vec![("a".to_string(), 1u32), ("b".to_string(), 2u32)];
        let encoded = canonical_encode(&original).unwrap();
        let decoded: Vec<(String, u32)> = canonical_decode(&encoded).unwrap();
        assert_eq!(original, decoded);
    }
}