Skip to main content

mfm_machine/
hashing.rs

//! Canonical JSON and `ArtifactId` hashing helpers.
//!
//! Policy:
//! - Structured data MUST be hashed as canonical JSON bytes (RFC 8785 / JCS-style).
//! - Hashed JSON MUST NOT contain floats (fractional numbers); use integer-scaled values or strings instead.
6
7use crate::ids::ArtifactId;
8
/// Errors returned when a JSON value cannot participate in canonical hashing.
///
/// Both variants are unit variants: they identify *why* a value was rejected but carry no
/// payload describing where in the value the problem was found.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CanonicalJsonError {
    /// A floating-point number was encountered in a value that must be canonically hashable.
    FloatNotAllowed,
    /// The value appears to contain secret-bearing fields and cannot be persisted or hashed.
    SecretsNotAllowed,
}
17
18impl std::fmt::Display for CanonicalJsonError {
19    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
20        match self {
21            CanonicalJsonError::FloatNotAllowed => {
22                write!(
23                    f,
24                    "floats are not allowed in canonical-json-hashed structures"
25                )
26            }
27            CanonicalJsonError::SecretsNotAllowed => {
28                write!(
29                    f,
30                    "secrets are not allowed in persisted canonical-json structures"
31                )
32            }
33        }
34    }
35}
36
37impl std::error::Error for CanonicalJsonError {}
38
39/// Serialize a `serde_json::Value` into canonical JSON bytes.
40///
41/// Target semantics: RFC 8785 (JCS-style) canonicalization.
42/// Additional constraint: floats are rejected.
43pub fn canonical_json_bytes(value: &serde_json::Value) -> Result<Vec<u8>, CanonicalJsonError> {
44    if crate::secrets::json_contains_secrets(value) {
45        return Err(CanonicalJsonError::SecretsNotAllowed);
46    }
47    let mut out = Vec::new();
48    write_canonical_json(value, &mut out)?;
49    Ok(out)
50}
51
52/// Compute the content-addressed `ArtifactId` for raw bytes (SHA-256 lowercase hex).
53pub fn artifact_id_for_bytes(bytes: &[u8]) -> ArtifactId {
54    let digest = ring::digest::digest(&ring::digest::SHA256, bytes);
55    ArtifactId(hex::encode(digest.as_ref()))
56}
57
58/// Compute the content-addressed `ArtifactId` for structured JSON (canonical JSON bytes + SHA-256).
59pub fn artifact_id_for_json(value: &serde_json::Value) -> Result<ArtifactId, CanonicalJsonError> {
60    Ok(artifact_id_for_bytes(&canonical_json_bytes(value)?))
61}
62
63fn write_canonical_json(
64    value: &serde_json::Value,
65    out: &mut Vec<u8>,
66) -> Result<(), CanonicalJsonError> {
67    match value {
68        serde_json::Value::Null => out.extend_from_slice(b"null"),
69        serde_json::Value::Bool(true) => out.extend_from_slice(b"true"),
70        serde_json::Value::Bool(false) => out.extend_from_slice(b"false"),
71        serde_json::Value::Number(n) => {
72            if let Some(i) = n.as_i64() {
73                out.extend_from_slice(i.to_string().as_bytes());
74            } else if let Some(u) = n.as_u64() {
75                out.extend_from_slice(u.to_string().as_bytes());
76            } else {
77                return Err(CanonicalJsonError::FloatNotAllowed);
78            }
79        }
80        serde_json::Value::String(s) => {
81            // Delegate escaping/quoting to serde_json.
82            let json = serde_json::to_string(s).expect("string serialization must not fail");
83            out.extend_from_slice(json.as_bytes());
84        }
85        serde_json::Value::Array(a) => {
86            out.push(b'[');
87            for (idx, v) in a.iter().enumerate() {
88                if idx != 0 {
89                    out.push(b',');
90                }
91                write_canonical_json(v, out)?;
92            }
93            out.push(b']');
94        }
95        serde_json::Value::Object(m) => {
96            out.push(b'{');
97
98            // RFC 8785 (JCS) requires lexicographic ordering of member names.
99            let mut keys: Vec<&String> = m.keys().collect();
100            keys.sort();
101
102            for (idx, key) in keys.iter().enumerate() {
103                if idx != 0 {
104                    out.push(b',');
105                }
106                let k = serde_json::to_string(key).expect("string serialization must not fail");
107                out.extend_from_slice(k.as_bytes());
108                out.push(b':');
109                write_canonical_json(&m[*key], out)?;
110            }
111
112            out.push(b'}');
113        }
114    }
115
116    Ok(())
117}
118
#[cfg(test)]
mod tests {
    use super::*;

    /// Key ordering, nesting, and string escaping on small hand-checked vectors.
    #[test]
    fn canonical_json_vectors_basic() {
        let v = serde_json::json!({"b": 1, "a": 2});
        assert_eq!(canonical_json_bytes(&v).unwrap(), br#"{"a":2,"b":1}"#);

        let v = serde_json::json!({"arr": [true, null, 3], "obj": {"y": 2, "x": 1}});
        assert_eq!(
            canonical_json_bytes(&v).unwrap(),
            br#"{"arr":[true,null,3],"obj":{"x":1,"y":2}}"#
        );

        let v = serde_json::json!({"s": "a\nb"});
        assert_eq!(canonical_json_bytes(&v).unwrap(), br#"{"s":"a\nb"}"#);
    }

    /// Integer extremes take the `i64` and `u64` paths; empty containers emit no separators.
    #[test]
    fn canonical_json_integer_bounds_and_empty_containers() {
        let v = serde_json::json!([i64::MIN, u64::MAX]);
        assert_eq!(
            canonical_json_bytes(&v).unwrap(),
            b"[-9223372036854775808,18446744073709551615]"
        );

        let v = serde_json::json!({"arr": [], "obj": {}});
        assert_eq!(
            canonical_json_bytes(&v).unwrap(),
            br#"{"arr":[],"obj":{}}"#
        );
    }

    /// Floats are rejected wherever they appear, including integral-valued ones such as `1.0`.
    #[test]
    fn canonical_json_rejects_floats() {
        let v = serde_json::json!({"x": 1.5});
        assert_eq!(
            canonical_json_bytes(&v).unwrap_err(),
            CanonicalJsonError::FloatNotAllowed
        );

        let v = serde_json::json!([1.0, 2]);
        assert_eq!(
            canonical_json_bytes(&v).unwrap_err(),
            CanonicalJsonError::FloatNotAllowed
        );
    }

    /// A value flagged by the secrets check is refused before any serialization happens.
    #[test]
    fn canonical_json_rejects_secrets() {
        let v = serde_json::json!({"mnemonic": "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about"});
        assert_eq!(
            canonical_json_bytes(&v).unwrap_err(),
            CanonicalJsonError::SecretsNotAllowed
        );
    }

    /// The ID is the 64-character lowercase hex SHA-256 of the input bytes.
    #[test]
    fn artifact_id_sha256_lowercase_hex() {
        // SHA-256("hello") = 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824
        let id = artifact_id_for_bytes(b"hello");
        assert_eq!(
            id.0,
            "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
        );
        assert_eq!(id.0.len(), 64);
        assert!(id
            .0
            .chars()
            .all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase()));
    }
}