mpl_core/
hash.rs

1//! Canonicalization and Semantic Hashing
2//!
3//! BLAKE3 hashes over canonical payloads detect meaning drift
4//! across retries and multi-hop flows.
5
6use serde_json::Value;
7
8use crate::error::Result;
9
10/// Canonicalize a JSON value for deterministic hashing
11///
12/// Steps:
13/// 1. Sort all object keys recursively
14/// 2. Remove null values (optional fields)
15/// 3. Normalize numbers (no trailing zeros)
16/// 4. Serialize with consistent formatting
17pub fn canonicalize(value: &Value) -> Result<String> {
18    let canonical = canonicalize_value(value);
19    Ok(serde_json::to_string(&canonical)?)
20}
21
22/// Recursively canonicalize a JSON value
23fn canonicalize_value(value: &Value) -> Value {
24    match value {
25        Value::Object(map) => {
26            // Sort keys and recursively canonicalize values
27            let mut sorted: Vec<_> = map
28                .iter()
29                .filter(|(_, v)| !v.is_null()) // Remove null values
30                .map(|(k, v)| (k.clone(), canonicalize_value(v)))
31                .collect();
32            sorted.sort_by(|a, b| a.0.cmp(&b.0));
33            Value::Object(sorted.into_iter().collect())
34        }
35        Value::Array(arr) => {
36            // Recursively canonicalize array elements (preserve order)
37            Value::Array(arr.iter().map(canonicalize_value).collect())
38        }
39        Value::Number(n) => {
40            // Normalize numbers: convert to f64 and back to remove trailing zeros
41            if let Some(f) = n.as_f64() {
42                // Check if it's actually an integer
43                if f.fract() == 0.0 && f.abs() < (i64::MAX as f64) {
44                    Value::Number(serde_json::Number::from(f as i64))
45                } else {
46                    // Round to 6 decimal places for consistency
47                    let rounded = (f * 1_000_000.0).round() / 1_000_000.0;
48                    serde_json::Number::from_f64(rounded)
49                        .map(Value::Number)
50                        .unwrap_or_else(|| Value::Number(n.clone()))
51                }
52            } else {
53                Value::Number(n.clone())
54            }
55        }
56        Value::String(s) => {
57            // Trim whitespace for consistency
58            Value::String(s.trim().to_string())
59        }
60        // Bool and Null pass through unchanged
61        other => other.clone(),
62    }
63}
64
65/// Compute the semantic hash of a JSON value
66///
67/// Returns a BLAKE3 hash prefixed with "b3:" for identification
68pub fn semantic_hash(value: &Value) -> Result<String> {
69    let canonical = canonicalize(value)?;
70    let hash = blake3::hash(canonical.as_bytes());
71    Ok(format!("b3:{}", hash.to_hex()))
72}
73
74/// Compute semantic hash from a canonical string (already canonicalized)
75pub fn hash_canonical(canonical: &str) -> String {
76    let hash = blake3::hash(canonical.as_bytes());
77    format!("b3:{}", hash.to_hex())
78}
79
80/// Verify that a payload matches its declared semantic hash
81pub fn verify_hash(value: &Value, expected_hash: &str) -> Result<bool> {
82    let actual = semantic_hash(value)?;
83    Ok(actual == expected_hash)
84}
85
/// Unit tests for canonicalization and semantic hashing.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // Object members must come out ordered by key regardless of input order.
    #[test]
    fn test_canonicalize_sorts_keys() {
        let input = json!({
            "z": 1,
            "a": 2,
            "m": 3
        });
        let canonical = canonicalize(&input).unwrap();
        assert_eq!(canonical, r#"{"a":2,"m":3,"z":1}"#);
    }

    // A null member is equivalent to the member being absent.
    #[test]
    fn test_canonicalize_removes_nulls() {
        let input = json!({
            "a": 1,
            "b": null,
            "c": 3
        });
        let canonical = canonicalize(&input).unwrap();
        assert_eq!(canonical, r#"{"a":1,"c":3}"#);
    }

    // Key ordering applies inside nested objects and objects within arrays.
    #[test]
    fn test_canonicalize_nested() {
        let input = json!({
            "outer": {
                "z": 1,
                "a": 2
            },
            "array": [{"b": 2, "a": 1}]
        });
        let canonical = canonicalize(&input).unwrap();
        assert_eq!(
            canonical,
            r#"{"array":[{"a":1,"b":2}],"outer":{"a":2,"z":1}}"#
        );
    }

    // Arrays are positional: element order and null elements are kept.
    #[test]
    fn test_canonicalize_keeps_array_order_and_nulls() {
        let canonical = canonicalize(&json!([3, null, 1])).unwrap();
        assert_eq!(canonical, "[3,null,1]");
    }

    // A float with no fractional part is the same number as the integer.
    #[test]
    fn test_canonicalize_integral_float_matches_integer() {
        let from_float = canonicalize(&json!({"n": 1.0})).unwrap();
        let from_int = canonicalize(&json!({"n": 1})).unwrap();
        assert_eq!(from_float, r#"{"n":1}"#);
        assert_eq!(from_float, from_int);
    }

    // Fractions are rounded to six decimal places.
    #[test]
    fn test_canonicalize_rounds_fractions_to_six_places() {
        let canonical = canonicalize(&json!({"n": 0.1234567})).unwrap();
        assert_eq!(canonical, r#"{"n":0.123457}"#);
    }

    // Surrounding whitespace in string values is not significant.
    #[test]
    fn test_canonicalize_trims_string_values() {
        let canonical = canonicalize(&json!({"s": "  padded \n"})).unwrap();
        assert_eq!(canonical, r#"{"s":"padded"}"#);
    }

    #[test]
    fn test_semantic_hash_deterministic() {
        let input = json!({
            "b": 2,
            "a": 1
        });
        let hash1 = semantic_hash(&input).unwrap();
        let hash2 = semantic_hash(&input).unwrap();
        assert_eq!(hash1, hash2);
        assert!(hash1.starts_with("b3:"));
    }

    #[test]
    fn test_semantic_hash_different_order_same_hash() {
        let input1 = json!({"a": 1, "b": 2});
        let input2 = json!({"b": 2, "a": 1});
        let hash1 = semantic_hash(&input1).unwrap();
        let hash2 = semantic_hash(&input2).unwrap();
        assert_eq!(hash1, hash2);
    }

    // Payloads that differ in meaning must not share a hash.
    #[test]
    fn test_semantic_hash_differs_for_different_values() {
        let hash1 = semantic_hash(&json!({"a": 1})).unwrap();
        let hash2 = semantic_hash(&json!({"a": 2})).unwrap();
        assert_ne!(hash1, hash2);
    }

    // Hashing pre-canonicalized text must agree with the one-step API.
    #[test]
    fn test_hash_canonical_matches_semantic_hash() {
        let input = json!({"b": [1, 2], "a": "x"});
        let canonical = canonicalize(&input).unwrap();
        assert_eq!(hash_canonical(&canonical), semantic_hash(&input).unwrap());
    }

    #[test]
    fn test_verify_hash() {
        let input = json!({"test": "value"});
        let hash = semantic_hash(&input).unwrap();
        assert!(verify_hash(&input, &hash).unwrap());
        assert!(!verify_hash(&input, "b3:wrong").unwrap());
    }
}