Skip to main content

wasm4pm_types/
hash.rs

1use serde::{Deserialize, Serialize};
2
3/// BLAKE3 hash wrapper (256-bit = 64 hex characters)
4#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
5pub struct Blake3Hash(String);
6
7impl Blake3Hash {
8    /// Create a Blake3Hash from a 64-character lowercase hex string.
9    ///
10    /// PR #66 doctrine fix: prior to this version, `is_ascii_hexdigit` matched
11    /// both uppercase and lowercase, so two distinct `Blake3Hash` values could
12    /// be constructed from the same underlying digest (e.g. "ab..." vs "AB...").
13    /// Receipts and provenance chains compare hashes via `PartialEq` on the
14    /// inner string, so mixed-case acceptance produced silent equality failures.
15    /// Canonical BLAKE3 hex is lowercase — reject everything else.
16    pub fn from_hex(hex: String) -> Result<Self, String> {
17        if hex.len() != 64 {
18            return Err(format!("Invalid hash length: {} (expected 64)", hex.len()));
19        }
20        if !hex.chars().all(|c| c.is_ascii_digit() || ('a'..='f').contains(&c)) {
21            return Err(
22                "Hash must be lowercase hex (digits 0-9, letters a-f only)".to_string(),
23            );
24        }
25        Ok(Blake3Hash(hex))
26    }
27
28    /// Get the hex representation
29    pub fn as_hex(&self) -> &str {
30        &self.0
31    }
32
33    /// Convert to owned hex string
34    pub fn to_hex(&self) -> String {
35        self.0.clone()
36    }
37}
38
39impl std::fmt::Display for Blake3Hash {
40    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
41        write!(f, "{}", self.0)
42    }
43}
44
45impl AsRef<str> for Blake3Hash {
46    fn as_ref(&self) -> &str {
47        &self.0
48    }
49}
50
51/// Canonical deterministic JSON with sorted keys.
52///
53/// PR #54 NaN class: `serde_json::Value::Number` cannot represent NaN/Inf, so
54/// `serde_json::to_value` of an `f64::NAN` returns `Err` — but it can be easy
55/// to wrap that error and emit a hash anyway. We make rejection explicit by
56/// scanning the produced `Value` for any number that fails to serialise
57/// (which under serde_json signals non-finite at the point we deserialised),
58/// and we deny it as a serialization error.
59pub fn canonical_json<T: serde::Serialize>(value: &T) -> Result<String, serde_json::Error> {
60    let json = serde_json::to_value(value)?;
61    // Defense in depth: any number in the produced Value should round-trip
62    // through f64 finitely. If it doesn't, we've been given a custom Number
63    // type via a feature flag — refuse it to keep hashes deterministic.
64    reject_non_finite_numbers(&json)?;
65    serde_json::to_string(&sort_json_value(&json))
66}
67
68fn reject_non_finite_numbers(value: &serde_json::Value) -> Result<(), serde_json::Error> {
69    match value {
70        serde_json::Value::Number(n) => {
71            if let Some(f) = n.as_f64() {
72                if !f.is_finite() {
73                    // Construct a serde_json error by attempting an invalid op.
74                    return Err(serde::de::Error::custom(
75                        "canonical_json: non-finite number (NaN/Inf) is not canonicalizable",
76                    ));
77                }
78            }
79            Ok(())
80        }
81        serde_json::Value::Array(arr) => {
82            for v in arr {
83                reject_non_finite_numbers(v)?;
84            }
85            Ok(())
86        }
87        serde_json::Value::Object(map) => {
88            for (_k, v) in map {
89                reject_non_finite_numbers(v)?;
90            }
91            Ok(())
92        }
93        _ => Ok(()),
94    }
95}
96
97/// Recursively sort all object keys in JSON value for deterministic output
98fn sort_json_value(value: &serde_json::Value) -> serde_json::Value {
99    match value {
100        serde_json::Value::Object(map) => {
101            let mut sorted: Vec<_> = map.iter().collect();
102            sorted.sort_by(|a, b| a.0.cmp(b.0));
103            let mut new_map = serde_json::Map::new();
104            for (k, v) in sorted {
105                new_map.insert(k.clone(), sort_json_value(v));
106            }
107            serde_json::Value::Object(new_map)
108        }
109        serde_json::Value::Array(arr) => {
110            serde_json::Value::Array(arr.iter().map(sort_json_value).collect())
111        }
112        other => other.clone(),
113    }
114}
115
116/// Compute BLAKE3 hash of bytes, returning 64-char hex string
117pub fn blake3_hex(data: &[u8]) -> String {
118    let hash = blake3::hash(data);
119    hash.to_hex().to_string()
120}
121
122/// Compute BLAKE3 hash of a string
123pub fn blake3_string(data: &str) -> String {
124    blake3_hex(data.as_bytes())
125}
126
127/// Compute BLAKE3 hash of concatenated hashes (for combined_hash).
128///
129/// PR #66 doctrine fix: the original concatenated without a separator, which
130/// meant `["aa", "bb"]` and `["a", "abb"]` produced identical input bytes and
131/// therefore identical combined hashes. We now length-prefix each input
132/// (length as little-endian u64, in hex) before concatenation, which makes
133/// the encoding injective: distinct input sequences map to distinct strings.
134pub fn blake3_combined(hashes: &[&str]) -> String {
135    let mut combined = String::new();
136    for h in hashes {
137        // 16 hex chars = 64-bit length, more than enough for any hash string.
138        combined.push_str(&format!("{:016x}:", h.len()));
139        combined.push_str(h);
140    }
141    blake3_hex(combined.as_bytes())
142}
143
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_blake3_hash_creation() {
        let hex = "a".repeat(64);
        let hash = Blake3Hash::from_hex(hex.clone()).unwrap();
        assert_eq!(hash.as_hex(), hex);
    }

    /// All read accessors expose the same underlying hex string.
    #[test]
    fn accessors_agree_on_the_hex_string() {
        let hex = "0123456789abcdef".repeat(4);
        let hash = Blake3Hash::from_hex(hex.clone()).unwrap();
        assert_eq!(hash.as_hex(), hex);
        assert_eq!(hash.to_hex(), hex);
        assert_eq!(hash.to_string(), hex);
        let borrowed: &str = hash.as_ref();
        assert_eq!(borrowed, hex);
    }

    #[test]
    fn test_blake3_invalid_length() {
        let result = Blake3Hash::from_hex("a".repeat(128));
        assert!(result.is_err());
    }

    /// Length is checked on both sides of 64, and the empty string is refused.
    #[test]
    fn from_hex_rejects_short_and_empty_input() {
        assert!(Blake3Hash::from_hex(String::new()).is_err());
        assert!(Blake3Hash::from_hex("a".repeat(63)).is_err());
        assert!(Blake3Hash::from_hex("a".repeat(65)).is_err());
    }

    /// A string of the right length is still refused when it contains anything
    /// outside `0-9a-f`, including non-ASCII text whose byte length is 64.
    #[test]
    fn from_hex_rejects_non_hex_characters() {
        assert!(Blake3Hash::from_hex("g".repeat(64)).is_err());
        assert!(Blake3Hash::from_hex(format!("{} ", "a".repeat(63))).is_err());

        let two_byte_chars = "é".repeat(32);
        assert_eq!(two_byte_chars.len(), 64);
        assert!(Blake3Hash::from_hex(two_byte_chars).is_err());
    }

    #[test]
    fn test_blake3_string_hash() {
        let hash1 = blake3_string("test");
        let hash2 = blake3_string("test");
        assert_eq!(hash1, hash2);
        assert_eq!(hash1.len(), 64);
    }

    /// Known-answer check: the BLAKE3 digest of the empty input is a published
    /// constant. Also pins that the string and byte entry points agree and
    /// that the produced hex is accepted by `from_hex`.
    #[test]
    fn blake3_hex_matches_known_answer_for_empty_input() {
        assert_eq!(
            blake3_hex(b""),
            "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262"
        );
        assert_eq!(blake3_string(""), blake3_hex(b""));
        assert!(Blake3Hash::from_hex(blake3_string("test")).is_ok());
    }

    #[test]
    fn test_canonical_json() {
        let mut map1 = serde_json::Map::new();
        map1.insert("z".to_string(), serde_json::json!(1));
        map1.insert("a".to_string(), serde_json::json!(2));

        let mut map2 = serde_json::Map::new();
        map2.insert("a".to_string(), serde_json::json!(2));
        map2.insert("z".to_string(), serde_json::json!(1));

        let val1 = serde_json::Value::Object(map1);
        let val2 = serde_json::Value::Object(map2);

        let json1 = canonical_json(&val1).unwrap();
        let json2 = canonical_json(&val2).unwrap();

        assert_eq!(json1, json2);
        assert!(json1.starts_with(r#"{"a":2"#)); // Keys sorted alphabetically
    }

    /// Key sorting applies at every depth, including objects inside arrays;
    /// array element order itself is preserved.
    #[test]
    fn canonical_json_sorts_nested_objects() {
        let value = serde_json::json!({
            "b": { "y": 1, "x": 2 },
            "a": [ { "d": 1, "c": 2 }, 3 ]
        });
        assert_eq!(
            canonical_json(&value).unwrap(),
            r#"{"a":[{"c":2,"d":1},3],"b":{"x":2,"y":1}}"#
        );
    }

    #[test]
    fn test_blake3_combined() {
        let hash1 = "a".repeat(64);
        let hash2 = "b".repeat(64);
        let combined = blake3_combined(&[&hash1, &hash2]);
        assert_eq!(combined.len(), 64);
    }

    /// Rank-2 (domain contract): canonical BLAKE3 hex is lowercase. Mixed-case
    /// input must be rejected, because two `Blake3Hash` values with the same
    /// digest but different case would not be `PartialEq`-equal — that breaks
    /// receipt comparison.
    #[test]
    fn from_hex_rejects_uppercase() {
        let upper = "A".repeat(64);
        assert!(Blake3Hash::from_hex(upper).is_err());
        let mixed = format!("{}{}", "a".repeat(32), "A".repeat(32));
        assert!(Blake3Hash::from_hex(mixed).is_err());
        let lower = "a".repeat(64);
        assert!(Blake3Hash::from_hex(lower).is_ok());
    }

    /// Rank-1 (mathematical theorem): the concatenation function used inside
    /// `blake3_combined` must be injective over `&[&str]`. Equivalently: any
    /// two distinct input slices must produce distinct hashes.
    /// Regression for PR #66 — the original concatenated without separators,
    /// so `["aa","bb"] == ["a","abb"]` as byte streams and they collided.
    #[test]
    fn blake3_combined_is_injective_on_split_boundary() {
        let h1 = blake3_combined(&["aa", "bb"]);
        let h2 = blake3_combined(&["a", "abb"]);
        assert_ne!(
            h1, h2,
            "blake3_combined must distinguish split boundaries (PR #66)"
        );
        let h3 = blake3_combined(&["", "aabb"]);
        let h4 = blake3_combined(&["aabb", ""]);
        assert_ne!(h3, h4, "blake3_combined must distinguish empty placement");
    }

    /// An empty slice and a slice holding one empty string are different
    /// inputs, and element order matters.
    #[test]
    fn blake3_combined_distinguishes_arity_and_order() {
        assert_ne!(blake3_combined(&[]), blake3_combined(&[""]));
        assert_ne!(blake3_combined(&[""]), blake3_combined(&["", ""]));
        assert_ne!(
            blake3_combined(&["aa", "bb"]),
            blake3_combined(&["bb", "aa"])
        );
    }

    /// Rank-1: finite floats canonicalise, and a `serde_json::Number` can
    /// never be built from NaN.
    ///
    /// NOTE(review): this does not prove that `canonical_json` rejects a NaN
    /// that arrives through `Serialize` — see
    /// `serde_json_maps_non_finite_floats_to_null` for what serde_json does
    /// with such a value before any scan of the produced `Value` can run.
    #[test]
    fn canonical_json_rejects_non_finite_numbers() {
        let mut map = serde_json::Map::new();
        map.insert(
            "x".to_string(),
            serde_json::Value::Number(
                serde_json::Number::from_f64(1.5).expect("finite number"),
            ),
        );
        let v = serde_json::Value::Object(map);
        assert!(canonical_json(&v).is_ok());

        // `Number` itself cannot hold NaN: construction yields `None`.
        let nan_attempt = serde_json::Number::from_f64(f64::NAN);
        assert!(
            nan_attempt.is_none(),
            "serde_json itself must already reject NaN at construction"
        );
    }

    /// Characterises the upstream behaviour the NaN handling has to account
    /// for: `serde_json::to_value` does not fail on non-finite floats, it
    /// yields `Value::Null`. A check that only inspects the produced `Value`
    /// therefore cannot tell a NaN from a real null.
    #[test]
    fn serde_json_maps_non_finite_floats_to_null() {
        assert!(serde_json::to_value(f64::NAN).unwrap().is_null());
        assert!(serde_json::to_value(f64::INFINITY).unwrap().is_null());
        assert!(serde_json::to_value(f64::NEG_INFINITY).unwrap().is_null());
    }
}