1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
use serde::{Deserialize, Serialize};
/// BLAKE3 hash wrapper (256-bit = 64 hex characters)
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct Blake3Hash(String);
impl Blake3Hash {
/// Create a Blake3Hash from a 64-character lowercase hex string.
///
/// PR #66 doctrine fix: prior to this version, `is_ascii_hexdigit` matched
/// both uppercase and lowercase, so two distinct `Blake3Hash` values could
/// be constructed from the same underlying digest (e.g. "ab..." vs "AB...").
/// Receipts and provenance chains compare hashes via `PartialEq` on the
/// inner string, so mixed-case acceptance produced silent equality failures.
/// Canonical BLAKE3 hex is lowercase — reject everything else.
pub fn from_hex(hex: String) -> Result<Self, String> {
if hex.len() != 64 {
return Err(format!("Invalid hash length: {} (expected 64)", hex.len()));
}
if !hex
.chars()
.all(|c| c.is_ascii_digit() || ('a'..='f').contains(&c))
{
return Err("Hash must be lowercase hex (digits 0-9, letters a-f only)".to_string());
}
Ok(Blake3Hash(hex))
}
/// Get the hex representation
pub fn as_hex(&self) -> &str {
&self.0
}
/// Convert to owned hex string
pub fn to_hex(&self) -> String {
self.0.clone()
}
}
impl std::fmt::Display for Blake3Hash {
    /// Write the stored hex digest verbatim.
    ///
    /// Width, fill and precision requested by the caller are not applied; the
    /// digest is always emitted as-is.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}
impl AsRef<str> for Blake3Hash {
    /// Borrow the hex digest as a string slice.
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}
/// Serialize `value` as JSON text whose object keys are sorted at every
/// nesting level, so that equal values yield identical output.
///
/// Steps: convert to a `serde_json::Value`, run `reject_non_finite_numbers`
/// over it, rebuild it with `sort_json_value`, then stringify the result.
///
/// PR #54 NaN class: no hash should ever be produced for a NaN/Inf payload,
/// so every number in the intermediate `Value` is checked explicitly.
///
/// NOTE(review): serde_json documents `From<f64> for Value` as producing
/// `Value::Null` for NaN/Inf. If `serde_json::to_value` behaves the same way,
/// a non-finite float arrives here as `null` and is NOT rejected by the check
/// below — confirm, and reject at serialization time if that is the case.
///
/// # Errors
/// Propagates any `serde_json::Error` raised by the conversion, by the
/// non-finite check, or by the final stringification.
pub fn canonical_json<T: serde::Serialize>(value: &T) -> Result<String, serde_json::Error> {
    let tree = serde_json::to_value(value)?;

    // Defense in depth: refuse any number that reads back as a non-finite
    // f64 (e.g. via a custom Number representation behind a feature flag),
    // so the text we hash stays deterministic.
    reject_non_finite_numbers(&tree)?;

    let sorted = sort_json_value(&tree);
    serde_json::to_string(&sorted)
}
fn reject_non_finite_numbers(value: &serde_json::Value) -> Result<(), serde_json::Error> {
match value {
serde_json::Value::Number(n) => {
if let Some(f) = n.as_f64() {
if !f.is_finite() {
// Construct a serde_json error by attempting an invalid op.
return Err(serde::de::Error::custom(
"canonical_json: non-finite number (NaN/Inf) is not canonicalizable",
));
}
}
Ok(())
}
serde_json::Value::Array(arr) => {
for v in arr {
reject_non_finite_numbers(v)?;
}
Ok(())
}
serde_json::Value::Object(map) => {
for (_k, v) in map {
reject_non_finite_numbers(v)?;
}
Ok(())
}
_ => Ok(()),
}
}
/// Return a copy of `value` in which every object, at any depth, has its
/// entries inserted in ascending key order.
///
/// Array element order is preserved; scalars are cloned unchanged.
fn sort_json_value(value: &serde_json::Value) -> serde_json::Value {
    match value {
        serde_json::Value::Object(fields) => {
            let mut entries: Vec<(&String, &serde_json::Value)> = fields.iter().collect();
            entries.sort_by_key(|&(key, _)| key);

            // Rebuild the map by inserting in sorted order, recursing into
            // each child so nested objects are normalised too.
            let rebuilt: serde_json::Map<String, serde_json::Value> = entries
                .into_iter()
                .map(|(key, child)| (key.clone(), sort_json_value(child)))
                .collect();
            serde_json::Value::Object(rebuilt)
        }
        serde_json::Value::Array(items) => {
            let sorted_items = items.iter().map(sort_json_value).collect();
            serde_json::Value::Array(sorted_items)
        }
        scalar => scalar.clone(),
    }
}
/// Hash `data` with BLAKE3 and return the digest as a hex string
/// (64 characters for the 256-bit digest).
pub fn blake3_hex(data: &[u8]) -> String {
    blake3::hash(data).to_hex().to_string()
}
/// Hash the UTF-8 bytes of `data` with `blake3_hex` and return the hex digest.
pub fn blake3_string(data: &str) -> String {
    let bytes = data.as_bytes();
    blake3_hex(bytes)
}
/// Compute the BLAKE3 hash of a sequence of hash strings (for combined_hash).
///
/// PR #66 doctrine fix: the original concatenated the inputs without any
/// framing, so `["aa", "bb"]` and `["a", "abb"]` produced identical input
/// bytes and therefore identical combined hashes.
///
/// Each input is now framed as `<len>:<input>`, where `<len>` is the input's
/// byte length written as exactly 16 zero-padded lowercase hex digits (most
/// significant digit first). Because the prefix has a fixed width and states
/// how many bytes follow, the framed string can be split back into the
/// original inputs in only one way, i.e. the encoding is injective.
///
/// An empty slice hashes the empty string.
pub fn blake3_combined(hashes: &[&str]) -> String {
    use std::fmt::Write as _;

    // Every input contributes 16 length digits, one ':' and its own bytes.
    let capacity: usize = hashes.iter().map(|h| h.len() + 17).sum();
    let mut framed = String::with_capacity(capacity);

    for h in hashes {
        // Format straight into the buffer instead of allocating a temporary
        // String per input. 16 hex digits cover a 64-bit length.
        write!(framed, "{:016x}:", h.len()).expect("writing to a String cannot fail");
        framed.push_str(h);
    }

    blake3_hex(framed.as_bytes())
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_blake3_hash_creation() {
        let hex = "a".repeat(64);
        let hash = Blake3Hash::from_hex(hex.clone()).unwrap();
        assert_eq!(hash.as_hex(), hex);
    }

    /// All read accessors must expose the same canonical string.
    #[test]
    fn test_blake3_hash_accessors_agree() {
        let hex = "0123456789abcdef".repeat(4);
        let hash = Blake3Hash::from_hex(hex.clone()).unwrap();
        assert_eq!(hash.to_hex(), hex);
        assert_eq!(hash.to_string(), hex);
        let borrowed: &str = hash.as_ref();
        assert_eq!(borrowed, hex);
    }

    #[test]
    fn test_blake3_invalid_length() {
        // Too long, too short, and empty must all be refused.
        assert!(Blake3Hash::from_hex("a".repeat(128)).is_err());
        assert!(Blake3Hash::from_hex("a".repeat(63)).is_err());
        assert!(Blake3Hash::from_hex(String::new()).is_err());
    }

    /// Correct length is not enough: every character must be `0-9` or `a-f`.
    #[test]
    fn from_hex_rejects_non_hex_characters() {
        let with_g = format!("{}g", "a".repeat(63));
        assert!(Blake3Hash::from_hex(with_g).is_err());
        let with_space = format!("{} ", "a".repeat(63));
        assert!(Blake3Hash::from_hex(with_space).is_err());
        // 32 two-byte characters: 64 bytes long, but not ASCII hex.
        assert!(Blake3Hash::from_hex("é".repeat(32)).is_err());
    }

    #[test]
    fn test_blake3_string_hash() {
        let hash1 = blake3_string("test");
        let hash2 = blake3_string("test");
        assert_eq!(hash1, hash2);
        assert_eq!(hash1.len(), 64);
    }

    #[test]
    fn test_canonical_json() {
        let mut map1 = serde_json::Map::new();
        map1.insert("z".to_string(), serde_json::json!(1));
        map1.insert("a".to_string(), serde_json::json!(2));
        let mut map2 = serde_json::Map::new();
        map2.insert("a".to_string(), serde_json::json!(2));
        map2.insert("z".to_string(), serde_json::json!(1));
        let val1 = serde_json::Value::Object(map1);
        let val2 = serde_json::Value::Object(map2);
        let json1 = canonical_json(&val1).unwrap();
        let json2 = canonical_json(&val2).unwrap();
        assert_eq!(json1, json2);
        assert!(json1.starts_with(r#"{"a":2"#)); // Keys sorted alphabetically
    }

    /// Key sorting must reach objects nested inside objects and arrays, while
    /// array element order is left alone.
    #[test]
    fn canonical_json_sorts_nested_objects() {
        let value = serde_json::json!({
            "b": {"d": 1, "c": 2},
            "a": [{"z": 1, "y": 2}, 3],
        });
        let json = canonical_json(&value).unwrap();
        assert_eq!(json, r#"{"a":[{"y":2,"z":1},3],"b":{"c":2,"d":1}}"#);
    }

    #[test]
    fn test_blake3_combined() {
        let hash1 = "a".repeat(64);
        let hash2 = "b".repeat(64);
        let combined = blake3_combined(&[&hash1, &hash2]);
        assert_eq!(combined.len(), 64);
    }

    /// Same inputs give the same hash; swapping the inputs changes it.
    #[test]
    fn blake3_combined_is_deterministic_and_order_sensitive() {
        assert_eq!(blake3_combined(&["a", "b"]), blake3_combined(&["a", "b"]));
        assert_ne!(blake3_combined(&["a", "b"]), blake3_combined(&["b", "a"]));
        let none: [&str; 0] = [];
        assert_eq!(blake3_combined(&none).len(), 64);
    }

    /// Rank-2 (domain contract): canonical BLAKE3 hex is lowercase. Mixed-case
    /// input must be rejected, because two `Blake3Hash` values with the same
    /// digest but different case would not be `PartialEq`-equal — that breaks
    /// receipt comparison.
    #[test]
    fn from_hex_rejects_uppercase() {
        let upper = "A".repeat(64);
        assert!(Blake3Hash::from_hex(upper).is_err());
        let mixed = format!("{}{}", "a".repeat(32), "A".repeat(32));
        assert!(Blake3Hash::from_hex(mixed).is_err());
        let lower = "a".repeat(64);
        assert!(Blake3Hash::from_hex(lower).is_ok());
    }

    /// Rank-1 (mathematical theorem): the concatenation function used inside
    /// `blake3_combined` must be injective over `&[&str]`. Equivalently: any
    /// two distinct input slices must produce distinct hashes.
    /// Regression for PR #66 — the original concatenated without separators,
    /// so `["aa","bb"] == ["a","abb"]` as byte streams and they collided.
    #[test]
    fn blake3_combined_is_injective_on_split_boundary() {
        let h1 = blake3_combined(&["aa", "bb"]);
        let h2 = blake3_combined(&["a", "abb"]);
        assert_ne!(
            h1, h2,
            "blake3_combined must distinguish split boundaries (PR #66)"
        );
        let h3 = blake3_combined(&["", "aabb"]);
        let h4 = blake3_combined(&["aabb", ""]);
        assert_ne!(h3, h4, "blake3_combined must distinguish empty placement");
    }

    /// Rank-1: canonical_json must refuse NaN/Inf rather than silently emit a
    /// non-canonical representation. Receipts that include NaN floats would
    /// otherwise produce stable-looking but provably-meaningless hashes.
    ///
    /// What this test actually covers: a finite number is accepted, and
    /// `serde_json::Number` cannot be constructed from NaN. It does not pass a
    /// non-finite float through `canonical_json`.
    ///
    /// NOTE(review): serde_json documents `From<f64> for Value` as yielding
    /// `Value::Null` for NaN/Inf, so `canonical_json(&f64::NAN)` may return
    /// `Ok("null")` rather than an error — confirm, and add a rejection
    /// assertion here once `canonical_json` enforces it.
    #[test]
    fn canonical_json_rejects_non_finite_numbers() {
        // A hand-crafted Value carrying a finite number must be accepted.
        let mut map = serde_json::Map::new();
        map.insert(
            "x".to_string(),
            serde_json::Value::Number(serde_json::Number::from_f64(1.5).expect("finite number")),
        );
        let v = serde_json::Value::Object(map);
        assert!(canonical_json(&v).is_ok());
        // A `Number` holding NaN cannot be built in the first place.
        let nan_attempt = serde_json::Number::from_f64(f64::NAN);
        assert!(
            nan_attempt.is_none(),
            "serde_json itself must already reject NaN at construction"
        );
    }
}