1use schemars::JsonSchema;
30use serde::{Deserialize, Serialize};
31use serde_json::Value;
32
33#[derive(
37 Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
38)]
39#[serde(transparent)]
40pub struct ContentDigest(pub String);
41
42impl ContentDigest {
43 pub fn compute(data: &[u8]) -> Self {
45 let hash = blake3::hash(data);
46 Self(hash.to_hex().to_string())
47 }
48
49 pub fn compute_str(data: &str) -> Self {
51 Self::compute(data.as_bytes())
52 }
53
54 pub fn compute_json<T: Serialize>(value: &T) -> Result<Self, DigestError> {
63 let canonical = canonicalize_json_value(serde_json::to_value(value).map_err(|e| {
64 DigestError::SerializationFailed {
65 reason: e.to_string(),
66 }
67 })?);
68 let canonical =
69 serde_json::to_string(&canonical).map_err(|e| DigestError::SerializationFailed {
70 reason: e.to_string(),
71 })?;
72 Ok(Self::compute_str(&canonical))
73 }
74
75 pub fn hex(&self) -> &str {
77 &self.0
78 }
79
80 pub fn from_hex(hex: impl Into<String>) -> Result<Self, DigestError> {
84 let hex = hex.into();
85 if hex.len() != 64 {
86 return Err(DigestError::InvalidDigest {
87 reason: format!("expected 64 hex chars, got {}", hex.len()),
88 });
89 }
90 if !hex.chars().all(|c| c.is_ascii_hexdigit()) {
91 return Err(DigestError::InvalidDigest {
92 reason: "digest must contain only hex characters".into(),
93 });
94 }
95 Ok(Self(hex))
96 }
97
98 pub fn from_hex_unchecked(hex: impl Into<String>) -> Self {
102 Self(hex.into())
103 }
104}
105
106impl std::fmt::Display for ContentDigest {
107 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
108 f.write_str(&self.0)
109 }
110}
111
112pub struct DigestBuilder {
117 hasher: blake3::Hasher,
118}
119
120impl DigestBuilder {
121 pub fn new() -> Self {
123 Self {
124 hasher: blake3::Hasher::new(),
125 }
126 }
127
128 pub fn update(&mut self, data: &[u8]) -> &mut Self {
130 self.hasher.update(data);
131 self
132 }
133
134 pub fn update_str(&mut self, data: &str) -> &mut Self {
136 self.hasher.update(data.as_bytes());
137 self
138 }
139
140 pub fn separator(&mut self) -> &mut Self {
142 self.hasher.update(b"\x00");
143 self
144 }
145
146 pub fn update_json<T: Serialize + ?Sized>(
148 &mut self,
149 value: &T,
150 ) -> Result<&mut Self, DigestError> {
151 let canonical = canonicalize_json_value(serde_json::to_value(value).map_err(|e| {
152 DigestError::SerializationFailed {
153 reason: e.to_string(),
154 }
155 })?);
156 let canonical =
157 serde_json::to_string(&canonical).map_err(|e| DigestError::SerializationFailed {
158 reason: e.to_string(),
159 })?;
160 self.hasher.update(canonical.as_bytes());
161 Ok(self)
162 }
163
164 pub fn finalize(self) -> ContentDigest {
166 let hash = self.hasher.finalize();
167 ContentDigest(hash.to_hex().to_string())
168 }
169}
170
171fn canonicalize_json_value(value: Value) -> Value {
172 match value {
173 Value::Object(map) => {
174 let mut entries = map
175 .into_iter()
176 .map(|(key, value)| (key, canonicalize_json_value(value)))
177 .collect::<Vec<(String, Value)>>();
178 entries.sort_by(|a, b| a.0.cmp(&b.0));
179 let mut ordered = serde_json::Map::new();
180 for (key, value) in entries {
181 ordered.insert(key, value);
182 }
183 Value::Object(ordered)
184 }
185 Value::Array(items) => {
186 Value::Array(items.into_iter().map(canonicalize_json_value).collect())
187 }
188 other => other,
189 }
190}
191
192impl Default for DigestBuilder {
193 fn default() -> Self {
194 Self::new()
195 }
196}
197
/// Failures reported by the digest helpers in this module.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum DigestError {
    /// A value could not be turned into JSON for hashing.
    SerializationFailed {
        /// Text of the underlying serialization error.
        reason: String,
    },
    /// A string offered as a digest failed validation.
    InvalidDigest {
        /// Human-readable description of what was wrong with the input.
        reason: String,
    },
}
206
207impl DigestError {
208 pub fn kind(&self) -> &'static str {
209 match self {
210 Self::SerializationFailed { .. } => "serialization_failed",
211 Self::InvalidDigest { .. } => "invalid_digest",
212 }
213 }
214}
215
216impl std::fmt::Display for DigestError {
217 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
218 match self {
219 Self::SerializationFailed { reason } => {
220 write!(f, "digest serialization failed: {reason}")
221 }
222 Self::InvalidDigest { reason } => {
223 write!(f, "invalid digest: {reason}")
224 }
225 }
226 }
227}
228
229impl std::error::Error for DigestError {}
230
#[cfg(test)]
mod tests {
    //! Unit tests for `ContentDigest`, `DigestBuilder` and `DigestError`.

    use super::*;
    use std::collections::{BTreeMap, HashMap};

    /// Runs `feed` against a fresh builder and returns the finished digest.
    fn build(feed: impl FnOnce(&mut DigestBuilder)) -> ContentDigest {
        let mut builder = DigestBuilder::new();
        feed(&mut builder);
        builder.finalize()
    }

    #[test]
    fn compute_and_verify_length() {
        let digest = ContentDigest::compute(b"hello world");
        assert_eq!(digest.hex().len(), 64);
        assert!(digest.hex().chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn deterministic_same_input() {
        let a = ContentDigest::compute(b"test data");
        let b = ContentDigest::compute(b"test data");
        assert_eq!(a, b);
    }

    #[test]
    fn different_input_different_digest() {
        let a = ContentDigest::compute(b"input A");
        let b = ContentDigest::compute(b"input B");
        assert_ne!(a, b);
    }

    #[test]
    fn compute_str_matches_compute_on_bytes() {
        assert_eq!(
            ContentDigest::compute_str("hello world"),
            ContentDigest::compute(b"hello world")
        );
    }

    #[test]
    fn display_matches_hex() {
        let digest = ContentDigest::compute(b"test");
        assert_eq!(digest.to_string(), digest.hex());
    }

    #[test]
    fn compute_json_deterministic() {
        let mut map = BTreeMap::new();
        map.insert("b", "two");
        map.insert("a", "one");
        let d1 = ContentDigest::compute_json(&map).unwrap();

        let mut map2 = BTreeMap::new();
        map2.insert("a", "one");
        map2.insert("b", "two");
        let d2 = ContentDigest::compute_json(&map2).unwrap();

        assert_eq!(d1, d2);
    }

    #[test]
    fn compute_json_normalizes_hash_map_key_order() {
        let mut unsorted = HashMap::new();
        unsorted.insert("b", "two");
        unsorted.insert("a", "one");

        let mut reordered = HashMap::new();
        reordered.insert("a", "one");
        reordered.insert("b", "two");

        let left = ContentDigest::compute_json(&unsorted).unwrap();
        let right = ContentDigest::compute_json(&reordered).unwrap();

        assert_eq!(left, right);
    }

    #[test]
    fn compute_json_normalizes_nested_key_order() {
        let left = serde_json::json!({ "outer": { "b": 1, "a": [{ "y": 2, "x": 3 }] } });
        let right = serde_json::json!({ "outer": { "a": [{ "x": 3, "y": 2 }], "b": 1 } });

        assert_eq!(
            ContentDigest::compute_json(&left).unwrap(),
            ContentDigest::compute_json(&right).unwrap()
        );
    }

    #[test]
    fn compute_json_matches_pinned_golden_digest() {
        let mut ordered = BTreeMap::new();
        ordered.insert("a", serde_json::json!({ "z": 1, "y": [3, 2, 1] }));
        ordered.insert("b", serde_json::json!("two"));

        let digest = ContentDigest::compute_json(&ordered).unwrap();

        assert_eq!(
            digest.hex(),
            "5359182562bfb1083acba7077061a75d451f373026ae4a79c28118403f58cb1f"
        );
    }

    #[test]
    fn from_hex_valid() {
        let digest = ContentDigest::compute(b"test");
        let restored = ContentDigest::from_hex(digest.hex()).unwrap();
        assert_eq!(restored, digest);
    }

    #[test]
    fn from_hex_wrong_length() {
        let err = ContentDigest::from_hex("abc").unwrap_err();
        assert!(matches!(err, DigestError::InvalidDigest { .. }));
    }

    #[test]
    fn from_hex_non_hex_chars() {
        let err = ContentDigest::from_hex("g".repeat(64)).unwrap_err();
        assert!(matches!(err, DigestError::InvalidDigest { .. }));
    }

    #[test]
    fn from_hex_unchecked_skips_validation() {
        let digest = ContentDigest::from_hex_unchecked("abc");
        assert_eq!(digest.hex(), "abc");
    }

    #[test]
    fn builder_deterministic() {
        let d1 = build(|b| {
            b.update_str("field1").separator().update_str("field2");
        });
        let d2 = build(|b| {
            b.update_str("field1").separator().update_str("field2");
        });
        assert_eq!(d1, d2);
    }

    #[test]
    fn builder_separator_prevents_collision() {
        let d1 = build(|b| {
            b.update_str("ab").separator().update_str("c");
        });
        let d2 = build(|b| {
            b.update_str("a").separator().update_str("bc");
        });
        assert_ne!(d1, d2);
    }

    #[test]
    fn builder_chunked_updates_match_one_shot() {
        let chunked = build(|b| {
            b.update(b"hello ").update_str("world");
        });
        assert_eq!(chunked, ContentDigest::compute(b"hello world"));
    }

    #[test]
    fn builder_separator_is_a_single_nul_byte() {
        // Pins the byte-level format so stored digests stay reproducible.
        let separated = build(|b| {
            b.update_str("a").separator().update_str("b");
        });
        assert_eq!(separated, ContentDigest::compute(b"a\x00b"));
    }

    #[test]
    fn builder_update_json_matches_compute_json() {
        let value = serde_json::json!({ "b": [1, 2], "a": { "d": null, "c": true } });
        let built = build(|b| {
            b.update_json(&value).unwrap();
        });
        assert_eq!(built, ContentDigest::compute_json(&value).unwrap());
    }

    #[test]
    fn default_builder_matches_new() {
        assert_eq!(
            DigestBuilder::default().finalize(),
            DigestBuilder::new().finalize()
        );
    }

    #[test]
    fn digest_error_kind_and_display() {
        let invalid = DigestError::InvalidDigest {
            reason: "bad".into(),
        };
        assert_eq!(invalid.kind(), "invalid_digest");
        assert_eq!(invalid.to_string(), "invalid digest: bad");

        let failed = DigestError::SerializationFailed {
            reason: "boom".into(),
        };
        assert_eq!(failed.kind(), "serialization_failed");
        assert_eq!(failed.to_string(), "digest serialization failed: boom");
    }

    #[test]
    fn serde_roundtrip() {
        let digest = ContentDigest::compute(b"test");
        let json = serde_json::to_string(&digest).unwrap();
        // `#[serde(transparent)]` means the wire form is just the hex string.
        assert_eq!(json, format!("\"{}\"", digest.hex()));
        let back: ContentDigest = serde_json::from_str(&json).unwrap();
        assert_eq!(back, digest);
    }
}