1use super::*;
2
3use tiny_keccak::Hasher;
4use unicode_normalization::UnicodeNormalization;
5
6pub fn normalize_unicode(s: &str) -> String {
10 s.nfkd().collect()
11}
12
13pub fn default_hasher(content: &[u8]) -> String {
17 let mut hasher = tiny_keccak::Sha3::v256();
19 let mut hash_output = [0u8; 32];
20 hasher.update(content);
21 hasher.finalize(&mut hash_output);
22 multibase::encode(multibase::Base::Base64Url, &hash_output)
23}
24
25fn hash_str(content: &str) -> String {
29 format!("cj{}", default_hasher(content.as_bytes()))
30}
31
32pub fn canonical_json(data: &serde_json::Value) -> Result<String> {
48 match data {
49 serde_json::Value::Array(arr) => {
50 let mut canonical_json_items = Vec::new();
51 for item in arr {
52 canonical_json_items.push(canonical_json(item)?);
53 }
54 Ok(format!("[{}]", canonical_json_items.join(",")))
55 }
56
57 serde_json::Value::Object(obj) => {
58 let mut canonical_json_entries = Vec::new();
59 let mut keys: Vec<_> = obj.keys().collect();
60 keys.sort();
61 for key in keys {
62 ensure!(
63 *key == normalize_unicode(key),
64 "Data for canonical JSON serialization must contain field names normalized with Unicode NFKD"
65 );
66
67 let value = obj.get(key).expect("serde_json keys() impl error");
68 let canonical_key = canonical_json(&serde_json::Value::String(key.to_owned()))?;
69 let entry = format!("{}:{}", canonical_key, canonical_json(value)?);
70 canonical_json_entries.push(entry);
71 }
72 Ok(format!("{{{}}}", canonical_json_entries.join(",")))
74 }
75
76 _ => {
77 let data_str = serde_json::to_string(data).expect("serde_json implementation error");
78 Ok(normalize_unicode(&data_str))
79 }
80 }
81}
82
/// Recursively replaces JSON subtrees with their content hashes, except for
/// the subtrees selected by `keep_paths`, which are preserved (and masked
/// further down according to each path's remaining tail).
///
/// Returns either a `String` node (the digest of a fully masked subtree) or an
/// `Object` node (when at least one field was kept). Scalar leaves are
/// returned unchanged.
///
/// # Errors
/// Fails when an object field name is not Unicode NFKD-normalized or when a
/// path in `keep_paths` cannot be split by `json_path::split_head_tail`.
pub fn mask_json_subtree<'a, 'b>(
    data: &'a serde_json::Value, keep_paths: impl AsRef<[&'b str]>,
) -> Result<serde_json::Value> {
    match data {
        serde_json::Value::Array(arr) => {
            // Arrays are always fully masked: digest each item, then hash the
            // flattened `[item,item,...]` concatenation of the item digests.
            let mut canonical_json_items = Vec::new();
            for item in arr {
                let digested_item = mask_json_subtree(item, vec![])?;
                canonical_json_items.push(serde_json::to_string(&digested_item)?);
            }
            let flattened_array = format!("[{}]", canonical_json_items.join(","));
            let content_hash = hash_str(&flattened_array);
            Ok(serde_json::Value::String(content_hash))
        }

        serde_json::Value::Object(obj) => {
            // Group keep-paths by their first segment: head -> list of tails.
            // An empty tail list means "keep this field's whole subtree".
            let mut keep_head_tails = HashMap::new();
            for path in keep_paths.as_ref() {
                let (head, tail_opt) = json_path::split_head_tail(path)?;
                let tails = keep_head_tails.entry(head.to_owned()).or_insert_with(Vec::new);
                if let Some(tail) = tail_opt {
                    tails.push(tail);
                }
            }

            // If no keep-path head matches any key, the whole object is masked.
            let mut mask_root = true;
            let mut canonical_json_entries = Vec::new();
            // Keys are sorted so the flattened form below is order-independent.
            let mut keys: Vec<_> = obj.keys().collect();
            keys.sort();
            for key in keys {
                ensure!(
                    *key == normalize_unicode(key),
                    "Data to be digested must contain field names normalized with Unicode NFKD"
                );

                let value = obj.get(key).expect("serde_json keys() impl error");
                if let Some(tails) = keep_head_tails.get(key) {
                    mask_root = false;
                    if tails.is_empty() {
                        // Path ends at this field: keep the value verbatim.
                        canonical_json_entries.push((key, value.to_owned()));
                    } else {
                        // Path continues: keep this field, mask deeper down.
                        let partial_value = mask_json_subtree(value, tails)?;
                        canonical_json_entries.push((key, partial_value));
                    }
                } else {
                    // Field not selected: replace its value with a digest.
                    let fully_masked_value = mask_json_subtree(value, vec![])?;
                    canonical_json_entries.push((key, fully_masked_value));
                };
            }

            if mask_root {
                // Nothing was kept: hash the canonical `{"k":v,...}` rendering
                // of the (already key-sorted) digested entries.
                let canonical_entry_strs = canonical_json_entries
                    .iter()
                    .filter_map(|(key, val)| {
                        let canonical_key =
                            canonical_json(&serde_json::Value::String((*key).to_string())).ok()?;
                        Some(format!("{}:{}", canonical_key, serde_json::to_string(val).ok()?))
                    })
                    .collect::<Vec<_>>();
                // filter_map silently drops failed entries; a length mismatch
                // therefore signals an internal serialization error.
                ensure!(
                    canonical_entry_strs.len() == canonical_json_entries.len(),
                    "Implementation error: failed to serialize JSON node entries"
                );

                let flattened_object = format!("{{{}}}", canonical_entry_strs.join(","));

                let content_hash = hash_str(&flattened_object);
                Ok(serde_json::Value::String(content_hash))
            } else {
                // At least one field was kept: return an object mixing kept
                // values and digest strings for the masked fields.
                let mut properties = serde_json::Map::new();
                for (key, value) in canonical_json_entries {
                    properties.insert(key.to_owned(), value);
                }
                Ok(serde_json::Value::Object(properties))
            }
        }

        // Scalar leaves pass through unchanged; the enclosing composite node
        // decides how they are folded into a hash.
        _ => Ok(data.clone()),
    }
}
177
178pub fn selective_digest_json(
185 json_value: &serde_json::Value, keep_paths_str: &str,
186) -> Result<String> {
187 let keep_paths_vec = json_path::split_alternatives(keep_paths_str);
188 let digest_json = match &json_value {
189 serde_json::Value::Object(_obj) => mask_json_subtree(json_value, keep_paths_vec),
190 serde_json::Value::Array(_arr) => mask_json_subtree(json_value, keep_paths_vec),
191 serde_json::Value::String(_s) => Ok(json_value.to_owned()),
192 _ => bail!("Json digest is currently implemented only for composite types"),
193 }?;
194 match digest_json {
195 serde_json::Value::String(digest) => Ok(digest),
196 serde_json::Value::Object(_) => canonical_json(&digest_json),
198 _ => bail!("Implementation error: digest should always return a string or object"),
199 }
200}
201
202pub fn selective_digest_data<T: serde::Serialize>(
206 data: &T, keep_paths_str: &str,
207) -> Result<String> {
208 let json_value = serde_json::to_value(&data)?;
209 selective_digest_json(&json_value, keep_paths_str)
210}
211
212pub fn selective_digest_json_str(json_str: &str, keep_paths_str: &str) -> Result<String> {
216 ensure!(
217 json_str == normalize_unicode(json_str),
218 "Json string to be digested must be normalized with Unicode NFKD"
219 );
220
221 let json_value: serde_json::Value = serde_json::from_str(json_str)?;
222 selective_digest_json(&json_value, keep_paths_str)
223}
224
// Empty keep-path set: mask every subtree.
const KEEP_NOTHING: &str = "";

/// Digests `data` completely, keeping no subtree in the clear.
pub fn digest_data<T: serde::Serialize>(data: &T) -> Result<String> {
    selective_digest_data(data, KEEP_NOTHING)
}
231
/// Digests an NFKD-normalized JSON string completely, keeping no subtree in
/// the clear.
pub fn digest_json_str(json_str: &str) -> Result<String> {
    selective_digest_json_str(json_str, KEEP_NOTHING)
}
236
#[cfg(test)]
mod tests {
    use super::*;
    use hex::FromHex;
    use serde::{Deserialize, Serialize};

    // Field order (b before a) is deliberately non-alphabetical to prove that
    // canonical serialization sorts keys.
    #[derive(Clone, Debug, Deserialize, Serialize)]
    struct TestData {
        b: u32,
        a: u32,
    }

    // Generic two-field container used to build nested test structures.
    #[derive(Clone, Debug, Deserialize, Serialize)]
    struct CompositeTestData<T> {
        z: Option<T>,
        y: Option<T>,
    }

    #[test]
    fn reject_non_nfkd() -> Result<()> {
        // Same visible text "álom" in two encodings: NFC (precomposed á) vs
        // NFKD (a + combining acute), built from hex to keep the bytes exact.
        let key_nfc = String::from_utf8(Vec::from_hex("c3a16c6f6d")?)?;
        let key_nfkd = String::from_utf8(Vec::from_hex("61cc816c6f6d")?)?;
        assert_eq!(key_nfc, "álom");
        assert_eq!(key_nfkd, "álom");

        let str_nfc = format!("{{\"{}\": 1}}", key_nfc);
        let str_nfkd = format!("{{\"{}\": 1}}", key_nfkd);
        // Only the NFKD form digests; the NFC form must be rejected.
        assert_eq!(digest_json_str(&str_nfkd)?, "cjuRab8yOeLzxmFY_fEMC79cW5z9XyihRhaGnTSvMabrA8");
        assert!(digest_json_str(&str_nfc).is_err());

        // Same checks through the pre-parsed Value entry point.
        let json_value_nfc: serde_json::Value = serde_json::from_str(&str_nfc)?;
        let json_value_nfkd: serde_json::Value = serde_json::from_str(&str_nfkd)?;
        assert_eq!(
            selective_digest_json(&json_value_nfkd, "")?,
            "cjuRab8yOeLzxmFY_fEMC79cW5z9XyihRhaGnTSvMabrA8"
        );
        assert!(selective_digest_json(&json_value_nfc, "").is_err());
        Ok(())
    }

    #[test]
    fn digest_string_is_idempotent() {
        // Digesting an existing digest string must return it unchanged.
        let content_id = &r#""cjuzC-XxgzNMwYXtw8aMIAeS2Xjlw1hlSNKTvVtUwPuyYo""#;
        let digest_id = digest_data(content_id).unwrap();
        assert_eq!(content_id, &digest_id);
    }

    #[test]
    fn test_json_digest() -> Result<()> {
        let test_obj = TestData { b: 1, a: 2 };
        {
            let digested = digest_data(&test_obj)?;
            assert_eq!(digested, "cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU");
        }
        {
            let digested = digest_data(&[&test_obj, &test_obj])?;
            assert_eq!(digested, "cjuGkDpb1HL7F8xFKDFVj3felfKZzjrJy92-108uuPixNw");
        }
        {
            // Mixing a value with its digest string gives the same hash as the
            // array of two values above — digests substitute for content.
            let digested =
                digest_data(&(&test_obj, "cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU"))?;
            assert_eq!(digested, "cjuGkDpb1HL7F8xFKDFVj3felfKZzjrJy92-108uuPixNw");
        }
        {
            let digested = digest_data(&[
                "cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU",
                "cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU",
            ])?;
            assert_eq!(digested, "cjuGkDpb1HL7F8xFKDFVj3felfKZzjrJy92-108uuPixNw");
        }
        {
            let x = &test_obj;
            let comp = CompositeTestData { z: Some(x.clone()), y: Some(x.clone()) };
            let digested = digest_data(&comp)?;
            assert_eq!(digested, "cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ");
        }
        {
            // Replacing nested values with their digest strings again yields
            // the same composite hash.
            let comp = CompositeTestData {
                z: Some("cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU".to_owned()),
                y: Some("cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU".to_owned()),
            };
            let digested = digest_data(&comp)?;
            assert_eq!(digested, "cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ");
        }
        Ok(())
    }

    #[test]
    fn test_selective_digesting() -> Result<()> {
        let test_obj = TestData { b: 1, a: 2 };
        let x = &test_obj;
        let composite = CompositeTestData { z: Some(x.clone()), y: Some(x.clone()) };
        let double_complex =
            CompositeTestData { z: Some(composite.clone()), y: Some(composite.clone()) };
        let triple_complex =
            CompositeTestData { z: Some(double_complex.clone()), y: Some(double_complex.clone()) };
        {
            // Empty keep-path set: everything masked.
            let fully_digested = selective_digest_data(&composite, "")?;
            assert_eq!(fully_digested, "cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ");
        }
        {
            // Keep `.y` in the clear; `.z` becomes a digest string. Re-digesting
            // the partial result reproduces the full digest.
            let keep_y = selective_digest_data(&composite, ".y")?;
            assert_eq!(
                keep_y,
                r#"{"y":{"a":2,"b":1},"z":"cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU"}"#
            );
            let val: serde_json::Value = serde_json::from_str(&keep_y)?;
            assert_eq!(digest_data(&val)?, "cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ");
        }
        {
            let keep_z = selective_digest_data(&composite, ".z")?;
            assert_eq!(
                keep_z,
                r#"{"y":"cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU","z":{"a":2,"b":1}}"#
            );
            let val: serde_json::Value = serde_json::from_str(&keep_z)?;
            assert_eq!(digest_data(&val)?, "cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ");
        }
        {
            let digest = digest_data(&double_complex)?;
            assert_eq!(digest, "cjuQLebyl_BJipFLibhWiStDBqK5J4JZq15ehUqybfTTKA");
        }
        {
            // A two-segment path keeps only the addressed nested subtree.
            let keep_yz = selective_digest_data(&double_complex, ".y.z")?;
            assert_eq!(
                keep_yz,
                r#"{"y":{"y":"cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU","z":{"a":2,"b":1}},"z":"cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ"}"#
            );
            let val: serde_json::Value = serde_json::from_str(&keep_yz)?;
            assert_eq!(digest_data(&val)?, "cjuQLebyl_BJipFLibhWiStDBqK5J4JZq15ehUqybfTTKA");
        }
        {
            let digest = digest_data(&triple_complex)?;
            assert_eq!(digest, "cjuik140L3w7LCi6z1eHt7Qgwr2X65-iy8HA6zqrlUdmVk");
        }
        {
            // Multiple alternative paths (separated syntax handled by
            // json_path::split_alternatives) are all kept.
            let keep_yz = selective_digest_data(&triple_complex, ".y.y , .z.z")?;
            assert_eq!(
                keep_yz,
                r#"{"y":{"y":{"y":{"a":2,"b":1},"z":{"a":2,"b":1}},"z":"cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ"},"z":{"y":"cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ","z":{"y":{"a":2,"b":1},"z":{"a":2,"b":1}}}}"#
            );
            let val: serde_json::Value = serde_json::from_str(&keep_yz)?;
            assert_eq!(digest_data(&val)?, "cjuik140L3w7LCi6z1eHt7Qgwr2X65-iy8HA6zqrlUdmVk");
        }
        Ok(())
    }
}
383}