use super::*;
use unicode_normalization::UnicodeNormalization;
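
/// Returns the input string converted to Unicode normal form KD
/// (compatibility decomposition), the normalization required throughout
/// this module.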
pub fn normalize_unicode(s: &str) -> String {
s.nfkd().collect()
}
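
/// Hashes a byte slice with SHA3-256 and returns the digest encoded as
/// multibase Base64URL (a string starting with the multibase prefix `u`).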
pub fn default_hasher(content: &[u8]) -> String {
    // tiny_keccak 1.x API; with tiny_keccak 2.x this would be `Sha3::v256()`
    // together with its `Hasher` trait instead.
    let mut hasher = tiny_keccak::Keccak::new_sha3_256();
let mut hash_output = [0u8; 32];
hasher.update(content);
hasher.finalize(&mut hash_output);
multibase::encode(multibase::Base::Base64Url, &hash_output)
}
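
/// Hashes a string and prepends the `cj` scheme prefix, producing the
/// `cju...` content ids seen in the tests below.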
fn hash_str(content: &str) -> String {
format!("cj{}", default_hasher(content.as_bytes()))
}
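
/// Serializes a JSON value into its canonical string form: no insignificant
/// whitespace, object keys sorted, and all field names required to already be
/// in Unicode NFKD (otherwise an error is returned).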
pub fn canonical_json(data: &serde_json::Value) -> Result<String> {
match data {
serde_json::Value::Array(arr) => {
let mut canonical_json_items = Vec::new();
for item in arr {
canonical_json_items.push(canonical_json(item)?);
}
Ok(format!("[{}]", canonical_json_items.join(",")))
}
serde_json::Value::Object(obj) => {
let mut canonical_json_entries = Vec::new();
let mut keys: Vec<_> = obj.keys().collect();
keys.sort();
for key in keys {
ensure!(
*key == normalize_unicode(key),
"Data for canonical JSON serialization must contain field names normalized with Unicode NFKD"
);
let value = obj.get(key).expect("serde_json keys() impl error");
let canonical_key = canonical_json(&serde_json::Value::String(key.to_owned()))?;
let entry = format!("{}:{}", canonical_key, canonical_json(value)?);
canonical_json_entries.push(entry);
}
Ok(format!("{{{}}}", canonical_json_entries.join(",")))
}
        // Scalars (string, number, bool, null): serde_json already emits a
        // canonical form; only the Unicode content needs NFKD normalization.
        _ => {
            let data_str = serde_json::to_string(data).expect("serde_json implementation error");
            Ok(normalize_unicode(&data_str))
        }
}
}
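
/// Recursively replaces JSON subtrees with their content hash, keeping only
/// the subtrees addressed by `keep_paths` in the clear. With an empty
/// `keep_paths`, the whole value collapses into a single hash string.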
pub fn mask_json_subtree<'b>(
    data: &serde_json::Value, keep_paths: impl AsRef<[&'b str]>,
) -> Result<serde_json::Value> {
match data {
serde_json::Value::Array(arr) => {
            // Arrays are always collapsed: each item is digested with no kept
            // paths, then the flattened canonical array is hashed.
            let mut canonical_json_items = Vec::new();
for item in arr {
let digested_item = mask_json_subtree(item, vec![])?;
canonical_json_items.push(serde_json::to_string(&digested_item)?);
}
let flattened_array = format!("[{}]", canonical_json_items.join(","));
let content_hash = hash_str(&flattened_array);
Ok(serde_json::Value::String(content_hash))
}
serde_json::Value::Object(obj) => {
            // Group the kept paths by their first segment: each head key maps to
            // the remaining subpaths that must stay visible below it.
            let mut keep_head_tails = HashMap::new();
for path in keep_paths.as_ref() {
let (head, tail_opt) = json_path::split_head_tail(path)?;
let tails = keep_head_tails.entry(head.to_owned()).or_insert_with(Vec::new);
if let Some(tail) = tail_opt {
tails.push(tail);
}
}
            // Assume the whole object gets masked; any kept key found below flips this.
            let mut mask_root = true;
let mut canonical_json_entries = Vec::new();
let mut keys: Vec<_> = obj.keys().collect();
keys.sort();
for key in keys {
ensure!(
*key == normalize_unicode(key),
"Data to be digested must contain field names normalized with Unicode NFKD"
);
let value = obj.get(key).expect("serde_json keys() impl error");
if let Some(tails) = keep_head_tails.get(key) {
mask_root = false;
if tails.is_empty() {
canonical_json_entries.push((key, value.to_owned()));
} else {
let partial_value = mask_json_subtree(value, tails.to_owned())?;
canonical_json_entries.push((key, partial_value));
}
                } else {
                    let fully_masked_value = mask_json_subtree(value, vec![])?;
                    canonical_json_entries.push((key, fully_masked_value));
                }
}
            // No kept path matched any key here: collapse the whole object into its hash.
            if mask_root {
let canonical_entry_strs = canonical_json_entries
.iter()
.filter_map(|(key, val)| {
let canonical_key =
canonical_json(&serde_json::Value::String((*key).to_string())).ok()?;
Some(format!("{}:{}", canonical_key, serde_json::to_string(val).ok()?))
})
.collect::<Vec<_>>();
ensure!(
canonical_entry_strs.len() == canonical_json_entries.len(),
"Implementation error: failed to serialize JSON node entries"
);
let flattened_object = format!("{{{}}}", canonical_entry_strs.join(","));
let content_hash = hash_str(&flattened_object);
Ok(serde_json::Value::String(content_hash))
} else {
let mut properties = serde_json::Map::new();
for (key, value) in canonical_json_entries {
properties.insert(key.to_owned(), value);
}
Ok(serde_json::Value::Object(properties))
}
}
_ => Ok(data.clone()),
}
}
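
/// Digests a JSON value, keeping the subtrees selected by `keep_paths_str`
/// visible and masking everything else. `keep_paths_str` may list several
/// alternatives separated by commas, e.g. `".y.y , .z.z"` as in the tests.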
pub fn selective_digest_json(
json_value: &serde_json::Value, keep_paths_str: &str,
) -> Result<String> {
let keep_paths_vec = json_path::split_alternatives(keep_paths_str);
    let digest_json = match json_value {
        serde_json::Value::Object(_) | serde_json::Value::Array(_) => {
            mask_json_subtree(json_value, keep_paths_vec)
        }
        serde_json::Value::String(_) => Ok(json_value.to_owned()),
        _ => bail!("Json digest is implemented only for objects, arrays and strings"),
    }?;
match digest_json {
serde_json::Value::String(digest) => Ok(digest),
serde_json::Value::Object(_) => canonical_json(&digest_json),
_ => bail!("Implementation error: digest should always return a string or object"),
}
}
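
/// Serializes any `Serialize` type to JSON, then applies
/// [`selective_digest_json`] to the resulting value.
///
/// A minimal sketch based on the tests below (`composite` is a
/// `CompositeTestData<TestData>` with fields `y` and `z`):
///
/// ```ignore
/// // Keeping `.y` leaves that field in the clear and masks its sibling:
/// let masked = selective_digest_data(&composite, ".y")?;
/// // {"y":{"a":2,"b":1},"z":"cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU"}
/// ```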
pub fn selective_digest_data<T: serde::Serialize>(
data: &T, keep_paths_str: &str,
) -> Result<String> {
    let json_value = serde_json::to_value(data)?;
selective_digest_json(&json_value, keep_paths_str)
}
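
/// Parses a JSON string (which must already be NFKD-normalized) and applies
/// [`selective_digest_json`] to it.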
pub fn selective_digest_json_str(json_str: &str, keep_paths_str: &str) -> Result<String> {
ensure!(
json_str == normalize_unicode(json_str),
"Json string to be digested must be normalized with Unicode NFKD"
);
let json_value: serde_json::Value = serde_json::from_str(json_str)?;
selective_digest_json(&json_value, keep_paths_str)
}
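
// An empty path expression keeps nothing in the clear, i.e. digests everything.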
const KEEP_NOTHING: &str = "";
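
/// Digests a whole value into a single `cju...` content id, e.g.
/// `TestData { b: 1, a: 2 }` digests to
/// `"cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU"` (see the tests below).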
pub fn digest_data<T: serde::Serialize>(data: &T) -> Result<String> {
selective_digest_data(data, KEEP_NOTHING)
}
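
/// Digests a whole JSON string into a single content id; the input must be
/// NFKD-normalized.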
pub fn digest_json_str(json_str: &str) -> Result<String> {
selective_digest_json_str(json_str, KEEP_NOTHING)
}
#[cfg(test)]
mod tests {
use super::*;
use hex::FromHex;
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Deserialize, Serialize)]
struct TestData {
b: u32,
a: u32,
}
#[derive(Clone, Debug, Deserialize, Serialize)]
struct CompositeTestData<T> {
z: Option<T>,
y: Option<T>,
}
#[test]
fn reject_non_nfkd() -> Result<()> {
        // The same word "álom" in two encodings: NFC (precomposed U+00E1) and
        // NFKD ('a' followed by combining acute U+0301). The literals below are
        // spelled with escapes so the difference stays visible in the source.
        let key_nfc = String::from_utf8(Vec::from_hex("c3a16c6f6d")?)?;
        let key_nfkd = String::from_utf8(Vec::from_hex("61cc816c6f6d")?)?;
        assert_eq!(key_nfc, "\u{e1}lom");
        assert_eq!(key_nfkd, "a\u{301}lom");
let str_nfc = format!("{{\"{}\": 1}}", key_nfc);
let str_nfkd = format!("{{\"{}\": 1}}", key_nfkd);
assert_eq!(digest_json_str(&str_nfkd)?, "cjuRab8yOeLzxmFY_fEMC79cW5z9XyihRhaGnTSvMabrA8");
assert!(digest_json_str(&str_nfc).is_err());
let json_value_nfc: serde_json::Value = serde_json::from_str(&str_nfc)?;
let json_value_nfkd: serde_json::Value = serde_json::from_str(&str_nfkd)?;
assert_eq!(
selective_digest_json(&json_value_nfkd, "")?,
"cjuRab8yOeLzxmFY_fEMC79cW5z9XyihRhaGnTSvMabrA8"
);
assert!(selective_digest_json(&json_value_nfc, "").is_err());
Ok(())
}
#[test]
    fn digest_string_is_idempotent() {
        // Digesting a JSON string yields the string itself, so content ids
        // can safely be digested again without changing.
        let content_id = &r#""cjuzC-XxgzNMwYXtw8aMIAeS2Xjlw1hlSNKTvVtUwPuyYo""#;
let digest_id = digest_data(content_id).unwrap();
assert_eq!(content_id, &digest_id);
}
#[test]
    fn test_json_digest() -> Result<()> {
        // Field order in the struct (b before a) is irrelevant: canonical JSON
        // sorts the keys before hashing.
        let test_obj = TestData { b: 1, a: 2 };
{
let digested = digest_data(&test_obj)?;
assert_eq!(digested, "cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU");
}
{
let digested = digest_data(&[&test_obj, &test_obj])?;
assert_eq!(digested, "cjuGkDpb1HL7F8xFKDFVj3felfKZzjrJy92-108uuPixNw");
}
{
let digested =
digest_data(&(&test_obj, "cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU"))?;
assert_eq!(digested, "cjuGkDpb1HL7F8xFKDFVj3felfKZzjrJy92-108uuPixNw");
}
{
let digested = digest_data(&[
"cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU",
"cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU",
])?;
assert_eq!(digested, "cjuGkDpb1HL7F8xFKDFVj3felfKZzjrJy92-108uuPixNw");
}
{
let x = &test_obj;
let comp = CompositeTestData { z: Some(x.clone()), y: Some(x.clone()) };
let digested = digest_data(&comp)?;
assert_eq!(digested, "cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ");
}
{
let comp = CompositeTestData {
z: Some("cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU".to_owned()),
y: Some("cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU".to_owned()),
};
let digested = digest_data(&comp)?;
assert_eq!(digested, "cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ");
}
Ok(())
}
#[test]
fn test_selective_digesting() -> Result<()> {
let test_obj = TestData { b: 1, a: 2 };
let x = &test_obj;
let composite = CompositeTestData { z: Some(x.clone()), y: Some(x.clone()) };
let double_complex =
CompositeTestData { z: Some(composite.clone()), y: Some(composite.clone()) };
let triple_complex =
CompositeTestData { z: Some(double_complex.clone()), y: Some(double_complex.clone()) };
{
let fully_digested = selective_digest_data(&composite, "")?;
assert_eq!(fully_digested, "cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ");
}
{
let keep_y = selective_digest_data(&composite, ".y")?;
assert_eq!(
keep_y,
r#"{"y":{"a":2,"b":1},"z":"cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU"}"#
);
let val: serde_json::Value = serde_json::from_str(&keep_y)?;
assert_eq!(digest_data(&val)?, "cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ");
}
{
let keep_z = selective_digest_data(&composite, ".z")?;
assert_eq!(
keep_z,
r#"{"y":"cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU","z":{"a":2,"b":1}}"#
);
let val: serde_json::Value = serde_json::from_str(&keep_z)?;
assert_eq!(digest_data(&val)?, "cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ");
}
{
let digest = digest_data(&double_complex)?;
assert_eq!(digest, "cjuQLebyl_BJipFLibhWiStDBqK5J4JZq15ehUqybfTTKA");
}
{
let keep_yz = selective_digest_data(&double_complex, ".y.z")?;
assert_eq!(
keep_yz,
r#"{"y":{"y":"cjumTq1s6Tn6xkXolxHj4LmAo7DAb-zoPLhEa1BvpovAFU","z":{"a":2,"b":1}},"z":"cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ"}"#
);
let val: serde_json::Value = serde_json::from_str(&keep_yz)?;
assert_eq!(digest_data(&val)?, "cjuQLebyl_BJipFLibhWiStDBqK5J4JZq15ehUqybfTTKA");
}
{
let digest = digest_data(&triple_complex)?;
assert_eq!(digest, "cjuik140L3w7LCi6z1eHt7Qgwr2X65-iy8HA6zqrlUdmVk");
}
{
let keep_yz = selective_digest_data(&triple_complex, ".y.y , .z.z")?;
assert_eq!(
keep_yz,
r#"{"y":{"y":{"y":{"a":2,"b":1},"z":{"a":2,"b":1}},"z":"cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ"},"z":{"y":"cjubdcpA0FfHhD8yEpDzZ8vS5sm7yxkrX_wAJgmke2bWRQ","z":{"y":{"a":2,"b":1},"z":{"a":2,"b":1}}}}"#
);
let val: serde_json::Value = serde_json::from_str(&keep_yz)?;
assert_eq!(digest_data(&val)?, "cjuik140L3w7LCi6z1eHt7Qgwr2X65-iy8HA6zqrlUdmVk");
}
Ok(())
}
}