use ciborium::Value;
/// Canonicalizes `value` (see [`canonicalize`]) and serializes the result to CBOR bytes.
///
/// # Panics
///
/// Panics if `ciborium::ser::into_writer` reports an error while writing into
/// the in-memory buffer.
#[must_use]
pub(crate) fn to_canonical_bytes(value: Value) -> Vec<u8> {
    let mut buffer: Vec<u8> = Vec::new();
    ciborium::ser::into_writer(&canonicalize(value), &mut buffer)
        .expect("ciborium::ser::into_writer should not fail on Vec<u8>");
    buffer
}
/// Returns `value` with every map, at any nesting depth, reordered into a
/// deterministic key order.
///
/// Map entries are sorted by the CBOR encoding of their *canonicalized* key:
/// shorter encodings first, ties broken by bytewise lexicographic comparison
/// (the length-first ordering described as "Canonical CBOR" in RFC 7049 §3.9).
/// Keys and values are canonicalized recursively, as are array elements and
/// the content of tags. All other values are returned unchanged.
///
/// The sort is stable, so entries whose keys encode identically keep their
/// relative input order; duplicate keys are not removed.
///
/// # Panics
///
/// Panics if a map key cannot be serialized by `ciborium::ser::into_writer`.
#[must_use]
pub(crate) fn canonicalize(value: Value) -> Value {
    match value {
        Value::Map(entries) => {
            let mut keyed: Vec<(Vec<u8>, Value, Value)> = entries
                .into_iter()
                .map(|(k, v)| {
                    // Canonicalize the key *before* encoding it. A key that is
                    // itself a map (or contains one) would otherwise yield sort
                    // bytes that depend on its inner insertion order, making the
                    // outer ordering non-deterministic for equal inputs.
                    let canonical_k = canonicalize(k);
                    let encoded_key = encode_value(&canonical_k);
                    (encoded_key, canonical_k, canonicalize(v))
                })
                .collect();
            // Length-first, then bytewise comparison of the encoded keys.
            keyed.sort_by(|a, b| a.0.len().cmp(&b.0.len()).then_with(|| a.0.cmp(&b.0)));
            Value::Map(keyed.into_iter().map(|(_, k, v)| (k, v)).collect())
        }
        Value::Array(items) => Value::Array(items.into_iter().map(canonicalize).collect()),
        Value::Tag(tag, inner) => Value::Tag(tag, Box::new(canonicalize(*inner))),
        // Integers, bytes, floats, text, booleans, null and any other variant
        // carry no nested maps to reorder, so they pass through untouched.
        other => other,
    }
}
/// Serializes `value` to CBOR bytes exactly as given, without reordering anything.
///
/// # Panics
///
/// Panics if `ciborium::ser::into_writer` reports an error while writing into
/// the in-memory buffer.
fn encode_value(value: &Value) -> Vec<u8> {
    let mut encoded: Vec<u8> = Vec::new();
    let outcome = ciborium::ser::into_writer(value, &mut encoded);
    outcome.expect("ciborium::ser::into_writer should not fail on Vec<u8>");
    encoded
}
/// Decodes a CBOR [`Value`] from `bytes`.
///
/// # Errors
///
/// Returns `Err(())` if `ciborium::de::from_reader` fails; the underlying
/// error detail is discarded.
pub(crate) fn from_bytes(bytes: &[u8]) -> Result<Value, ()> {
    match ciborium::de::from_reader(bytes) {
        Ok(value) => Ok(value),
        Err(_) => Err(()),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a map from `(text key, integer value)` pairs, keeping the given order.
    fn text_map(pairs: &[(&str, i32)]) -> Value {
        Value::Map(
            pairs
                .iter()
                .map(|&(key, number)| (Value::Text(key.to_owned()), Value::Integer(number.into())))
                .collect(),
        )
    }

    /// Collects the keys of `entries` in order; every key must be `Value::Text`.
    fn text_keys(entries: &[(Value, Value)]) -> Vec<&str> {
        entries
            .iter()
            .map(|(key, _)| match key {
                Value::Text(text) => text.as_str(),
                _ => unreachable!(),
            })
            .collect()
    }

    #[test]
    fn map_entries_canonicalize_to_same_bytes_regardless_of_insertion_order() {
        let b1 = to_canonical_bytes(text_map(&[("zebra", 1), ("apple", 2), ("mango", 3)]));
        let b2 = to_canonical_bytes(text_map(&[("apple", 2), ("mango", 3), ("zebra", 1)]));
        let b3 = to_canonical_bytes(text_map(&[("mango", 3), ("zebra", 1), ("apple", 2)]));
        assert_eq!(b1, b2);
        assert_eq!(b1, b3);
    }

    #[test]
    fn map_keys_sort_shorter_first_then_lexicographic() {
        let Value::Map(entries) = canonicalize(text_map(&[("abc", 1), ("a", 2), ("ab", 3)])) else {
            panic!("expected Map");
        };
        assert_eq!(text_keys(&entries), vec!["a", "ab", "abc"]);
    }

    #[test]
    fn same_length_keys_sort_bytewise_lex() {
        let Value::Map(entries) = canonicalize(text_map(&[("zoo", 1), ("ant", 2), ("dog", 3)]))
        else {
            panic!("expected Map");
        };
        assert_eq!(text_keys(&entries), vec!["ant", "dog", "zoo"]);
    }

    #[test]
    fn nested_maps_are_recursively_canonicalized() {
        let inner_unsorted = text_map(&[("z", 1), ("a", 2)]);
        let outer = Value::Map(vec![(Value::Text("inner".into()), inner_unsorted)]);
        let Value::Map(outer_entries) = canonicalize(outer) else {
            panic!("expected outer Map");
        };
        let Value::Map(inner_entries) = &outer_entries[0].1 else {
            panic!("expected inner Map");
        };
        assert_eq!(text_keys(inner_entries), vec!["a", "z"]);
    }

    #[test]
    fn maps_inside_arrays_are_canonicalized() {
        let arr = Value::Array(vec![text_map(&[("z", 1), ("a", 2)])]);
        let Value::Array(items) = canonicalize(arr) else {
            panic!("expected Array");
        };
        let Value::Map(entries) = &items[0] else {
            panic!("expected Map inside Array");
        };
        let Value::Text(first_key) = &entries[0].0 else {
            unreachable!()
        };
        assert_eq!(first_key.as_str(), "a");
    }

    #[test]
    fn canonical_bytes_round_trip_through_decode() {
        let bytes = to_canonical_bytes(text_map(&[("apple", 1), ("zebra", 2)]));
        let decoded = from_bytes(&bytes).unwrap();
        assert_eq!(bytes, to_canonical_bytes(decoded));
    }

    #[test]
    fn non_canonical_input_re_canonicalizes_to_different_bytes() {
        // A two-entry map encoded with "zebra" before "apple".
        let non_canonical: Vec<u8> = vec![
            0xA2, // map(2)
            0x65, 0x7A, 0x65, 0x62, 0x72, 0x61, // text(5) "zebra"
            0x01, // unsigned(1)
            0x65, 0x61, 0x70, 0x70, 0x6C, 0x65, // text(5) "apple"
            0x02, // unsigned(2)
        ];
        let re_canonicalized = to_canonical_bytes(from_bytes(&non_canonical).unwrap());
        assert_ne!(
            non_canonical, re_canonicalized,
            "non-canonical input should re-canonicalize to different bytes"
        );
    }
}