oxgraph_layout_util/keys.rs
1//! Injective composite-key encoding.
2//!
3//! [`encode_composite_key`] joins text fields into one string such that distinct
4//! field tuples can never collide. It is the building block for a stable identity
5//! key over an equality index, where a naive separator-join (`a:b:c`) is unsound:
6//! the fields themselves may contain the separator, so `["a", "b:c"]` and
7//! `["a:b", "c"]` would collide.
8
9use alloc::string::{String, ToString};
10
11/// Encodes `fields` into one string that is injective in the field tuple:
12/// distinct inputs always produce distinct outputs and equal inputs produce
13/// equal outputs, whatever the fields contain.
14///
15/// Each field is length-prefixed (`{byte_len}:{field}`). The byte count, read up
16/// to the first `:`, fixes each field's boundary unambiguously even when a field
17/// contains `:` or leading digits, so the concatenation is a prefix-free code.
18/// The result is an opaque identity token, not intended to be parsed back.
19///
20/// # Performance
21///
22/// This function is `O(total field bytes)` with a single output allocation.
23#[must_use]
24pub fn encode_composite_key(fields: &[&str]) -> String {
25 let capacity = fields
26 .iter()
27 .map(|field| field.len().saturating_add(8))
28 .sum();
29 let mut key = String::with_capacity(capacity);
30 for field in fields {
31 key.push_str(&field.len().to_string());
32 key.push(':');
33 key.push_str(field);
34 }
35 key
36}
37
38#[cfg(test)]
39mod tests {
40 use super::encode_composite_key;
41
42 #[test]
43 fn distinct_field_boundaries_do_not_collide() {
44 // A naive `:`-join would render both as "a:b:c"; length-prefixing keeps
45 // distinct tuples distinct.
46 assert_ne!(
47 encode_composite_key(&["a", "b:c"]),
48 encode_composite_key(&["a:b", "c"]),
49 );
50 }
51
52 #[test]
53 fn equal_inputs_encode_equally_and_arity_matters() {
54 assert_eq!(
55 encode_composite_key(&["x", "y"]),
56 encode_composite_key(&["x", "y"]),
57 );
58 // Field count is part of the identity.
59 assert_ne!(
60 encode_composite_key(&["xy"]),
61 encode_composite_key(&["x", "y"]),
62 );
63 }
64
65 #[test]
66 fn empty_digit_and_separator_fields_stay_injective() {
67 assert_ne!(encode_composite_key(&[]), encode_composite_key(&[""]));
68 assert_ne!(encode_composite_key(&[""]), encode_composite_key(&["", ""]));
69 // A leading digit in a field cannot be mistaken for a length prefix.
70 assert_ne!(
71 encode_composite_key(&["5abc"]),
72 encode_composite_key(&["5", "abc"]),
73 );
74 // Fields containing the ':' separator stay distinct across boundaries.
75 assert_ne!(
76 encode_composite_key(&["1:1", "a"]),
77 encode_composite_key(&["1", "1:a"]),
78 );
79 }
80}