Skip to main content

things3_cloud/ids/
matching.rs

1use crate::ids::things_id::base58_encode_fixed;
2use crate::ids::ThingsId;
3use std::collections::HashMap;
4
5pub fn lcp_len(a: &str, b: &str) -> usize {
6    lcp_len_bytes(a.as_bytes(), b.as_bytes())
7}
8
9/// Compute the shortest unique prefix for every ID.
10///
11/// Encodes each ID exactly once into a stack-allocated `[u8; 22]` buffer.
12/// The sort and LCP scan operate on byte slices with no heap allocation;
13/// only the final `result.insert` allocates a `String` per entry.
14pub fn shortest_unique_prefixes(ids: &[ThingsId]) -> HashMap<ThingsId, String> {
15    if ids.is_empty() {
16        return HashMap::new();
17    }
18
19    // Encode each ID once into a fixed stack buffer + length.
20    let mut pairs: Vec<(ThingsId, [u8; 22], usize)> = ids
21        .iter()
22        .map(|id| {
23            let (buf, len) = base58_encode_fixed(id.as_bytes());
24            (id.clone(), buf, len)
25        })
26        .collect();
27    // Sort by the encoded string slice (same order as to_string() comparisons).
28    pairs.sort_unstable_by(|a, b| a.1[..a.2].cmp(&b.1[..b.2]));
29
30    let n = pairs.len();
31    let mut result = HashMap::with_capacity(n);
32    for i in 0..n {
33        let enc = &pairs[i].1[..pairs[i].2];
34        let left = if i > 0 {
35            lcp_len_bytes(enc, &pairs[i - 1].1[..pairs[i - 1].2])
36        } else {
37            0
38        };
39        let right = if i + 1 < n {
40            lcp_len_bytes(enc, &pairs[i + 1].1[..pairs[i + 1].2])
41        } else {
42            0
43        };
44        // +1 to go one character beyond the shared prefix.
45        // Clamp to the full encoded length — if an ID's encoding is a
46        // prefix of another, the full string is the shortest unique prefix.
47        let need = (left.max(right) + 1).min(pairs[i].2);
48        // SAFETY: base58 output is pure ASCII so any byte prefix is valid UTF-8.
49        let prefix = unsafe { std::str::from_utf8_unchecked(&enc[..need]) }.to_owned();
50        result.insert(pairs[i].0.clone(), prefix);
51    }
52
53    result
54}
55
56#[inline]
57fn lcp_len_bytes(a: &[u8], b: &[u8]) -> usize {
58    let max = a.len().min(b.len());
59    for i in 0..max {
60        if a[i] != b[i] {
61            return i;
62        }
63    }
64    max
65}
66
67pub fn prefix_matches<'a>(sorted_ids: &'a [ThingsId], prefix: &str) -> Vec<&'a ThingsId> {
68    sorted_ids
69        .iter()
70        .filter(|id| id.starts_with(prefix))
71        .collect()
72}
73
74#[cfg(test)]
75mod tests {
76    use super::*;
77
78    #[test]
79    fn shortest_unique_prefixes_are_actually_unique() {
80        // Generate a set of random IDs and verify that each prefix matches
81        // exactly one ID from the input set.
82        let ids: Vec<ThingsId> = (0..200).map(|_| ThingsId::random()).collect();
83        let prefixes = shortest_unique_prefixes(&ids);
84
85        assert_eq!(prefixes.len(), ids.len());
86
87        for (id, prefix) in &prefixes {
88            let matches: Vec<&ThingsId> = ids
89                .iter()
90                .filter(|other| other.to_string().starts_with(prefix.as_str()))
91                .collect();
92            assert_eq!(
93                matches.len(),
94                1,
95                "prefix {:?} for {:?} matched {} IDs: {:?}",
96                prefix,
97                id.to_string(),
98                matches.len(),
99                matches.iter().map(|m| m.to_string()).collect::<Vec<_>>()
100            );
101            assert_eq!(matches[0], id);
102        }
103    }
104
105    #[test]
106    fn shortest_unique_prefixes_are_minimal() {
107        // Each prefix should be the shortest possible: removing the last
108        // character should make it match more than one ID.
109        let ids: Vec<ThingsId> = (0..200).map(|_| ThingsId::random()).collect();
110        let prefixes = shortest_unique_prefixes(&ids);
111
112        for (_id, prefix) in &prefixes {
113            if prefix.len() <= 1 {
114                continue; // can't shorten a 1-char prefix
115            }
116            let shorter = &prefix[..prefix.len() - 1];
117            let matches: Vec<&ThingsId> = ids
118                .iter()
119                .filter(|other| other.to_string().starts_with(shorter))
120                .collect();
121            assert!(
122                matches.len() > 1,
123                "prefix {:?} is not minimal — {:?} (one char shorter) still only matches 1 ID",
124                prefix,
125                shorter
126            );
127        }
128    }
129
130    #[test]
131    fn single_id() {
132        let ids = vec![ThingsId::random()];
133        let prefixes = shortest_unique_prefixes(&ids);
134        assert_eq!(prefixes.len(), 1);
135        let prefix = prefixes.values().next().unwrap();
136        assert_eq!(prefix.len(), 1, "single ID should have 1-char prefix");
137    }
138
139    #[test]
140    fn empty_input() {
141        let prefixes = shortest_unique_prefixes(&[]);
142        assert!(prefixes.is_empty());
143    }
144}