Skip to main content

things3_cloud/ids/
matching.rs

1use std::collections::HashMap;
2
3use crate::ids::{ThingsId, things_id::base58_encode_fixed};
4
5pub fn lcp_len(a: &str, b: &str) -> usize {
6    lcp_len_bytes(a.as_bytes(), b.as_bytes())
7}
8
9/// Compute the shortest unique prefix for every ID.
10///
11/// Encodes each ID exactly once into a stack-allocated `[u8; 22]` buffer.
12/// The sort and LCP scan operate on byte slices with no heap allocation;
13/// only the final `result.insert` allocates a `String` per entry.
14///
15/// Note: the returned map is keyed by `ThingsId`, so duplicate IDs in the
16/// input are naturally coalesced to a single entry.
17pub fn shortest_unique_prefixes(ids: &[ThingsId]) -> HashMap<ThingsId, String> {
18    if ids.is_empty() {
19        return HashMap::new();
20    }
21
22    let mut pairs: Vec<(ThingsId, [u8; 22], usize)> = ids
23        .iter()
24        .map(|id| {
25            let (buf, len) = base58_encode_fixed(id.as_bytes());
26            (id.clone(), buf, len)
27        })
28        .collect();
29    pairs.sort_unstable_by(|a, b| a.1[..a.2].cmp(&b.1[..b.2]));
30
31    let n = pairs.len();
32    let mut result = HashMap::with_capacity(n);
33    for i in 0..n {
34        let enc = &pairs[i].1[..pairs[i].2];
35        let left = if i > 0 {
36            lcp_len_bytes(enc, &pairs[i - 1].1[..pairs[i - 1].2])
37        } else {
38            0
39        };
40        let right = if i + 1 < n {
41            lcp_len_bytes(enc, &pairs[i + 1].1[..pairs[i + 1].2])
42        } else {
43            0
44        };
45        // +1 to go one character beyond the shared prefix.
46        // Clamp to the full encoded length — if an ID's encoding is a
47        // prefix of another, the full string is the shortest unique prefix.
48        let need = (left.max(right) + 1).min(pairs[i].2);
49        let prefix = std::str::from_utf8(&enc[..need])
50            .expect("base58 output must be ASCII")
51            .to_owned();
52        result.insert(pairs[i].0.clone(), prefix);
53    }
54
55    result
56}
57
58/// Return the shared width needed to display group-local unique IDs.
59///
60/// This is the maximum length among all shortest unique prefixes for the
61/// given group. Callers can then render `id[..width]` for every row.
62pub fn longest_shortest_unique_prefix_len(ids: &[ThingsId]) -> usize {
63    if ids.is_empty() {
64        return 0;
65    }
66
67    shortest_unique_prefixes(ids)
68        .values()
69        .map(|s| s.len())
70        .max()
71        .unwrap_or(1)
72}
73
74#[inline]
75fn lcp_len_bytes(a: &[u8], b: &[u8]) -> usize {
76    let max = a.len().min(b.len());
77    for i in 0..max {
78        if a[i] != b[i] {
79            return i;
80        }
81    }
82    max
83}
84
85pub fn prefix_matches<'a>(sorted_ids: &'a [ThingsId], prefix: &str) -> Vec<&'a ThingsId> {
86    let prefix = prefix.as_bytes();
87    if prefix.is_empty() {
88        return sorted_ids.iter().collect();
89    }
90
91    let mut matches = Vec::new();
92    let mut collecting = false;
93    for id in sorted_ids {
94        let (buf, len) = base58_encode_fixed(id.as_bytes());
95        let encoded = &buf[..len];
96        if encoded.starts_with(prefix) {
97            matches.push(id);
98            collecting = true;
99        } else if collecting {
100            break;
101        }
102    }
103    matches
104}
105
106#[cfg(test)]
107mod tests {
108    use super::*;
109
110    #[test]
111    fn shortest_unique_prefixes_are_actually_unique() {
112        // Generate a set of random IDs and verify that each prefix matches
113        // exactly one ID from the input set.
114        let ids: Vec<ThingsId> = (0..200).map(|_| ThingsId::random()).collect();
115        let prefixes = shortest_unique_prefixes(&ids);
116
117        assert_eq!(prefixes.len(), ids.len());
118
119        for (id, prefix) in &prefixes {
120            let matches: Vec<&ThingsId> = ids
121                .iter()
122                .filter(|other| other.to_string().starts_with(prefix.as_str()))
123                .collect();
124            assert_eq!(
125                matches.len(),
126                1,
127                "prefix {:?} for {:?} matched {} IDs: {:?}",
128                prefix,
129                id.to_string(),
130                matches.len(),
131                matches.iter().map(|m| m.to_string()).collect::<Vec<_>>()
132            );
133            assert_eq!(matches[0], id);
134        }
135    }
136
137    #[test]
138    fn shortest_unique_prefixes_are_minimal() {
139        // Each prefix should be the shortest possible: removing the last
140        // character should make it match more than one ID.
141        let ids: Vec<ThingsId> = (0..200).map(|_| ThingsId::random()).collect();
142        let prefixes = shortest_unique_prefixes(&ids);
143
144        for (_id, prefix) in &prefixes {
145            if prefix.len() <= 1 {
146                continue; // can't shorten a 1-char prefix
147            }
148            let shorter = &prefix[..prefix.len() - 1];
149            let matches: Vec<&ThingsId> = ids
150                .iter()
151                .filter(|other| other.to_string().starts_with(shorter))
152                .collect();
153            assert!(
154                matches.len() > 1,
155                "prefix {:?} is not minimal — {:?} (one char shorter) still only matches 1 ID",
156                prefix,
157                shorter
158            );
159        }
160    }
161
162    #[test]
163    fn single_id() {
164        let ids = vec![ThingsId::random()];
165        let prefixes = shortest_unique_prefixes(&ids);
166        assert_eq!(prefixes.len(), 1);
167        let prefix = prefixes.values().next().unwrap();
168        assert_eq!(prefix.len(), 1, "single ID should have 1-char prefix");
169    }
170
171    #[test]
172    fn empty_input() {
173        let prefixes = shortest_unique_prefixes(&[]);
174        assert!(prefixes.is_empty());
175    }
176
177    #[test]
178    fn longest_shortest_prefix_len_drives_fixed_width_unique_rendering() {
179        fn find_case_ids() -> Vec<ThingsId> {
180            let mut a: Option<ThingsId> = None;
181            let mut b_left: Option<ThingsId> = None;
182            let mut b_right: Option<ThingsId> = None;
183
184            for _ in 0..1_000_000 {
185                let id = ThingsId::random();
186                let (buf, len) = base58_encode_fixed(id.as_bytes());
187                let s = std::str::from_utf8(&buf[..len]).unwrap().to_owned();
188
189                if s.starts_with('A') {
190                    if a.is_none() {
191                        a = Some(id);
192                    }
193                    continue;
194                }
195
196                if s.starts_with('B') {
197                    let second = s.chars().nth(1).unwrap_or('1');
198                    if b_left.is_none() {
199                        b_left = Some(id);
200                    } else {
201                        let left_second = b_left
202                            .as_ref()
203                            .unwrap()
204                            .to_string()
205                            .chars()
206                            .nth(1)
207                            .unwrap_or('1');
208                        if second != left_second {
209                            b_right = Some(id);
210                        }
211                    }
212                }
213
214                if a.is_some() && b_left.is_some() && b_right.is_some() {
215                    break;
216                }
217            }
218
219            vec![
220                a.expect("must find an A* id"),
221                b_left.expect("must find first B* id"),
222                b_right.expect("must find second B* id with different second char"),
223            ]
224        }
225
226        let ids = find_case_ids();
227        let width = longest_shortest_unique_prefix_len(&ids);
228        assert_eq!(width, 2);
229
230        for id in &ids {
231            let prefix = id.to_string().chars().take(width).collect::<String>();
232            let matches: Vec<&ThingsId> = ids
233                .iter()
234                .filter(|other| other.to_string().starts_with(prefix.as_str()))
235                .collect();
236            assert_eq!(
237                matches.len(),
238                1,
239                "prefix {prefix:?} should identify exactly one id"
240            );
241            assert_eq!(matches[0], id);
242        }
243    }
244}