Skip to main content

things3_cloud/ids/
matching.rs

1use std::collections::HashMap;
2
3use crate::ids::{ThingsId, things_id::base58_encode_fixed};
4
5pub fn lcp_len(a: &str, b: &str) -> usize {
6    lcp_len_bytes(a.as_bytes(), b.as_bytes())
7}
8
9/// Compute the shortest unique prefix for every ID.
10///
11/// Encodes each ID exactly once into a stack-allocated `[u8; 22]` buffer.
12/// The sort and LCP scan operate on byte slices with no heap allocation;
13/// only the final `result.insert` allocates a `String` per entry.
14pub fn shortest_unique_prefixes(ids: &[ThingsId]) -> HashMap<ThingsId, String> {
15    if ids.is_empty() {
16        return HashMap::new();
17    }
18
19    // Encode each ID once into a fixed stack buffer + length.
20    let mut pairs: Vec<(ThingsId, [u8; 22], usize)> = ids
21        .iter()
22        .map(|id| {
23            let (buf, len) = base58_encode_fixed(id.as_bytes());
24            (id.clone(), buf, len)
25        })
26        .collect();
27    // Sort by the encoded string slice (same order as to_string() comparisons).
28    pairs.sort_unstable_by(|a, b| a.1[..a.2].cmp(&b.1[..b.2]));
29
30    let n = pairs.len();
31    let mut result = HashMap::with_capacity(n);
32    for i in 0..n {
33        let enc = &pairs[i].1[..pairs[i].2];
34        let left = if i > 0 {
35            lcp_len_bytes(enc, &pairs[i - 1].1[..pairs[i - 1].2])
36        } else {
37            0
38        };
39        let right = if i + 1 < n {
40            lcp_len_bytes(enc, &pairs[i + 1].1[..pairs[i + 1].2])
41        } else {
42            0
43        };
44        // +1 to go one character beyond the shared prefix.
45        // Clamp to the full encoded length — if an ID's encoding is a
46        // prefix of another, the full string is the shortest unique prefix.
47        let need = (left.max(right) + 1).min(pairs[i].2);
48        // SAFETY: base58 output is pure ASCII so any byte prefix is valid UTF-8.
49        let prefix = unsafe { std::str::from_utf8_unchecked(&enc[..need]) }.to_owned();
50        result.insert(pairs[i].0.clone(), prefix);
51    }
52
53    result
54}
55
56/// Return the shared width needed to display group-local unique IDs.
57///
58/// This is the maximum length among all shortest unique prefixes for the
59/// given group. Callers can then render `id[..width]` for every row.
60pub fn longest_shortest_unique_prefix_len(ids: &[ThingsId]) -> usize {
61    if ids.is_empty() {
62        return 0;
63    }
64
65    shortest_unique_prefixes(ids)
66        .values()
67        .map(|s| s.len())
68        .max()
69        .unwrap_or(1)
70}
71
72#[inline]
73fn lcp_len_bytes(a: &[u8], b: &[u8]) -> usize {
74    let max = a.len().min(b.len());
75    for i in 0..max {
76        if a[i] != b[i] {
77            return i;
78        }
79    }
80    max
81}
82
83pub fn prefix_matches<'a>(sorted_ids: &'a [ThingsId], prefix: &str) -> Vec<&'a ThingsId> {
84    sorted_ids
85        .iter()
86        .filter(|id| id.starts_with(prefix))
87        .collect()
88}
89
90#[cfg(test)]
91mod tests {
92    use super::*;
93
94    #[test]
95    fn shortest_unique_prefixes_are_actually_unique() {
96        // Generate a set of random IDs and verify that each prefix matches
97        // exactly one ID from the input set.
98        let ids: Vec<ThingsId> = (0..200).map(|_| ThingsId::random()).collect();
99        let prefixes = shortest_unique_prefixes(&ids);
100
101        assert_eq!(prefixes.len(), ids.len());
102
103        for (id, prefix) in &prefixes {
104            let matches: Vec<&ThingsId> = ids
105                .iter()
106                .filter(|other| other.to_string().starts_with(prefix.as_str()))
107                .collect();
108            assert_eq!(
109                matches.len(),
110                1,
111                "prefix {:?} for {:?} matched {} IDs: {:?}",
112                prefix,
113                id.to_string(),
114                matches.len(),
115                matches.iter().map(|m| m.to_string()).collect::<Vec<_>>()
116            );
117            assert_eq!(matches[0], id);
118        }
119    }
120
121    #[test]
122    fn shortest_unique_prefixes_are_minimal() {
123        // Each prefix should be the shortest possible: removing the last
124        // character should make it match more than one ID.
125        let ids: Vec<ThingsId> = (0..200).map(|_| ThingsId::random()).collect();
126        let prefixes = shortest_unique_prefixes(&ids);
127
128        for (_id, prefix) in &prefixes {
129            if prefix.len() <= 1 {
130                continue; // can't shorten a 1-char prefix
131            }
132            let shorter = &prefix[..prefix.len() - 1];
133            let matches: Vec<&ThingsId> = ids
134                .iter()
135                .filter(|other| other.to_string().starts_with(shorter))
136                .collect();
137            assert!(
138                matches.len() > 1,
139                "prefix {:?} is not minimal — {:?} (one char shorter) still only matches 1 ID",
140                prefix,
141                shorter
142            );
143        }
144    }
145
146    #[test]
147    fn single_id() {
148        let ids = vec![ThingsId::random()];
149        let prefixes = shortest_unique_prefixes(&ids);
150        assert_eq!(prefixes.len(), 1);
151        let prefix = prefixes.values().next().unwrap();
152        assert_eq!(prefix.len(), 1, "single ID should have 1-char prefix");
153    }
154
155    #[test]
156    fn empty_input() {
157        let prefixes = shortest_unique_prefixes(&[]);
158        assert!(prefixes.is_empty());
159    }
160
161    #[test]
162    fn longest_shortest_prefix_len_drives_fixed_width_unique_rendering() {
163        fn find_case_ids() -> Vec<ThingsId> {
164            let mut a: Option<ThingsId> = None;
165            let mut b_left: Option<ThingsId> = None;
166            let mut b_right: Option<ThingsId> = None;
167
168            for n in 1u64..200_000 {
169                let mut raw = [0u8; 16];
170                for (i, byte) in raw.iter_mut().enumerate() {
171                    let shift = ((i % 8) * 8) as u32;
172                    let mixed = n.wrapping_mul(0x9E37_79B9_7F4A_7C15u64 ^ (i as u64 * 0xA5A5u64));
173                    *byte = ((mixed >> shift) & 0xFF) as u8;
174                }
175
176                let (buf, len) = base58_encode_fixed(&raw);
177                let s = String::from_utf8(buf[..len].to_vec()).unwrap();
178                let id = s.parse::<ThingsId>().unwrap();
179
180                if s.starts_with('A') {
181                    if a.is_none() {
182                        a = Some(id);
183                    }
184                    continue;
185                }
186
187                if s.starts_with('B') {
188                    let second = s.chars().nth(1).unwrap_or('1');
189                    if b_left.is_none() {
190                        b_left = Some(id);
191                    } else {
192                        let left_second = b_left
193                            .as_ref()
194                            .unwrap()
195                            .to_string()
196                            .chars()
197                            .nth(1)
198                            .unwrap_or('1');
199                        if second != left_second {
200                            b_right = Some(id);
201                        }
202                    }
203                }
204
205                if a.is_some() && b_left.is_some() && b_right.is_some() {
206                    break;
207                }
208            }
209
210            vec![
211                a.expect("must find an A* id"),
212                b_left.expect("must find first B* id"),
213                b_right.expect("must find second B* id with different second char"),
214            ]
215        }
216
217        let ids = find_case_ids();
218        let width = longest_shortest_unique_prefix_len(&ids);
219        assert_eq!(width, 2);
220
221        for id in &ids {
222            let prefix = id.to_string().chars().take(width).collect::<String>();
223            let matches: Vec<&ThingsId> = ids
224                .iter()
225                .filter(|other| other.to_string().starts_with(prefix.as_str()))
226                .collect();
227            assert_eq!(
228                matches.len(),
229                1,
230                "prefix {prefix:?} should identify exactly one id"
231            );
232            assert_eq!(matches[0], id);
233        }
234    }
235}