things3_cloud/ids/
matching.rs1use std::collections::HashMap;
2
3use crate::ids::{ThingsId, things_id::base58_encode_fixed};
4
5pub fn lcp_len(a: &str, b: &str) -> usize {
6 lcp_len_bytes(a.as_bytes(), b.as_bytes())
7}
8
9pub fn shortest_unique_prefixes(ids: &[ThingsId]) -> HashMap<ThingsId, String> {
18 if ids.is_empty() {
19 return HashMap::new();
20 }
21
22 let mut pairs: Vec<(ThingsId, [u8; 22], usize)> = ids
23 .iter()
24 .map(|id| {
25 let (buf, len) = base58_encode_fixed(id.as_bytes());
26 (id.clone(), buf, len)
27 })
28 .collect();
29 pairs.sort_unstable_by(|a, b| a.1[..a.2].cmp(&b.1[..b.2]));
30
31 let n = pairs.len();
32 let mut result = HashMap::with_capacity(n);
33 for i in 0..n {
34 let enc = &pairs[i].1[..pairs[i].2];
35 let left = if i > 0 {
36 lcp_len_bytes(enc, &pairs[i - 1].1[..pairs[i - 1].2])
37 } else {
38 0
39 };
40 let right = if i + 1 < n {
41 lcp_len_bytes(enc, &pairs[i + 1].1[..pairs[i + 1].2])
42 } else {
43 0
44 };
45 let need = (left.max(right) + 1).min(pairs[i].2);
49 let prefix = std::str::from_utf8(&enc[..need])
50 .expect("base58 output must be ASCII")
51 .to_owned();
52 result.insert(pairs[i].0.clone(), prefix);
53 }
54
55 result
56}
57
58pub fn longest_shortest_unique_prefix_len(ids: &[ThingsId]) -> usize {
63 if ids.is_empty() {
64 return 0;
65 }
66
67 shortest_unique_prefixes(ids)
68 .values()
69 .map(|s| s.len())
70 .max()
71 .unwrap_or(1)
72}
73
74#[inline]
75fn lcp_len_bytes(a: &[u8], b: &[u8]) -> usize {
76 let max = a.len().min(b.len());
77 for i in 0..max {
78 if a[i] != b[i] {
79 return i;
80 }
81 }
82 max
83}
84
85pub fn prefix_matches<'a>(sorted_ids: &'a [ThingsId], prefix: &str) -> Vec<&'a ThingsId> {
86 let prefix = prefix.as_bytes();
87 if prefix.is_empty() {
88 return sorted_ids.iter().collect();
89 }
90
91 let mut matches = Vec::new();
92 let mut collecting = false;
93 for id in sorted_ids {
94 let (buf, len) = base58_encode_fixed(id.as_bytes());
95 let encoded = &buf[..len];
96 if encoded.starts_with(prefix) {
97 matches.push(id);
98 collecting = true;
99 } else if collecting {
100 break;
101 }
102 }
103 matches
104}
105
106#[cfg(test)]
107mod tests {
108 use super::*;
109
110 #[test]
111 fn shortest_unique_prefixes_are_actually_unique() {
112 let ids: Vec<ThingsId> = (0..200).map(|_| ThingsId::random()).collect();
115 let prefixes = shortest_unique_prefixes(&ids);
116
117 assert_eq!(prefixes.len(), ids.len());
118
119 for (id, prefix) in &prefixes {
120 let matches: Vec<&ThingsId> = ids
121 .iter()
122 .filter(|other| other.to_string().starts_with(prefix.as_str()))
123 .collect();
124 assert_eq!(
125 matches.len(),
126 1,
127 "prefix {:?} for {:?} matched {} IDs: {:?}",
128 prefix,
129 id.to_string(),
130 matches.len(),
131 matches.iter().map(|m| m.to_string()).collect::<Vec<_>>()
132 );
133 assert_eq!(matches[0], id);
134 }
135 }
136
137 #[test]
138 fn shortest_unique_prefixes_are_minimal() {
139 let ids: Vec<ThingsId> = (0..200).map(|_| ThingsId::random()).collect();
142 let prefixes = shortest_unique_prefixes(&ids);
143
144 for (_id, prefix) in &prefixes {
145 if prefix.len() <= 1 {
146 continue; }
148 let shorter = &prefix[..prefix.len() - 1];
149 let matches: Vec<&ThingsId> = ids
150 .iter()
151 .filter(|other| other.to_string().starts_with(shorter))
152 .collect();
153 assert!(
154 matches.len() > 1,
155 "prefix {:?} is not minimal — {:?} (one char shorter) still only matches 1 ID",
156 prefix,
157 shorter
158 );
159 }
160 }
161
162 #[test]
163 fn single_id() {
164 let ids = vec![ThingsId::random()];
165 let prefixes = shortest_unique_prefixes(&ids);
166 assert_eq!(prefixes.len(), 1);
167 let prefix = prefixes.values().next().unwrap();
168 assert_eq!(prefix.len(), 1, "single ID should have 1-char prefix");
169 }
170
171 #[test]
172 fn empty_input() {
173 let prefixes = shortest_unique_prefixes(&[]);
174 assert!(prefixes.is_empty());
175 }
176
177 #[test]
178 fn longest_shortest_prefix_len_drives_fixed_width_unique_rendering() {
179 fn find_case_ids() -> Vec<ThingsId> {
180 let mut a: Option<ThingsId> = None;
181 let mut b_left: Option<ThingsId> = None;
182 let mut b_right: Option<ThingsId> = None;
183
184 for _ in 0..1_000_000 {
185 let id = ThingsId::random();
186 let (buf, len) = base58_encode_fixed(id.as_bytes());
187 let s = std::str::from_utf8(&buf[..len]).unwrap().to_owned();
188
189 if s.starts_with('A') {
190 if a.is_none() {
191 a = Some(id);
192 }
193 continue;
194 }
195
196 if s.starts_with('B') {
197 let second = s.chars().nth(1).unwrap_or('1');
198 if b_left.is_none() {
199 b_left = Some(id);
200 } else {
201 let left_second = b_left
202 .as_ref()
203 .unwrap()
204 .to_string()
205 .chars()
206 .nth(1)
207 .unwrap_or('1');
208 if second != left_second {
209 b_right = Some(id);
210 }
211 }
212 }
213
214 if a.is_some() && b_left.is_some() && b_right.is_some() {
215 break;
216 }
217 }
218
219 vec![
220 a.expect("must find an A* id"),
221 b_left.expect("must find first B* id"),
222 b_right.expect("must find second B* id with different second char"),
223 ]
224 }
225
226 let ids = find_case_ids();
227 let width = longest_shortest_unique_prefix_len(&ids);
228 assert_eq!(width, 2);
229
230 for id in &ids {
231 let prefix = id.to_string().chars().take(width).collect::<String>();
232 let matches: Vec<&ThingsId> = ids
233 .iter()
234 .filter(|other| other.to_string().starts_with(prefix.as_str()))
235 .collect();
236 assert_eq!(
237 matches.len(),
238 1,
239 "prefix {prefix:?} should identify exactly one id"
240 );
241 assert_eq!(matches[0], id);
242 }
243 }
244}