things3_cloud/ids/
matching.rs1use std::collections::HashMap;
2
3use crate::ids::{ThingsId, things_id::base58_encode_fixed};
4
5pub fn lcp_len(a: &str, b: &str) -> usize {
6 lcp_len_bytes(a.as_bytes(), b.as_bytes())
7}
8
9pub fn shortest_unique_prefixes(ids: &[ThingsId]) -> HashMap<ThingsId, String> {
15 if ids.is_empty() {
16 return HashMap::new();
17 }
18
19 let mut pairs: Vec<(ThingsId, [u8; 22], usize)> = ids
21 .iter()
22 .map(|id| {
23 let (buf, len) = base58_encode_fixed(id.as_bytes());
24 (id.clone(), buf, len)
25 })
26 .collect();
27 pairs.sort_unstable_by(|a, b| a.1[..a.2].cmp(&b.1[..b.2]));
29
30 let n = pairs.len();
31 let mut result = HashMap::with_capacity(n);
32 for i in 0..n {
33 let enc = &pairs[i].1[..pairs[i].2];
34 let left = if i > 0 {
35 lcp_len_bytes(enc, &pairs[i - 1].1[..pairs[i - 1].2])
36 } else {
37 0
38 };
39 let right = if i + 1 < n {
40 lcp_len_bytes(enc, &pairs[i + 1].1[..pairs[i + 1].2])
41 } else {
42 0
43 };
44 let need = (left.max(right) + 1).min(pairs[i].2);
48 let prefix = unsafe { std::str::from_utf8_unchecked(&enc[..need]) }.to_owned();
50 result.insert(pairs[i].0.clone(), prefix);
51 }
52
53 result
54}
55
56pub fn longest_shortest_unique_prefix_len(ids: &[ThingsId]) -> usize {
61 if ids.is_empty() {
62 return 0;
63 }
64
65 shortest_unique_prefixes(ids)
66 .values()
67 .map(|s| s.len())
68 .max()
69 .unwrap_or(1)
70}
71
72#[inline]
73fn lcp_len_bytes(a: &[u8], b: &[u8]) -> usize {
74 let max = a.len().min(b.len());
75 for i in 0..max {
76 if a[i] != b[i] {
77 return i;
78 }
79 }
80 max
81}
82
83pub fn prefix_matches<'a>(sorted_ids: &'a [ThingsId], prefix: &str) -> Vec<&'a ThingsId> {
84 sorted_ids
85 .iter()
86 .filter(|id| id.starts_with(prefix))
87 .collect()
88}
89
90#[cfg(test)]
91mod tests {
92 use super::*;
93
94 #[test]
95 fn shortest_unique_prefixes_are_actually_unique() {
96 let ids: Vec<ThingsId> = (0..200).map(|_| ThingsId::random()).collect();
99 let prefixes = shortest_unique_prefixes(&ids);
100
101 assert_eq!(prefixes.len(), ids.len());
102
103 for (id, prefix) in &prefixes {
104 let matches: Vec<&ThingsId> = ids
105 .iter()
106 .filter(|other| other.to_string().starts_with(prefix.as_str()))
107 .collect();
108 assert_eq!(
109 matches.len(),
110 1,
111 "prefix {:?} for {:?} matched {} IDs: {:?}",
112 prefix,
113 id.to_string(),
114 matches.len(),
115 matches.iter().map(|m| m.to_string()).collect::<Vec<_>>()
116 );
117 assert_eq!(matches[0], id);
118 }
119 }
120
121 #[test]
122 fn shortest_unique_prefixes_are_minimal() {
123 let ids: Vec<ThingsId> = (0..200).map(|_| ThingsId::random()).collect();
126 let prefixes = shortest_unique_prefixes(&ids);
127
128 for (_id, prefix) in &prefixes {
129 if prefix.len() <= 1 {
130 continue; }
132 let shorter = &prefix[..prefix.len() - 1];
133 let matches: Vec<&ThingsId> = ids
134 .iter()
135 .filter(|other| other.to_string().starts_with(shorter))
136 .collect();
137 assert!(
138 matches.len() > 1,
139 "prefix {:?} is not minimal — {:?} (one char shorter) still only matches 1 ID",
140 prefix,
141 shorter
142 );
143 }
144 }
145
146 #[test]
147 fn single_id() {
148 let ids = vec![ThingsId::random()];
149 let prefixes = shortest_unique_prefixes(&ids);
150 assert_eq!(prefixes.len(), 1);
151 let prefix = prefixes.values().next().unwrap();
152 assert_eq!(prefix.len(), 1, "single ID should have 1-char prefix");
153 }
154
155 #[test]
156 fn empty_input() {
157 let prefixes = shortest_unique_prefixes(&[]);
158 assert!(prefixes.is_empty());
159 }
160
161 #[test]
162 fn longest_shortest_prefix_len_drives_fixed_width_unique_rendering() {
163 fn find_case_ids() -> Vec<ThingsId> {
164 let mut a: Option<ThingsId> = None;
165 let mut b_left: Option<ThingsId> = None;
166 let mut b_right: Option<ThingsId> = None;
167
168 for n in 1u64..200_000 {
169 let mut raw = [0u8; 16];
170 for (i, byte) in raw.iter_mut().enumerate() {
171 let shift = ((i % 8) * 8) as u32;
172 let mixed = n.wrapping_mul(0x9E37_79B9_7F4A_7C15u64 ^ (i as u64 * 0xA5A5u64));
173 *byte = ((mixed >> shift) & 0xFF) as u8;
174 }
175
176 let (buf, len) = base58_encode_fixed(&raw);
177 let s = String::from_utf8(buf[..len].to_vec()).unwrap();
178 let id = s.parse::<ThingsId>().unwrap();
179
180 if s.starts_with('A') {
181 if a.is_none() {
182 a = Some(id);
183 }
184 continue;
185 }
186
187 if s.starts_with('B') {
188 let second = s.chars().nth(1).unwrap_or('1');
189 if b_left.is_none() {
190 b_left = Some(id);
191 } else {
192 let left_second = b_left
193 .as_ref()
194 .unwrap()
195 .to_string()
196 .chars()
197 .nth(1)
198 .unwrap_or('1');
199 if second != left_second {
200 b_right = Some(id);
201 }
202 }
203 }
204
205 if a.is_some() && b_left.is_some() && b_right.is_some() {
206 break;
207 }
208 }
209
210 vec![
211 a.expect("must find an A* id"),
212 b_left.expect("must find first B* id"),
213 b_right.expect("must find second B* id with different second char"),
214 ]
215 }
216
217 let ids = find_case_ids();
218 let width = longest_shortest_unique_prefix_len(&ids);
219 assert_eq!(width, 2);
220
221 for id in &ids {
222 let prefix = id.to_string().chars().take(width).collect::<String>();
223 let matches: Vec<&ThingsId> = ids
224 .iter()
225 .filter(|other| other.to_string().starts_with(prefix.as_str()))
226 .collect();
227 assert_eq!(
228 matches.len(),
229 1,
230 "prefix {prefix:?} should identify exactly one id"
231 );
232 assert_eq!(matches[0], id);
233 }
234 }
235}