1use zer_core::{record::Record, schema::Schema};
2
3use super::BlockingKey;
4use crate::normalize::normalize_plate;
5
/// Blocking key based on the normalized text of a license-plate field.
///
/// The key reads one record field, passes it through `normalize_plate`, and
/// emits the result as the only blocking key.
#[derive(Debug, Clone)]
pub struct LicensePlateNormKey {
    /// Name of the record field that holds the plate text.
    plate_field: String,
}
15
16impl LicensePlateNormKey {
17 pub fn new(plate_field: &str) -> Self {
18 Self {
19 plate_field: plate_field.into(),
20 }
21 }
22}
23
24impl BlockingKey for LicensePlateNormKey {
25 fn name(&self) -> &str {
26 "plate_norm"
27 }
28
29 fn extract(&self, record: &Record, _schema: &Schema) -> Vec<String> {
30 let cow = record.field_as_str(&self.plate_field);
31 let plate = match cow.as_deref() {
32 Some(s) => s,
33 None => return vec![],
34 };
35 let norm = normalize_plate(plate);
36 if norm.is_empty() {
37 return vec![];
38 }
39 vec![norm]
40 }
41}
42
/// Blocking key that emits the normalized plate together with every variant
/// obtained by deleting exactly one character from it.
///
/// Two plates that differ in a single character position end up sharing the
/// variant in which that position is removed.
#[derive(Debug, Clone)]
pub struct PlateOCRFuzzyKey {
    /// Name of the record field that holds the plate text.
    plate_field: String,
}
59
60impl PlateOCRFuzzyKey {
61 pub fn new(plate_field: &str) -> Self {
62 Self {
63 plate_field: plate_field.into(),
64 }
65 }
66}
67
68impl BlockingKey for PlateOCRFuzzyKey {
69 fn name(&self) -> &str {
70 "plate_ocr"
71 }
72
73 fn extract(&self, record: &Record, _schema: &Schema) -> Vec<String> {
74 let cow = record.field_as_str(&self.plate_field);
75 let plate = match cow.as_deref() {
76 Some(s) => s,
77 None => return vec![],
78 };
79 let norm = normalize_plate(plate);
80 if norm.is_empty() {
81 return vec![];
82 }
83
84 let chars: Vec<char> = norm.chars().collect();
85 let n = chars.len();
86 let mut keys = Vec::with_capacity(n + 1);
87 keys.push(norm.clone());
88
89 for i in 0..n {
93 let variant: String = chars
94 .iter()
95 .enumerate()
96 .filter(|&(j, _)| j != i)
97 .map(|(_, &c)| c)
98 .collect();
99 keys.push(variant);
100 }
101
102 keys.sort();
103 keys.dedup();
104 keys
105 }
106}
107
/// Blocking key that groups records by camera, calendar date and time slot.
///
/// The emitted key has the form `camera:date:slot`, where `slot` is the
/// minute-of-day divided by `window_mins`.
#[derive(Debug, Clone)]
pub struct CameraTimeWindowKey {
    /// Name of the record field that holds the camera identifier.
    camera_field: String,
    /// Name of the record field that holds the timestamp text.
    time_field: String,
    /// Width of one time slot, in minutes.
    window_mins: u32,
}
120
121impl CameraTimeWindowKey {
122 pub fn new(camera_field: &str, time_field: &str, window_mins: u32) -> Self {
123 Self {
124 camera_field: camera_field.into(),
125 time_field: time_field.into(),
126 window_mins,
127 }
128 }
129}
130
/// Maps the time of day in a `<date>T<HH>:<MM>[:...]` timestamp to a slot index.
///
/// The text after the first `'T'` is split on `':'`; the first two pieces are
/// parsed as hour and minute, and the resulting minute-of-day is divided by
/// `window` (slot width in minutes).
///
/// Returns `None` when the input has no `'T'`, when the hour or minute piece is
/// missing or is not an unsigned integer, when `window` is zero, or when the
/// minute-of-day does not fit in a `u32`.
fn time_to_slot(datetime: &str, window: u32) -> Option<u32> {
    let t_idx = datetime.find('T')?;
    // 'T' is a single byte, so `t_idx + 1` is always a valid char boundary.
    let time_part = &datetime[t_idx + 1..];
    let mut parts = time_part.splitn(3, ':');
    let hour: u32 = parts.next()?.parse().ok()?;
    let minute: u32 = parts.next()?.parse().ok()?;
    // Checked arithmetic: an absurd hour value must not overflow or wrap.
    let minute_of_day = hour.checked_mul(60)?.checked_add(minute)?;
    // `checked_div` turns a zero-width window into `None` instead of a panic.
    minute_of_day.checked_div(window)
}
139
140impl BlockingKey for CameraTimeWindowKey {
141 fn name(&self) -> &str {
142 "cam_time_window"
143 }
144
145 fn extract(&self, record: &Record, _schema: &Schema) -> Vec<String> {
146 let cam_cow = record.field_as_str(&self.camera_field);
147 let cam = match cam_cow.as_deref() {
148 Some(s) => s,
149 None => return vec![],
150 };
151 let ts_cow = record.field_as_str(&self.time_field);
152 let ts = match ts_cow.as_deref() {
153 Some(s) => s,
154 None => return vec![],
155 };
156 let date = ts.get(..10).unwrap_or("");
157 let slot = match time_to_slot(ts, self.window_mins) {
158 Some(s) => s,
159 None => return vec![],
160 };
161 vec![format!("{}:{}:{}", cam, date, slot)]
162 }
163}
164
/// Blocking key that places a record in a cell of a regular latitude/longitude grid.
///
/// Each coordinate is divided by `grid_size` and rounded down; the emitted key
/// has the form `lat_cell:lon_cell`.
#[derive(Debug, Clone)]
pub struct GeoGridKey {
    /// Name of the record field that holds the latitude.
    lat_field: String,
    /// Name of the record field that holds the longitude.
    lon_field: String,
    /// Edge length of one grid cell; both coordinates are divided by it.
    grid_size: f64,
}
176
177impl GeoGridKey {
178 pub fn new(lat_field: &str, lon_field: &str, grid_size: f64) -> Self {
179 Self {
180 lat_field: lat_field.into(),
181 lon_field: lon_field.into(),
182 grid_size,
183 }
184 }
185}
186
187impl BlockingKey for GeoGridKey {
188 fn name(&self) -> &str {
189 "geo_grid"
190 }
191
192 fn extract(&self, record: &Record, _schema: &Schema) -> Vec<String> {
193 let lat = match record.field_as::<f64>(&self.lat_field) {
194 Some(v) => v,
195 None => return vec![],
196 };
197 let lon = match record.field_as::<f64>(&self.lon_field) {
198 Some(v) => v,
199 None => return vec![],
200 };
201 let lat_cell = (lat / self.grid_size).floor() as i64;
202 let lon_cell = (lon / self.grid_size).floor() as i64;
203 vec![format!("{}:{}", lat_cell, lon_cell)]
204 }
205}
206
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use zer_core::{
        record::FieldValue,
        schema::{FieldKind, SchemaBuilder},
    };

    /// Schema declaring the five fields every fixture record carries.
    fn schema() -> Schema {
        SchemaBuilder::new()
            .field("kenteken", FieldKind::LicensePlate)
            .field("camera_id", FieldKind::Categorical)
            .field("tijdstip", FieldKind::Timestamp)
            .field("lat", FieldKind::GpsCoordinate)
            .field("lon", FieldKind::GpsCoordinate)
            .build()
            .unwrap()
    }

    /// Builds a record with all five fields stored as text values.
    fn rec(id: u64, kenteken: &str, camera: &str, ts: &str, lat: &str, lon: &str) -> Record {
        Record::new(id)
            .insert("kenteken", FieldValue::Text(kenteken.into()))
            .insert("camera_id", FieldValue::Text(camera.into()))
            .insert("tijdstip", FieldValue::Text(ts.into()))
            .insert("lat", FieldValue::Text(lat.into()))
            .insert("lon", FieldValue::Text(lon.into()))
    }

    /// Record where only the plate varies; the other fields are fixed.
    fn plate_capture(id: u64, plate: &str) -> Record {
        rec(id, plate, "CAM-A1-001", "2025-01-01T10:00:00", "52.3", "4.9")
    }

    /// Record at a fixed camera and position where plate and timestamp vary.
    fn timed_capture(id: u64, plate: &str, ts: &str) -> Record {
        rec(id, plate, "CAM-A1-001", ts, "52.0", "4.0")
    }

    /// Record at a fixed camera and time where plate and position vary.
    fn located_capture(id: u64, plate: &str, lat: &str, lon: &str) -> Record {
        rec(id, plate, "CAM", "2025-01-01T10:00:00", lat, lon)
    }

    // --- LicensePlateNormKey -------------------------------------------------

    #[test]
    fn plate_norm_strips_hyphens() {
        let schema = schema();
        let key = LicensePlateNormKey::new("kenteken");
        let extracted = key.extract(&plate_capture(1, "25-XKL-9"), &schema);
        assert_eq!(extracted, vec!["25XKL9"]);
    }

    #[test]
    fn plate_norm_empty_field_returns_empty() {
        let schema = schema();
        let key = LicensePlateNormKey::new("kenteken");
        // A record without any fields has no plate to read.
        let bare = Record::new(1);
        assert!(key.extract(&bare, &schema).is_empty());
    }

    // --- PlateOCRFuzzyKey ----------------------------------------------------

    #[test]
    fn ocr_fuzzy_original_and_confused_share_key() {
        let schema = schema();
        let key = PlateOCRFuzzyKey::new("kenteken");

        // The two plates differ in one position: `1` versus `I`.
        let true_keys: HashSet<String> = key
            .extract(&plate_capture(1, "CX-180-W"), &schema)
            .into_iter()
            .collect();
        let ocr_keys: HashSet<String> = key
            .extract(&plate_capture(2, "CX-I80-W"), &schema)
            .into_iter()
            .collect();

        assert!(
            !true_keys.is_disjoint(&ocr_keys),
            "true plate and OCR plate must share at least one fuzzy key; true={true_keys:?}, ocr={ocr_keys:?}"
        );
    }

    #[test]
    fn ocr_fuzzy_emits_multiple_variants() {
        let schema = schema();
        let key = PlateOCRFuzzyKey::new("kenteken");
        let capture = rec(1, "L01A4", "CAM", "2025-01-01T08:00:00", "52.0", "4.0");
        let variants = key.extract(&capture, &schema);
        let emitted = |wanted: &str| variants.iter().any(|k| k.as_str() == wanted);

        assert!(
            variants.len() >= 4,
            "should emit original + deletion variants; got {keys:?}",
            keys = variants
        );
        assert!(emitted("L01A4"), "original key must be present");
        assert!(emitted("01A4"), "deletion at pos 0 (L) expected");
        assert!(emitted("L0A4"), "deletion at pos 2 (1) expected");
    }

    // --- CameraTimeWindowKey -------------------------------------------------

    #[test]
    fn camera_time_window_same_slot() {
        let schema = schema();
        let key = CameraTimeWindowKey::new("camera_id", "tijdstip", 10);

        // 14:02 and 14:08 both fall in the 14:00-14:09 window.
        let earlier = timed_capture(1, "X", "2025-06-01T14:02:00");
        let later = timed_capture(2, "Y", "2025-06-01T14:08:00");
        assert_eq!(key.extract(&earlier, &schema), key.extract(&later, &schema));
    }

    #[test]
    fn camera_time_window_different_slot() {
        let schema = schema();
        let key = CameraTimeWindowKey::new("camera_id", "tijdstip", 10);

        // 14:02 and 14:12 are in adjacent ten-minute windows.
        let earlier = timed_capture(1, "X", "2025-06-01T14:02:00");
        let later = timed_capture(2, "Y", "2025-06-01T14:12:00");
        assert_ne!(key.extract(&earlier, &schema), key.extract(&later, &schema));
    }

    // --- GeoGridKey ----------------------------------------------------------

    #[test]
    fn geo_grid_nearby_records_share_key() {
        let schema = schema();
        let key = GeoGridKey::new("lat", "lon", 0.01);

        let first = located_capture(1, "X", "52.345", "4.901");
        let second = located_capture(2, "Y", "52.349", "4.907");
        assert_eq!(key.extract(&first, &schema), key.extract(&second, &schema));
    }

    #[test]
    fn geo_grid_distant_records_differ() {
        let schema = schema();
        let key = GeoGridKey::new("lat", "lon", 0.01);

        let first = located_capture(1, "X", "52.345", "4.901");
        let second = located_capture(2, "Y", "51.922", "4.479");
        assert_ne!(key.extract(&first, &schema), key.extract(&second, &schema));
    }
}