1use zer_core::{record::Record, schema::Schema};
2
3use crate::normalize::normalize_plate;
4use super::BlockingKey;
5
/// Blocking key built from the normalized form of a license-plate field.
///
/// Records whose plate field normalizes to the same string (via
/// `normalize_plate`) receive the same key.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LicensePlateNormKey {
    /// Name of the record field that holds the raw plate text.
    plate_field: String,
}

impl LicensePlateNormKey {
    /// Creates a key that reads the plate from the record field `plate_field`.
    pub fn new(plate_field: &str) -> Self {
        Self {
            plate_field: plate_field.to_owned(),
        }
    }
}
21
22impl BlockingKey for LicensePlateNormKey {
23 fn name(&self) -> &str { "plate_norm" }
24
25 fn extract(&self, record: &Record, _schema: &Schema) -> Vec<String> {
26 let cow = record.field_as_str(&self.plate_field);
27 let plate = match cow.as_deref() {
28 Some(s) => s,
29 None => return vec![],
30 };
31 let norm = normalize_plate(plate);
32 if norm.is_empty() { return vec![]; }
33 vec![norm]
34 }
35}
36
/// Blocking key that tolerates one wrong character in a license plate
/// (for example an OCR misread).
///
/// For every plate it emits the normalized plate plus each variant obtained by
/// deleting a single character, so two plates of equal length that differ in
/// exactly one position share the variant with that position removed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PlateOCRFuzzyKey {
    /// Name of the record field that holds the raw plate text.
    plate_field: String,
}

impl PlateOCRFuzzyKey {
    /// Creates a key that reads the plate from the record field `plate_field`.
    pub fn new(plate_field: &str) -> Self {
        Self {
            plate_field: plate_field.to_owned(),
        }
    }
}
59
60impl BlockingKey for PlateOCRFuzzyKey {
61 fn name(&self) -> &str { "plate_ocr" }
62
63 fn extract(&self, record: &Record, _schema: &Schema) -> Vec<String> {
64 let cow = record.field_as_str(&self.plate_field);
65 let plate = match cow.as_deref() {
66 Some(s) => s,
67 None => return vec![],
68 };
69 let norm = normalize_plate(plate);
70 if norm.is_empty() { return vec![]; }
71
72 let chars: Vec<char> = norm.chars().collect();
73 let n = chars.len();
74 let mut keys = Vec::with_capacity(n + 1);
75 keys.push(norm.clone());
76
77 for i in 0..n {
81 let variant: String = chars.iter().enumerate()
82 .filter(|&(j, _)| j != i)
83 .map(|(_, &c)| c)
84 .collect();
85 keys.push(variant);
86 }
87
88 keys.sort();
89 keys.dedup();
90 keys
91 }
92}
93
/// Blocking key that groups records by camera, calendar date and a
/// fixed-width time-of-day slot.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CameraTimeWindowKey {
    /// Name of the record field that identifies the camera.
    camera_field: String,
    /// Name of the record field that holds the timestamp text.
    time_field: String,
    /// Width of one time slot in minutes; should be non-zero.
    window_mins: u32,
}

impl CameraTimeWindowKey {
    /// Creates a key reading the camera from `camera_field` and the timestamp
    /// from `time_field`, bucketing the time of day into slots of
    /// `window_mins` minutes.
    pub fn new(camera_field: &str, time_field: &str, window_mins: u32) -> Self {
        Self {
            camera_field: camera_field.to_owned(),
            time_field: time_field.to_owned(),
            window_mins,
        }
    }
}
116
/// Maps the time of day in `datetime` to the index of its `window`-minute slot.
///
/// `datetime` must contain a `T` followed by `HH:MM`; anything after a second
/// `:` is ignored. The slot is `(HH * 60 + MM) / window`. Hour and minute
/// values are not range-checked.
///
/// Returns `None` when the `T` separator or either number is missing or not a
/// valid `u32`, when `window` is zero, or when the minute count would
/// overflow `u32`.
fn time_to_slot(datetime: &str, window: u32) -> Option<u32> {
    let separator = datetime.find('T')?;
    // 'T' is a single byte, so `separator + 1` is always a char boundary.
    let time_part = datetime.get(separator + 1..)?;
    let mut parts = time_part.splitn(3, ':');
    let hour: u32 = parts.next()?.parse().ok()?;
    let minute: u32 = parts.next()?.parse().ok()?;
    // Checked arithmetic: a zero window or an absurdly large hour yields
    // `None` instead of a divide-by-zero / overflow panic.
    hour.checked_mul(60)?
        .checked_add(minute)?
        .checked_div(window)
}
125
126impl BlockingKey for CameraTimeWindowKey {
127 fn name(&self) -> &str { "cam_time_window" }
128
129 fn extract(&self, record: &Record, _schema: &Schema) -> Vec<String> {
130 let cam_cow = record.field_as_str(&self.camera_field);
131 let cam = match cam_cow.as_deref() {
132 Some(s) => s,
133 None => return vec![],
134 };
135 let ts_cow = record.field_as_str(&self.time_field);
136 let ts = match ts_cow.as_deref() {
137 Some(s) => s,
138 None => return vec![],
139 };
140 let date = ts.get(..10).unwrap_or("");
141 let slot = match time_to_slot(ts, self.window_mins) {
142 Some(s) => s,
143 None => return vec![],
144 };
145 vec![format!("{}:{}:{}", cam, date, slot)]
146 }
147}
148
/// Blocking key that groups records by the square grid cell containing their
/// coordinates.
#[derive(Debug, Clone, PartialEq)]
pub struct GeoGridKey {
    /// Name of the record field that holds the latitude.
    lat_field: String,
    /// Name of the record field that holds the longitude.
    lon_field: String,
    /// Edge length of one grid cell, in the same unit as the coordinates;
    /// should be finite and non-zero.
    grid_size: f64,
}

impl GeoGridKey {
    /// Creates a key reading coordinates from `lat_field` / `lon_field` and
    /// bucketing them into cells of `grid_size` per side.
    pub fn new(lat_field: &str, lon_field: &str, grid_size: f64) -> Self {
        Self {
            lat_field: lat_field.to_owned(),
            lon_field: lon_field.to_owned(),
            grid_size,
        }
    }
}
170
171impl BlockingKey for GeoGridKey {
172 fn name(&self) -> &str { "geo_grid" }
173
174 fn extract(&self, record: &Record, _schema: &Schema) -> Vec<String> {
175 let lat = match record.field_as::<f64>(&self.lat_field) {
176 Some(v) => v,
177 None => return vec![],
178 };
179 let lon = match record.field_as::<f64>(&self.lon_field) {
180 Some(v) => v,
181 None => return vec![],
182 };
183 let lat_cell = (lat / self.grid_size).floor() as i64;
184 let lon_cell = (lon / self.grid_size).floor() as i64;
185 vec![format!("{}:{}", lat_cell, lon_cell)]
186 }
187}
188
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use super::*;
    use zer_core::{
        record::FieldValue,
        schema::{FieldKind, SchemaBuilder},
    };

    /// Builds the five-field schema shared by every test.
    fn schema() -> Schema {
        SchemaBuilder::new()
            .field("kenteken", FieldKind::LicensePlate)
            .field("camera_id", FieldKind::Categorical)
            .field("tijdstip", FieldKind::Timestamp)
            .field("lat", FieldKind::GpsCoordinate)
            .field("lon", FieldKind::GpsCoordinate)
            .build()
            .unwrap()
    }

    /// Builds a record whose five fields are all stored as text values.
    fn rec(id: u64, kenteken: &str, camera: &str, ts: &str, lat: &str, lon: &str) -> Record {
        Record::new(id)
            .insert("kenteken", FieldValue::Text(kenteken.into()))
            .insert("camera_id", FieldValue::Text(camera.into()))
            .insert("tijdstip", FieldValue::Text(ts.into()))
            .insert("lat", FieldValue::Text(lat.into()))
            .insert("lon", FieldValue::Text(lon.into()))
    }

    #[test]
    fn plate_norm_strips_hyphens() {
        let record = rec(1, "25-XKL-9", "CAM-A1-001", "2025-01-01T10:00:00", "52.3", "4.9");
        let extracted = LicensePlateNormKey::new("kenteken").extract(&record, &schema());
        assert_eq!(extracted, vec!["25XKL9"]);
    }

    #[test]
    fn plate_norm_empty_field_returns_empty() {
        // A record without any fields has no plate to normalize.
        let blank = Record::new(1);
        let extracted = LicensePlateNormKey::new("kenteken").extract(&blank, &schema());
        assert!(extracted.is_empty());
    }

    #[test]
    fn ocr_fuzzy_original_and_confused_share_key() {
        let schema = schema();
        let key = PlateOCRFuzzyKey::new("kenteken");
        let key_set = |record: &Record| -> HashSet<String> {
            key.extract(record, &schema).into_iter().collect()
        };

        // Same plate, with the digit `1` replaced by the letter `I` in the second record.
        let true_keys =
            key_set(&rec(1, "CX-180-W", "CAM-A1-001", "2025-01-01T10:00:00", "52.3", "4.9"));
        let ocr_keys =
            key_set(&rec(2, "CX-I80-W", "CAM-A1-001", "2025-01-01T10:00:00", "52.3", "4.9"));

        assert!(
            !true_keys.is_disjoint(&ocr_keys),
            "true plate and OCR plate must share at least one fuzzy key; true={true_keys:?}, ocr={ocr_keys:?}"
        );
    }

    #[test]
    fn ocr_fuzzy_emits_multiple_variants() {
        let record = rec(1, "L01A4", "CAM", "2025-01-01T08:00:00", "52.0", "4.0");
        let keys = PlateOCRFuzzyKey::new("kenteken").extract(&record, &schema());
        let has = |wanted: &str| keys.iter().any(|k| k.as_str() == wanted);

        assert!(keys.len() >= 4, "should emit original + deletion variants; got {keys:?}");
        assert!(has("L01A4"), "original key must be present");
        assert!(has("01A4"), "deletion at pos 0 (L) expected");
        assert!(has("L0A4"), "deletion at pos 2 (1) expected");
    }

    #[test]
    fn camera_time_window_same_slot() {
        let schema = schema();
        let key = CameraTimeWindowKey::new("camera_id", "tijdstip", 10);

        // 14:02 and 14:08 both fall in the 14:00-14:09 window.
        let first = rec(1, "X", "CAM-A1-001", "2025-06-01T14:02:00", "52.0", "4.0");
        let second = rec(2, "Y", "CAM-A1-001", "2025-06-01T14:08:00", "52.0", "4.0");
        assert_eq!(key.extract(&first, &schema), key.extract(&second, &schema));
    }

    #[test]
    fn camera_time_window_different_slot() {
        let schema = schema();
        let key = CameraTimeWindowKey::new("camera_id", "tijdstip", 10);

        // 14:02 and 14:12 are in adjacent ten-minute windows.
        let first = rec(1, "X", "CAM-A1-001", "2025-06-01T14:02:00", "52.0", "4.0");
        let second = rec(2, "Y", "CAM-A1-001", "2025-06-01T14:12:00", "52.0", "4.0");
        assert_ne!(key.extract(&first, &schema), key.extract(&second, &schema));
    }

    #[test]
    fn geo_grid_nearby_records_share_key() {
        let schema = schema();
        let key = GeoGridKey::new("lat", "lon", 0.01);

        let here = rec(1, "X", "CAM", "2025-01-01T10:00:00", "52.345", "4.901");
        let close_by = rec(2, "Y", "CAM", "2025-01-01T10:00:00", "52.349", "4.907");
        assert_eq!(key.extract(&here, &schema), key.extract(&close_by, &schema));
    }

    #[test]
    fn geo_grid_distant_records_differ() {
        let schema = schema();
        let key = GeoGridKey::new("lat", "lon", 0.01);

        let here = rec(1, "X", "CAM", "2025-01-01T10:00:00", "52.345", "4.901");
        let far_away = rec(2, "Y", "CAM", "2025-01-01T10:00:00", "51.922", "4.479");
        assert_ne!(key.extract(&here, &schema), key.extract(&far_away, &schema));
    }
}