Skip to main content

zer_blocking/
factory.rs

1use zer_core::schema::{FieldKind, Schema};
2
3use crate::{
4    blocker::CompositeBlocker,
5    keys::{
6        AddressInitialKey, AliasPhoneticKey, BlockingKey, CameraTimeWindowKey, DateFragmentKey,
7        DateGranularity, DocumentSuffixKey, ExactFieldKey, FuzzyYearKey, GeoGridKey,
8        LicensePlateNormKey, PhoneticNameDobInitialKey, PhoneticNameDobKey, PlateOCRFuzzyKey,
9        SuffixKey, TransliteratedPhoneticKey,
10    },
11};
12
/// Domain a dataset belongs to, used to pick a pre-tuned set of blocking keys.
///
/// Hand a variant to `BlockerFactory::from_schema_category` to obtain a
/// `CompositeBlocker` configured for that domain instead of depending only on
/// the generic `FieldKind` heuristics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SchemaCategory {
    /// Generic person registers (e.g. KvK directors, population registers).
    PersonRegistry,
    /// SIS II wanted/missing persons: aliases, transposed names, estimated DOBs.
    WantedPersons,
    /// ANPR vehicle passage logs: plate OCR, camera/time windows, GPS grid.
    ANPRPassages,
    /// Call detail records: phone-number suffix plus categorical fields
    /// (cell tower / IMEI).
    CallDetailRecords,
    /// SIM subscriber snapshots: phone suffix, IMSI/ICCID suffix, categorical fields.
    SIMSubscribers,
    /// FIU financial intelligence: account/transaction id suffix, date fragments.
    FinancialIntelligence,
}
33
34/// A single blocking rule used inside [`CustomSchemaCategory`].
35///
36/// Each variant describes how to derive one set of blocking keys from a
37/// schema's `FieldKind` annotations. The `Custom` variant is an escape
38/// hatch for any key the built-in rules don't cover.
39enum CategoryRule {
40    /// `SuffixKey(n)` on every `FieldKind::Phone` field.
41    PhoneSuffix(usize),
42    /// `SuffixKey(n)` on every `FieldKind::Id` field.
43    IdSuffix(usize),
44    /// `DocumentSuffixKey(n)` on every `FieldKind::Id` field (strips non-alphanumeric, uppercases).
45    DocumentSuffix(usize),
46    /// `ExactFieldKey` on every `FieldKind::Categorical` field.
47    ExactCategorical,
48    /// `DateFragmentKey` with the given granularity on the first `FieldKind::Date` field.
49    DateFragment(DateGranularity),
50    /// `PhoneticNameDobKey` using the last `Name` field as surname and first `Date` field as DOB.
51    PhoneticNameDob,
52    /// `PhoneticNameDobInitialKey`: surname phonetic + first-name initial + DOB year.
53    /// Requires at least two `Name` fields (first = given name, last = surname).
54    PhoneticNameDobInitial,
55    /// `AddressInitialKey` using the first `Address` field and first `Name` field as initial.
56    AddressInitial,
57    /// A fully custom key, the escape hatch for anything the built-in rules don't cover.
58    Custom(Box<dyn BlockingKey>),
59}
60
61/// A user-defined blocking category assembled from individual rules.
62///
63/// Build one with the fluent `with_*` methods and pass it to
64/// [`BlockerFactory::from_custom_category`].
65///
66/// # Example
67/// ```
68/// use zer_blocking::{BlockerFactory, CustomSchemaCategory};
69/// use zer_blocking::keys::DateGranularity;
70///
71/// let category = CustomSchemaCategory::new()
72///     .with_phonetic_name_dob()
73///     .with_id_suffix(4)
74///     .with_exact_categorical();
75///
76/// // let blocker = BlockerFactory::from_custom_category(&schema, category);
77/// ```
78pub struct CustomSchemaCategory {
79    rules: Vec<CategoryRule>,
80}
81
82impl CustomSchemaCategory {
83    pub fn new() -> Self {
84        Self { rules: vec![] }
85    }
86
87    /// Add a `SuffixKey(n)` on every `Phone` field.
88    pub fn with_phone_suffix(mut self, n: usize) -> Self {
89        self.rules.push(CategoryRule::PhoneSuffix(n));
90        self
91    }
92
93    /// Add a `SuffixKey(n)` on every `Id` field (digits-only suffix).
94    pub fn with_id_suffix(mut self, n: usize) -> Self {
95        self.rules.push(CategoryRule::IdSuffix(n));
96        self
97    }
98
99    /// Add a `DocumentSuffixKey(n)` on every `Id` field (alphanumeric suffix, uppercased).
100    pub fn with_document_suffix(mut self, n: usize) -> Self {
101        self.rules.push(CategoryRule::DocumentSuffix(n));
102        self
103    }
104
105    /// Add an `ExactFieldKey` on every `Categorical` field.
106    pub fn with_exact_categorical(mut self) -> Self {
107        self.rules.push(CategoryRule::ExactCategorical);
108        self
109    }
110
111    /// Add a `DateFragmentKey` with the given granularity on the first `Date` field.
112    pub fn with_date_fragment(mut self, granularity: DateGranularity) -> Self {
113        self.rules.push(CategoryRule::DateFragment(granularity));
114        self
115    }
116
117    /// Add a `PhoneticNameDobKey` using the last `Name` field and the first `Date` field.
118    pub fn with_phonetic_name_dob(mut self) -> Self {
119        self.rules.push(CategoryRule::PhoneticNameDob);
120        self
121    }
122
123    /// Add a `PhoneticNameDobInitialKey` (surname phonetic + first-name initial + DOB year).
124    /// Falls back to `PhoneticNameDobKey` when only one `Name` field is present.
125    pub fn with_phonetic_name_dob_initial(mut self) -> Self {
126        self.rules.push(CategoryRule::PhoneticNameDobInitial);
127        self
128    }
129
130    /// Add an `AddressInitialKey` using the first `Address` field and first `Name` field as initial.
131    pub fn with_address_initial(mut self) -> Self {
132        self.rules.push(CategoryRule::AddressInitial);
133        self
134    }
135
136    /// Add an arbitrary blocking key, escape hatch for keys the built-in rules don't cover.
137    pub fn with_key(mut self, key: impl BlockingKey + 'static) -> Self {
138        self.rules.push(CategoryRule::Custom(Box::new(key)));
139        self
140    }
141}
142
143impl Default for CustomSchemaCategory {
144    fn default() -> Self {
145        Self::new()
146    }
147}
148
149pub struct BlockerFactory;
150
151impl BlockerFactory {
152    /// Build a `CompositeBlocker` whose blocking keys are chosen automatically
153    /// from the `Schema`'s `FieldKind` annotations.
154    ///
155    /// Priority rules (applied in order):
156    /// - 2+ Name fields + Date: uses `PhoneticNameDobInitialKey` (surname phonetic + first-name initial + DOB year)
157    /// - 1 Name field + Date: uses `PhoneticNameDobKey` (surname phonetic + DOB year)
158    /// - Name + Address: uses `AddressInitialKey` (first address token + first name initial)
159    /// - Phone: adds `SuffixKey(7)` on the first Phone field
160    /// - Id: adds `SuffixKey(4)` on each Id field
161    /// - Date only (no Name): adds `DateFragmentKey(YearMonth)` on the first Date field
162    /// - Categorical: adds `ExactFieldKey` on each Categorical field
163    pub fn from_schema(schema: &Schema) -> CompositeBlocker {
164        let mut blocker = CompositeBlocker::new();
165
166        let name_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Name).collect();
167        let date_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Date).collect();
168        let addr_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Address).collect();
169        let phone_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Phone).collect();
170        let id_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Id).collect();
171        let cat_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Categorical).collect();
172
173        if let (Some(&surname), Some(&dob)) = (name_fields.last(), date_fields.first()) {
174            if name_fields.len() >= 2 {
175                let first_name = name_fields[0];
176                blocker = blocker.add(PhoneticNameDobInitialKey::new(surname, first_name, dob));
177            } else {
178                blocker = blocker.add(PhoneticNameDobKey::new(surname, dob));
179            }
180            // Secondary key: catches pairs whose surnames differ phonetically but share birth year-month.
181            blocker = blocker.add(DateFragmentKey::new(dob, DateGranularity::YearMonth));
182        }
183
184        if let (Some(&first_name), Some(&addr)) = (name_fields.first(), addr_fields.first()) {
185            blocker = blocker.add(AddressInitialKey::new(addr, first_name));
186        }
187
188        if let Some(&phone) = phone_fields.first() {
189            blocker = blocker.add(SuffixKey::new(phone, 7));
190        }
191
192        for &id in &id_fields {
193            blocker = blocker.add(SuffixKey::new(id, 4));
194        }
195
196        if name_fields.is_empty() {
197            if let Some(&dob) = date_fields.first() {
198                blocker = blocker.add(DateFragmentKey::new(dob, DateGranularity::YearMonth));
199            }
200        }
201
202        for &cat in &cat_fields {
203            blocker = blocker.add(ExactFieldKey::new(cat));
204        }
205
206        blocker
207    }
208
209    /// Build a `CompositeBlocker` from a user-defined [`CustomSchemaCategory`].
210    ///
211    /// Each rule in the category is applied in order against `schema`'s
212    /// `FieldKind` annotations. Rules that reference field kinds not present
213    /// in the schema are silently skipped, no panic, no empty blocker.
214    pub fn from_custom_category(schema: &Schema, cat: CustomSchemaCategory) -> CompositeBlocker {
215        let mut blocker = CompositeBlocker::new();
216
217        for rule in cat.rules {
218            match rule {
219                CategoryRule::PhoneSuffix(n) => {
220                    for field in schema.fields_of_kind(FieldKind::Phone) {
221                        blocker = blocker.add(SuffixKey::new(field, n));
222                    }
223                }
224                CategoryRule::IdSuffix(n) => {
225                    for field in schema.fields_of_kind(FieldKind::Id) {
226                        blocker = blocker.add(SuffixKey::new(field, n));
227                    }
228                }
229                CategoryRule::DocumentSuffix(n) => {
230                    for field in schema.fields_of_kind(FieldKind::Id) {
231                        blocker = blocker.add(DocumentSuffixKey::new(field, n));
232                    }
233                }
234                CategoryRule::ExactCategorical => {
235                    for field in schema.fields_of_kind(FieldKind::Categorical) {
236                        blocker = blocker.add(ExactFieldKey::new(field));
237                    }
238                }
239                CategoryRule::DateFragment(granularity) => {
240                    if let Some(field) = schema.fields_of_kind(FieldKind::Date).next() {
241                        blocker = blocker.add(DateFragmentKey::new(field, granularity));
242                    }
243                }
244                CategoryRule::PhoneticNameDob => {
245                    let names: Vec<&str> = schema.fields_of_kind(FieldKind::Name).collect();
246                    let dates: Vec<&str> = schema.fields_of_kind(FieldKind::Date).collect();
247                    if let (Some(&surname), Some(&dob)) = (names.last(), dates.first()) {
248                        blocker = blocker.add(PhoneticNameDobKey::new(surname, dob));
249                    }
250                }
251                CategoryRule::PhoneticNameDobInitial => {
252                    let names: Vec<&str> = schema.fields_of_kind(FieldKind::Name).collect();
253                    let dates: Vec<&str> = schema.fields_of_kind(FieldKind::Date).collect();
254                    if let Some(&dob) = dates.first() {
255                        if names.len() >= 2 {
256                            let first_name = names[0];
257                            let surname = names[names.len() - 1];
258                            blocker = blocker
259                                .add(PhoneticNameDobInitialKey::new(surname, first_name, dob));
260                        } else if let Some(&surname) = names.last() {
261                            blocker = blocker.add(PhoneticNameDobKey::new(surname, dob));
262                        }
263                    }
264                }
265                CategoryRule::AddressInitial => {
266                    let names: Vec<&str> = schema.fields_of_kind(FieldKind::Name).collect();
267                    let addrs: Vec<&str> = schema.fields_of_kind(FieldKind::Address).collect();
268                    if let (Some(&first_name), Some(&addr)) = (names.first(), addrs.first()) {
269                        blocker = blocker.add(AddressInitialKey::new(addr, first_name));
270                    }
271                }
272                CategoryRule::Custom(key) => {
273                    blocker = blocker.add_boxed(key);
274                }
275            }
276        }
277
278        blocker
279    }
280
281    fn telecom_blocker(schema: &Schema) -> CompositeBlocker {
282        let mut blocker = CompositeBlocker::new();
283        for f in schema.fields_of_kind(FieldKind::Phone) {
284            blocker = blocker.add(SuffixKey::new(f, 7));
285        }
286        for f in schema.fields_of_kind(FieldKind::Id) {
287            blocker = blocker.add(SuffixKey::new(f, 6));
288        }
289        for f in schema.fields_of_kind(FieldKind::Categorical) {
290            blocker = blocker.add(ExactFieldKey::new(f));
291        }
292        blocker
293    }
294
295    /// Build a `CompositeBlocker` tuned for a specific domain category.
296    ///
297    /// Keys are chosen based on both the category semantics and the
298    /// `FieldKind` annotations present in `schema`.
299    pub fn from_schema_category(schema: &Schema, category: SchemaCategory) -> CompositeBlocker {
300        match category {
301            SchemaCategory::PersonRegistry => Self::from_schema(schema),
302
303            SchemaCategory::WantedPersons => {
304                let mut blocker = CompositeBlocker::new();
305
306                let name_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Name).collect();
307                let date_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Date).collect();
308                let alias_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Alias).collect();
309                let id_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Id).collect();
310
311                if let (Some(&surname), Some(&dob)) = (name_fields.last(), date_fields.first()) {
312                    blocker = blocker.add(PhoneticNameDobKey::new(surname, dob));
313                    blocker = blocker.add(TransliteratedPhoneticKey::new(surname, dob));
314                    blocker = blocker.add(FuzzyYearKey::new(surname, dob, 1));
315                }
316
317                if let Some(&dob) = date_fields.first() {
318                    for &alias in &alias_fields {
319                        blocker = blocker.add(AliasPhoneticKey::new(alias, dob));
320                    }
321                }
322
323                for &id in &id_fields {
324                    blocker = blocker.add(DocumentSuffixKey::new(id, 6));
325                }
326
327                blocker
328            }
329
330            SchemaCategory::ANPRPassages => {
331                let mut blocker = CompositeBlocker::new();
332
333                let plate_fields: Vec<&str> =
334                    schema.fields_of_kind(FieldKind::LicensePlate).collect();
335                let ts_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Timestamp).collect();
336                let cat_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Categorical).collect();
337                let lat_fields: Vec<&str> =
338                    schema.fields_of_kind(FieldKind::GpsCoordinate).collect();
339
340                for &plate in &plate_fields {
341                    blocker = blocker.add(LicensePlateNormKey::new(plate));
342                    blocker = blocker.add(PlateOCRFuzzyKey::new(plate));
343                }
344
345                // camera_id + timestamp → 10-minute window key
346                if let (Some(&cam), Some(&ts)) = (cat_fields.first(), ts_fields.first()) {
347                    blocker = blocker.add(CameraTimeWindowKey::new(cam, ts, 10));
348                }
349
350                // lat + lon → 0.01° grid (~1 km)
351                if lat_fields.len() >= 2 {
352                    blocker = blocker.add(GeoGridKey::new(lat_fields[0], lat_fields[1], 0.01));
353                }
354
355                blocker
356            }
357
358            SchemaCategory::CallDetailRecords | SchemaCategory::SIMSubscribers => {
359                Self::telecom_blocker(schema)
360            }
361
362            SchemaCategory::FinancialIntelligence => {
363                let mut blocker = CompositeBlocker::new();
364
365                let id_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Id).collect();
366                let date_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Date).collect();
367                let cat_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Categorical).collect();
368
369                for &id in &id_fields {
370                    blocker = blocker.add(SuffixKey::new(id, 6));
371                }
372
373                if let Some(&dob) = date_fields.first() {
374                    blocker = blocker.add(DateFragmentKey::new(dob, DateGranularity::YearMonth));
375                }
376
377                for &cat in &cat_fields {
378                    blocker = blocker.add(ExactFieldKey::new(cat));
379                }
380
381                blocker
382            }
383        }
384    }
385}
386
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::InvertedIndex;
    use zer_core::{
        record::{FieldValue, Record},
        schema::{FieldKind, SchemaBuilder},
        traits::Blocker,
    };

    /// Wraps a string literal as a text field value.
    fn text(value: &'static str) -> FieldValue {
        FieldValue::Text(value.into())
    }

    /// Indexes `records`, in order, into a fresh inverted index using `blocker`.
    fn index_all(
        blocker: &CompositeBlocker,
        schema: &Schema,
        records: &[&Record],
    ) -> InvertedIndex {
        let mut index = InvertedIndex::new();
        for &record in records {
            blocker.index_record(record, schema, &mut index);
        }
        index
    }

    /// Dutch person-registry style schema: two names, DOB, address and an id.
    fn person_schema() -> Schema {
        SchemaBuilder::new()
            .field("voornamen", FieldKind::Name)
            .field("achternaam", FieldKind::Name)
            .field("geboortedatum", FieldKind::Date)
            .field("woonplaats", FieldKind::Address)
            .field("postcode", FieldKind::Id)
            .build()
            .unwrap()
    }

    #[test]
    fn factory_name_date_schema_adds_secondary_year_month_key() {
        // With Name + Date fields, from_schema() adds a phonetic surname+DOB key
        // (the initial-aware variant here, since there are two Name fields) AND
        // DateFragmentKey(YearMonth). Records sharing a birth year-month must
        // therefore still be candidates when their surnames differ completely.
        let schema = SchemaBuilder::new()
            .field("voornamen", FieldKind::Name)
            .field("achternaam", FieldKind::Name)
            .field("geboortedatum", FieldKind::Date)
            .build()
            .unwrap();
        let blocker = BlockerFactory::from_schema(&schema);

        let jansen = Record::new(1)
            .insert("achternaam", text("Jansen"))
            .insert("geboortedatum", text("1985-06-15"));
        // Unrelated surname, same birth year-month.
        let pietersen = Record::new(2)
            .insert("achternaam", text("Pietersen"))
            .insert("geboortedatum", text("1985-06-22"));

        let index = index_all(&blocker, &schema, &[&jansen, &pietersen]);

        let candidates = blocker.candidates(&jansen, &schema, &index);
        assert!(
            candidates.contains(&2),
            "secondary YearMonth key must surface r2 (same birth year-month, different surname)"
        );
    }

    #[test]
    fn factory_produces_non_empty_blocker() {
        let schema = person_schema();
        let blocker = BlockerFactory::from_schema(&schema);
        let record = Record::new(1)
            .insert("achternaam", text("Jansen"))
            .insert("geboortedatum", text("1980-01-15"));

        let keys = blocker.blocking_keys(&record, &schema);
        assert!(
            !keys.is_empty(),
            "BlockerFactory should produce at least one key"
        );
    }

    #[test]
    fn factory_date_only_schema_uses_date_fragment() {
        let schema = SchemaBuilder::new()
            .field("dob", FieldKind::Date)
            .build()
            .unwrap();
        let blocker = BlockerFactory::from_schema(&schema);
        let record = Record::new(1).insert("dob", text("1990-06-01"));

        let index = index_all(&blocker, &schema, &[&record]);
        assert!(!index.is_empty());
    }

    #[test]
    fn category_wanted_persons_produces_keys() {
        let schema = SchemaBuilder::new()
            .field("voornamen", FieldKind::Name)
            .field("achternaam", FieldKind::Name)
            .field("alias_namen", FieldKind::Alias)
            .field("geboortedatum", FieldKind::Date)
            .field("document_nummer", FieldKind::Id)
            .build()
            .unwrap();
        let blocker = BlockerFactory::from_schema_category(&schema, SchemaCategory::WantedPersons);
        let record = Record::new(1)
            .insert("achternaam", text("Benabdallah"))
            .insert("geboortedatum", text("1999-06-14"));

        let keys = blocker.blocking_keys(&record, &schema);
        assert!(!keys.is_empty());
    }

    #[test]
    fn category_anpr_produces_plate_keys() {
        let schema = SchemaBuilder::new()
            .field("kenteken", FieldKind::LicensePlate)
            .field("camera_id", FieldKind::Categorical)
            .field("tijdstip", FieldKind::Timestamp)
            .build()
            .unwrap();
        let blocker = BlockerFactory::from_schema_category(&schema, SchemaCategory::ANPRPassages);
        let passage = Record::new(1)
            .insert("kenteken", text("25-XKL-9"))
            .insert("camera_id", text("CAM-A12-001"))
            .insert("tijdstip", text("2025-06-01T10:00:00"));

        let keys = blocker.blocking_keys(&passage, &schema);
        assert!(!keys.is_empty());
        // The plate must show up with its dashes removed.
        assert!(keys.iter().any(|k| k.contains("25XKL9")));
    }

    // ── CustomSchemaCategory tests ────────────────────────────────────────────

    #[test]
    fn custom_phone_suffix_extracts_key() {
        let schema = SchemaBuilder::new()
            .field("telefoon", FieldKind::Phone)
            .build()
            .unwrap();
        let category = CustomSchemaCategory::new().with_phone_suffix(7);
        let blocker = BlockerFactory::from_custom_category(&schema, category);
        let record = Record::new(1).insert("telefoon", text("0612345678"));

        let keys = blocker.blocking_keys(&record, &schema);
        assert!(!keys.is_empty(), "phone suffix rule must produce a key");
        assert!(
            keys.iter().any(|k| k.ends_with("2345678")),
            "key must end with last 7 digits"
        );
    }

    #[test]
    fn custom_id_suffix_correct_length() {
        let schema = SchemaBuilder::new()
            .field("postcode", FieldKind::Id)
            .build()
            .unwrap();
        let category = CustomSchemaCategory::new().with_id_suffix(4);
        let blocker = BlockerFactory::from_custom_category(&schema, category);
        let record = Record::new(1).insert("postcode", text("1011AB"));

        let keys = blocker.blocking_keys(&record, &schema);
        // "1011AB" reduced to its digits is "1011"; its last 4 are "1011".
        assert!(
            keys.iter().any(|k| k.ends_with("1011")),
            "id suffix must be 4 digits: {keys:?}"
        );
    }

    #[test]
    fn custom_exact_categorical_matches_only_same_value() {
        let schema = SchemaBuilder::new()
            .field("tussenvoegsel", FieldKind::Categorical)
            .build()
            .unwrap();
        let category = CustomSchemaCategory::new().with_exact_categorical();
        let blocker = BlockerFactory::from_custom_category(&schema, category);

        let van_a = Record::new(1).insert("tussenvoegsel", text("van"));
        let van_b = Record::new(2).insert("tussenvoegsel", text("van"));
        let de = Record::new(3).insert("tussenvoegsel", text("de"));

        let index = index_all(&blocker, &schema, &[&van_a, &van_b, &de]);

        let candidates = blocker.candidates(&van_a, &schema, &index);
        assert!(
            candidates.contains(&2),
            "r2 (same tussenvoegsel) must be a candidate"
        );
        assert!(
            !candidates.contains(&3),
            "r3 (different tussenvoegsel) must NOT be a candidate"
        );
    }

    #[test]
    fn custom_date_fragment_produces_year_month_key() {
        let schema = SchemaBuilder::new()
            .field("geboortedatum", FieldKind::Date)
            .build()
            .unwrap();
        let category = CustomSchemaCategory::new().with_date_fragment(DateGranularity::YearMonth);
        let blocker = BlockerFactory::from_custom_category(&schema, category);
        let record = Record::new(1).insert("geboortedatum", text("1990-06-15"));

        let keys = blocker.blocking_keys(&record, &schema);
        assert!(
            keys.iter().any(|k| k.contains("1990-06")),
            "key must contain YYYY-MM: {keys:?}"
        );
    }

    #[test]
    fn custom_phonetic_name_dob_links_same_person() {
        let schema = SchemaBuilder::new()
            .field("voornamen", FieldKind::Name)
            .field("achternaam", FieldKind::Name)
            .field("geboortedatum", FieldKind::Date)
            .build()
            .unwrap();
        let category = CustomSchemaCategory::new().with_phonetic_name_dob();
        let blocker = BlockerFactory::from_custom_category(&schema, category);

        let jansen_a = Record::new(1)
            .insert("achternaam", text("Jansen"))
            .insert("geboortedatum", text("1978-03-15"));
        let jansen_b = Record::new(2)
            .insert("achternaam", text("Jansen"))
            .insert("geboortedatum", text("1978-03-15"));
        let de_wit = Record::new(3)
            .insert("achternaam", text("de Wit"))
            .insert("geboortedatum", text("1990-07-22"));

        let index = index_all(&blocker, &schema, &[&jansen_a, &jansen_b, &de_wit]);

        let candidates = blocker.candidates(&jansen_a, &schema, &index);
        assert!(candidates.contains(&2), "same surname+DOB must be a candidate");
        assert!(
            !candidates.contains(&3),
            "different surname+DOB must NOT be a candidate"
        );
    }

    #[test]
    fn custom_missing_field_kind_produces_no_panic() {
        // The schema has no Phone field, so the phone-suffix rule has nothing
        // to attach to and must be skipped quietly.
        let schema = SchemaBuilder::new()
            .field("achternaam", FieldKind::Name)
            .build()
            .unwrap();
        let category = CustomSchemaCategory::new().with_phone_suffix(7);
        let blocker = BlockerFactory::from_custom_category(&schema, category);
        let record = Record::new(1).insert("achternaam", text("Jansen"));

        let keys = blocker.blocking_keys(&record, &schema);
        assert!(keys.is_empty(), "no Phone fields → no keys, no panic");
    }

    #[test]
    fn custom_escape_hatch_with_key_works() {
        let schema = SchemaBuilder::new()
            .field("postcode", FieldKind::Id)
            .build()
            .unwrap();
        // Same SuffixKey(4) that with_id_suffix would add, supplied through with_key.
        let category = CustomSchemaCategory::new().with_key(SuffixKey::new("postcode", 4));
        let blocker = BlockerFactory::from_custom_category(&schema, category);
        let record = Record::new(1).insert("postcode", text("1011AB"));

        let keys = blocker.blocking_keys(&record, &schema);
        assert!(
            !keys.is_empty(),
            "escape-hatch key must produce at least one key"
        );
    }

    #[test]
    fn custom_combined_rules_produce_multiple_key_types() {
        let schema = SchemaBuilder::new()
            .field("voornamen", FieldKind::Name)
            .field("achternaam", FieldKind::Name)
            .field("geboortedatum", FieldKind::Date)
            .field("postcode", FieldKind::Id)
            .field("tussenvoegsel", FieldKind::Categorical)
            .build()
            .unwrap();
        let category = CustomSchemaCategory::new()
            .with_phonetic_name_dob()
            .with_id_suffix(4)
            .with_exact_categorical();
        let blocker = BlockerFactory::from_custom_category(&schema, category);
        let record = Record::new(1)
            .insert("achternaam", text("van den Berg"))
            .insert("geboortedatum", text("1978-03-15"))
            .insert("postcode", text("1011AB"))
            .insert("tussenvoegsel", text("van den"));

        let keys = blocker.blocking_keys(&record, &schema);
        // One key per rule at minimum: phonetic, id suffix, categorical.
        assert!(
            keys.len() >= 3,
            "combined rules must produce at least 3 keys: {keys:?}"
        );
    }
}