Skip to main content

zer_blocking/
factory.rs

1use zer_core::schema::{FieldKind, Schema};
2
3use crate::{
4    blocker::CompositeBlocker,
5    keys::{
6        AddressInitialKey, AliasPhoneticKey, CameraTimeWindowKey, DateFragmentKey,
7        DateGranularity, DocumentSuffixKey, ExactFieldKey, FuzzyYearKey, GeoGridKey,
8        LicensePlateNormKey, PhoneticNameDobInitialKey, PhoneticNameDobKey, PlateOCRFuzzyKey,
9        SuffixKey, TransliteratedPhoneticKey, BlockingKey,
10    },
11};
12
/// High-level domain category for a dataset.
///
/// Pass to `BlockerFactory::from_schema_category` to get a `CompositeBlocker`
/// whose keys are pre-tuned for that category, rather than relying solely on
/// generic `FieldKind` heuristics.
///
/// The enum is a plain field-less tag: it is `Copy`, comparable with `==`,
/// and hashable, so it can be used directly as a `HashMap` / `HashSet` key
/// (for example in a per-category configuration table).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SchemaCategory {
    /// General person registry (KvK directors, population registers).
    PersonRegistry,
    /// SIS II wanted/missing persons: aliases, name transpositions, estimated DOBs.
    WantedPersons,
    /// ANPR vehicle passage logs: license plate OCR, camera/time windows, GPS grid.
    ANPRPassages,
    /// Call detail records: phone number suffix, categorical (cell tower / IMEI).
    CallDetailRecords,
    /// SIM subscriber snapshots: phone suffix, IMSI/ICCID suffix, categorical.
    SIMSubscribers,
    /// FIU financial intelligence: account/transaction ID suffix, date fragments.
    FinancialIntelligence,
}
33
/// A single blocking rule used inside [`CustomSchemaCategory`].
///
/// Each variant describes how to derive one set of blocking keys from a
/// schema's `FieldKind` annotations. The `Custom` variant is an escape
/// hatch for any key the built-in rules don't cover.
///
/// The enum is private to this module: in this file, rules are created by the
/// `with_*` builder methods on [`CustomSchemaCategory`] and interpreted by
/// `BlockerFactory::from_custom_category`.
enum CategoryRule {
    /// `SuffixKey(n)` on every `FieldKind::Phone` field.
    PhoneSuffix(usize),
    /// `SuffixKey(n)` on every `FieldKind::Id` field.
    IdSuffix(usize),
    /// `DocumentSuffixKey(n)` on every `FieldKind::Id` field (strips non-alphanumeric, uppercases).
    DocumentSuffix(usize),
    /// `ExactFieldKey` on every `FieldKind::Categorical` field.
    ExactCategorical,
    /// `DateFragmentKey` with the given granularity on the first `FieldKind::Date` field.
    DateFragment(DateGranularity),
    /// `PhoneticNameDobKey` using the last `Name` field as surname and first `Date` field as DOB.
    PhoneticNameDob,
    /// `PhoneticNameDobInitialKey`: surname phonetic + first-name initial + DOB year.
    /// Used when the schema has at least two `Name` fields (first = given name,
    /// last = surname); with exactly one `Name` field the factory falls back to
    /// `PhoneticNameDobKey`. Nothing is added without a `Date` field.
    PhoneticNameDobInitial,
    /// `AddressInitialKey` using the first `Address` field and first `Name` field as initial.
    AddressInitial,
    /// A fully custom key, the escape hatch for anything the built-in rules don't cover.
    /// The boxed key is handed to the blocker as-is, independent of the schema.
    Custom(Box<dyn BlockingKey>),
}
60
/// A user-defined blocking category assembled from individual rules.
///
/// Build one with the fluent `with_*` methods and pass it to
/// [`BlockerFactory::from_custom_category`].
///
/// Rules are remembered in the order they are added, and the factory applies
/// them in that same order. A rule whose required field kinds are missing
/// from the schema contributes no key.
///
/// # Example
/// ```
/// use zer_blocking::{BlockerFactory, CustomSchemaCategory};
/// use zer_blocking::keys::DateGranularity;
///
/// let category = CustomSchemaCategory::new()
///     .with_phonetic_name_dob()
///     .with_id_suffix(4)
///     .with_exact_categorical();
///
/// // let blocker = BlockerFactory::from_custom_category(&schema, category);
/// ```
pub struct CustomSchemaCategory {
    // Rules in insertion order; consumed by `BlockerFactory::from_custom_category`.
    rules: Vec<CategoryRule>,
}
81
82impl CustomSchemaCategory {
83    pub fn new() -> Self {
84        Self { rules: vec![] }
85    }
86
87    /// Add a `SuffixKey(n)` on every `Phone` field.
88    pub fn with_phone_suffix(mut self, n: usize) -> Self {
89        self.rules.push(CategoryRule::PhoneSuffix(n));
90        self
91    }
92
93    /// Add a `SuffixKey(n)` on every `Id` field (digits-only suffix).
94    pub fn with_id_suffix(mut self, n: usize) -> Self {
95        self.rules.push(CategoryRule::IdSuffix(n));
96        self
97    }
98
99    /// Add a `DocumentSuffixKey(n)` on every `Id` field (alphanumeric suffix, uppercased).
100    pub fn with_document_suffix(mut self, n: usize) -> Self {
101        self.rules.push(CategoryRule::DocumentSuffix(n));
102        self
103    }
104
105    /// Add an `ExactFieldKey` on every `Categorical` field.
106    pub fn with_exact_categorical(mut self) -> Self {
107        self.rules.push(CategoryRule::ExactCategorical);
108        self
109    }
110
111    /// Add a `DateFragmentKey` with the given granularity on the first `Date` field.
112    pub fn with_date_fragment(mut self, granularity: DateGranularity) -> Self {
113        self.rules.push(CategoryRule::DateFragment(granularity));
114        self
115    }
116
117    /// Add a `PhoneticNameDobKey` using the last `Name` field and the first `Date` field.
118    pub fn with_phonetic_name_dob(mut self) -> Self {
119        self.rules.push(CategoryRule::PhoneticNameDob);
120        self
121    }
122
123    /// Add a `PhoneticNameDobInitialKey` (surname phonetic + first-name initial + DOB year).
124    /// Falls back to `PhoneticNameDobKey` when only one `Name` field is present.
125    pub fn with_phonetic_name_dob_initial(mut self) -> Self {
126        self.rules.push(CategoryRule::PhoneticNameDobInitial);
127        self
128    }
129
130    /// Add an `AddressInitialKey` using the first `Address` field and first `Name` field as initial.
131    pub fn with_address_initial(mut self) -> Self {
132        self.rules.push(CategoryRule::AddressInitial);
133        self
134    }
135
136    /// Add an arbitrary blocking key, escape hatch for keys the built-in rules don't cover.
137    pub fn with_key(mut self, key: impl BlockingKey + 'static) -> Self {
138        self.rules.push(CategoryRule::Custom(Box::new(key)));
139        self
140    }
141}
142
143impl Default for CustomSchemaCategory {
144    fn default() -> Self {
145        Self::new()
146    }
147}
148
/// Stateless entry point for assembling a [`CompositeBlocker`] from a [`Schema`].
///
/// The type carries no data; its constructors (`from_schema`,
/// `from_schema_category`, `from_custom_category`) are associated functions.
pub struct BlockerFactory;
150
151impl BlockerFactory {
152    /// Build a `CompositeBlocker` whose blocking keys are chosen automatically
153    /// from the `Schema`'s `FieldKind` annotations.
154    ///
155    /// Priority rules (applied in order):
156    /// - 2+ Name fields + Date: uses `PhoneticNameDobInitialKey` (surname phonetic + first-name initial + DOB year)
157    /// - 1 Name field + Date: uses `PhoneticNameDobKey` (surname phonetic + DOB year)
158    /// - Name + Address: uses `AddressInitialKey` (first address token + first name initial)
159    /// - Phone: adds `SuffixKey(7)` on the first Phone field
160    /// - Id: adds `SuffixKey(4)` on each Id field
161    /// - Date only (no Name): adds `DateFragmentKey(YearMonth)` on the first Date field
162    /// - Categorical: adds `ExactFieldKey` on each Categorical field
163    pub fn from_schema(schema: &Schema) -> CompositeBlocker {
164        let mut blocker = CompositeBlocker::new();
165
166        let name_fields: Vec<&str>  = schema.fields_of_kind(FieldKind::Name).collect();
167        let date_fields: Vec<&str>  = schema.fields_of_kind(FieldKind::Date).collect();
168        let addr_fields: Vec<&str>  = schema.fields_of_kind(FieldKind::Address).collect();
169        let phone_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Phone).collect();
170        let id_fields: Vec<&str>    = schema.fields_of_kind(FieldKind::Id).collect();
171        let cat_fields: Vec<&str>   = schema.fields_of_kind(FieldKind::Categorical).collect();
172
173        if let (Some(&surname), Some(&dob)) = (name_fields.last(), date_fields.first()) {
174            if name_fields.len() >= 2 {
175                let first_name = name_fields[0];
176                blocker = blocker.add(PhoneticNameDobInitialKey::new(surname, first_name, dob));
177            } else {
178                blocker = blocker.add(PhoneticNameDobKey::new(surname, dob));
179            }
180            // Secondary key: catches pairs whose surnames differ phonetically but share birth year-month.
181            blocker = blocker.add(DateFragmentKey::new(dob, DateGranularity::YearMonth));
182        }
183
184        if let (Some(&first_name), Some(&addr)) = (name_fields.first(), addr_fields.first()) {
185            blocker = blocker.add(AddressInitialKey::new(addr, first_name));
186        }
187
188        if let Some(&phone) = phone_fields.first() {
189            blocker = blocker.add(SuffixKey::new(phone, 7));
190        }
191
192        for &id in &id_fields {
193            blocker = blocker.add(SuffixKey::new(id, 4));
194        }
195
196        if name_fields.is_empty() {
197            if let Some(&dob) = date_fields.first() {
198                blocker = blocker.add(DateFragmentKey::new(dob, DateGranularity::YearMonth));
199            }
200        }
201
202        for &cat in &cat_fields {
203            blocker = blocker.add(ExactFieldKey::new(cat));
204        }
205
206        blocker
207    }
208
209    /// Build a `CompositeBlocker` from a user-defined [`CustomSchemaCategory`].
210    ///
211    /// Each rule in the category is applied in order against `schema`'s
212    /// `FieldKind` annotations. Rules that reference field kinds not present
213    /// in the schema are silently skipped, no panic, no empty blocker.
214    pub fn from_custom_category(schema: &Schema, cat: CustomSchemaCategory) -> CompositeBlocker {
215        let mut blocker = CompositeBlocker::new();
216
217        for rule in cat.rules {
218            match rule {
219                CategoryRule::PhoneSuffix(n) => {
220                    for field in schema.fields_of_kind(FieldKind::Phone) {
221                        blocker = blocker.add(SuffixKey::new(field, n));
222                    }
223                }
224                CategoryRule::IdSuffix(n) => {
225                    for field in schema.fields_of_kind(FieldKind::Id) {
226                        blocker = blocker.add(SuffixKey::new(field, n));
227                    }
228                }
229                CategoryRule::DocumentSuffix(n) => {
230                    for field in schema.fields_of_kind(FieldKind::Id) {
231                        blocker = blocker.add(DocumentSuffixKey::new(field, n));
232                    }
233                }
234                CategoryRule::ExactCategorical => {
235                    for field in schema.fields_of_kind(FieldKind::Categorical) {
236                        blocker = blocker.add(ExactFieldKey::new(field));
237                    }
238                }
239                CategoryRule::DateFragment(granularity) => {
240                    if let Some(field) = schema.fields_of_kind(FieldKind::Date).next() {
241                        blocker = blocker.add(DateFragmentKey::new(field, granularity));
242                    }
243                }
244                CategoryRule::PhoneticNameDob => {
245                    let names: Vec<&str> = schema.fields_of_kind(FieldKind::Name).collect();
246                    let dates: Vec<&str> = schema.fields_of_kind(FieldKind::Date).collect();
247                    if let (Some(&surname), Some(&dob)) = (names.last(), dates.first()) {
248                        blocker = blocker.add(PhoneticNameDobKey::new(surname, dob));
249                    }
250                }
251                CategoryRule::PhoneticNameDobInitial => {
252                    let names: Vec<&str> = schema.fields_of_kind(FieldKind::Name).collect();
253                    let dates: Vec<&str> = schema.fields_of_kind(FieldKind::Date).collect();
254                    if let Some(&dob) = dates.first() {
255                        if names.len() >= 2 {
256                            let first_name = names[0];
257                            let surname    = names[names.len() - 1];
258                            blocker = blocker.add(PhoneticNameDobInitialKey::new(surname, first_name, dob));
259                        } else if let Some(&surname) = names.last() {
260                            blocker = blocker.add(PhoneticNameDobKey::new(surname, dob));
261                        }
262                    }
263                }
264                CategoryRule::AddressInitial => {
265                    let names: Vec<&str> = schema.fields_of_kind(FieldKind::Name).collect();
266                    let addrs: Vec<&str> = schema.fields_of_kind(FieldKind::Address).collect();
267                    if let (Some(&first_name), Some(&addr)) = (names.first(), addrs.first()) {
268                        blocker = blocker.add(AddressInitialKey::new(addr, first_name));
269                    }
270                }
271                CategoryRule::Custom(key) => {
272                    blocker = blocker.add_boxed(key);
273                }
274            }
275        }
276
277        blocker
278    }
279
280    fn telecom_blocker(schema: &Schema) -> CompositeBlocker {
281        let mut blocker = CompositeBlocker::new();
282        for f in schema.fields_of_kind(FieldKind::Phone)       { blocker = blocker.add(SuffixKey::new(f, 7)); }
283        for f in schema.fields_of_kind(FieldKind::Id)          { blocker = blocker.add(SuffixKey::new(f, 6)); }
284        for f in schema.fields_of_kind(FieldKind::Categorical) { blocker = blocker.add(ExactFieldKey::new(f)); }
285        blocker
286    }
287
288    /// Build a `CompositeBlocker` tuned for a specific domain category.
289    ///
290    /// Keys are chosen based on both the category semantics and the
291    /// `FieldKind` annotations present in `schema`.
292    pub fn from_schema_category(schema: &Schema, category: SchemaCategory) -> CompositeBlocker {
293        match category {
294            SchemaCategory::PersonRegistry => Self::from_schema(schema),
295
296            SchemaCategory::WantedPersons => {
297                let mut blocker = CompositeBlocker::new();
298
299                let name_fields:  Vec<&str> = schema.fields_of_kind(FieldKind::Name).collect();
300                let date_fields:  Vec<&str> = schema.fields_of_kind(FieldKind::Date).collect();
301                let alias_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Alias).collect();
302                let id_fields:    Vec<&str> = schema.fields_of_kind(FieldKind::Id).collect();
303
304                if let (Some(&surname), Some(&dob)) = (name_fields.last(), date_fields.first()) {
305                    blocker = blocker.add(PhoneticNameDobKey::new(surname, dob));
306                    blocker = blocker.add(TransliteratedPhoneticKey::new(surname, dob));
307                    blocker = blocker.add(FuzzyYearKey::new(surname, dob, 1));
308                }
309
310                if let Some(&dob) = date_fields.first() {
311                    for &alias in &alias_fields {
312                        blocker = blocker.add(AliasPhoneticKey::new(alias, dob));
313                    }
314                }
315
316                for &id in &id_fields {
317                    blocker = blocker.add(DocumentSuffixKey::new(id, 6));
318                }
319
320                blocker
321            }
322
323            SchemaCategory::ANPRPassages => {
324                let mut blocker = CompositeBlocker::new();
325
326                let plate_fields: Vec<&str> = schema.fields_of_kind(FieldKind::LicensePlate).collect();
327                let ts_fields:    Vec<&str> = schema.fields_of_kind(FieldKind::Timestamp).collect();
328                let cat_fields:   Vec<&str> = schema.fields_of_kind(FieldKind::Categorical).collect();
329                let lat_fields:   Vec<&str> = schema.fields_of_kind(FieldKind::GpsCoordinate).collect();
330
331                for &plate in &plate_fields {
332                    blocker = blocker.add(LicensePlateNormKey::new(plate));
333                    blocker = blocker.add(PlateOCRFuzzyKey::new(plate));
334                }
335
336                // camera_id + timestamp → 10-minute window key
337                if let (Some(&cam), Some(&ts)) = (cat_fields.first(), ts_fields.first()) {
338                    blocker = blocker.add(CameraTimeWindowKey::new(cam, ts, 10));
339                }
340
341                // lat + lon → 0.01° grid (~1 km)
342                if lat_fields.len() >= 2 {
343                    blocker = blocker.add(GeoGridKey::new(lat_fields[0], lat_fields[1], 0.01));
344                }
345
346                blocker
347            }
348
349            SchemaCategory::CallDetailRecords |
350            SchemaCategory::SIMSubscribers => Self::telecom_blocker(schema),
351
352            SchemaCategory::FinancialIntelligence => {
353                let mut blocker = CompositeBlocker::new();
354
355                let id_fields:   Vec<&str> = schema.fields_of_kind(FieldKind::Id).collect();
356                let date_fields: Vec<&str> = schema.fields_of_kind(FieldKind::Date).collect();
357                let cat_fields:  Vec<&str> = schema.fields_of_kind(FieldKind::Categorical).collect();
358
359                for &id in &id_fields {
360                    blocker = blocker.add(SuffixKey::new(id, 6));
361                }
362
363                if let Some(&dob) = date_fields.first() {
364                    blocker = blocker.add(DateFragmentKey::new(dob, DateGranularity::YearMonth));
365                }
366
367                for &cat in &cat_fields {
368                    blocker = blocker.add(ExactFieldKey::new(cat));
369                }
370
371                blocker
372            }
373        }
374    }
375}
376
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::InvertedIndex;
    use zer_core::{
        record::{FieldValue, Record},
        schema::{FieldKind, SchemaBuilder},
        traits::Blocker,
    };

    /// Shorthand for a text field value.
    fn text(value: &'static str) -> FieldValue {
        FieldValue::Text(value.into())
    }

    /// Schema consisting of exactly one field of the given kind.
    fn single_field_schema(name: &'static str, kind: FieldKind) -> Schema {
        SchemaBuilder::new().field(name, kind).build().unwrap()
    }

    /// Given name + surname + date of birth, in that order.
    fn names_and_dob_schema() -> Schema {
        SchemaBuilder::new()
            .field("voornamen", FieldKind::Name)
            .field("achternaam", FieldKind::Name)
            .field("geboortedatum", FieldKind::Date)
            .build()
            .unwrap()
    }

    /// Person registry style schema: two names, DOB, address and an id.
    fn person_schema() -> Schema {
        SchemaBuilder::new()
            .field("voornamen", FieldKind::Name)
            .field("achternaam", FieldKind::Name)
            .field("geboortedatum", FieldKind::Date)
            .field("woonplaats", FieldKind::Address)
            .field("postcode", FieldKind::Id)
            .build()
            .unwrap()
    }

    #[test]
    fn factory_name_date_schema_adds_secondary_year_month_key() {
        // With Name + Date fields, from_schema() must add the phonetic key AND
        // DateFragmentKey(YearMonth). Records sharing a birth year-month must
        // be candidates through the secondary key even when their surnames
        // produce different phonetic codes.
        let schema = names_and_dob_schema();
        let blocker = BlockerFactory::from_schema(&schema);

        let jansen = Record::new(1)
            .insert("achternaam", text("Jansen"))
            .insert("geboortedatum", text("1985-06-15"));
        // Completely different surname but same birth year-month.
        let pietersen = Record::new(2)
            .insert("achternaam", text("Pietersen"))
            .insert("geboortedatum", text("1985-06-22"));

        let mut idx = InvertedIndex::new();
        for record in [&jansen, &pietersen] {
            blocker.index_record(record, &schema, &mut idx);
        }

        let cands = blocker.candidates(&jansen, &schema, &idx);
        assert!(
            cands.contains(&2),
            "secondary YearMonth key must surface r2 (same birth year-month, different surname)"
        );
    }

    #[test]
    fn factory_produces_non_empty_blocker() {
        let schema = person_schema();
        let blocker = BlockerFactory::from_schema(&schema);
        let record = Record::new(1)
            .insert("achternaam", text("Jansen"))
            .insert("geboortedatum", text("1980-01-15"));

        let keys = blocker.blocking_keys(&record, &schema);
        assert!(!keys.is_empty(), "BlockerFactory should produce at least one key");
    }

    #[test]
    fn factory_date_only_schema_uses_date_fragment() {
        let schema = single_field_schema("dob", FieldKind::Date);
        let blocker = BlockerFactory::from_schema(&schema);
        let record = Record::new(1).insert("dob", text("1990-06-01"));

        let mut idx = InvertedIndex::new();
        blocker.index_record(&record, &schema, &mut idx);
        assert!(!idx.is_empty());
    }

    #[test]
    fn category_wanted_persons_produces_keys() {
        let schema = SchemaBuilder::new()
            .field("voornamen", FieldKind::Name)
            .field("achternaam", FieldKind::Name)
            .field("alias_namen", FieldKind::Alias)
            .field("geboortedatum", FieldKind::Date)
            .field("document_nummer", FieldKind::Id)
            .build()
            .unwrap();
        let blocker = BlockerFactory::from_schema_category(&schema, SchemaCategory::WantedPersons);
        let record = Record::new(1)
            .insert("achternaam", text("Benabdallah"))
            .insert("geboortedatum", text("1999-06-14"));

        let keys = blocker.blocking_keys(&record, &schema);
        assert!(!keys.is_empty());
    }

    #[test]
    fn category_anpr_produces_plate_keys() {
        let schema = SchemaBuilder::new()
            .field("kenteken", FieldKind::LicensePlate)
            .field("camera_id", FieldKind::Categorical)
            .field("tijdstip", FieldKind::Timestamp)
            .build()
            .unwrap();
        let blocker = BlockerFactory::from_schema_category(&schema, SchemaCategory::ANPRPassages);
        let record = Record::new(1)
            .insert("kenteken", text("25-XKL-9"))
            .insert("camera_id", text("CAM-A12-001"))
            .insert("tijdstip", text("2025-06-01T10:00:00"));

        let keys = blocker.blocking_keys(&record, &schema);
        assert!(!keys.is_empty());
        assert!(keys.iter().any(|k| k.contains("25XKL9")));
    }

    // ── CustomSchemaCategory tests ────────────────────────────────────────────

    #[test]
    fn custom_phone_suffix_extracts_key() {
        let schema = single_field_schema("telefoon", FieldKind::Phone);
        let blocker = BlockerFactory::from_custom_category(
            &schema,
            CustomSchemaCategory::new().with_phone_suffix(7),
        );
        let record = Record::new(1).insert("telefoon", text("0612345678"));

        let keys = blocker.blocking_keys(&record, &schema);
        assert!(!keys.is_empty(), "phone suffix rule must produce a key");
        assert!(keys.iter().any(|k| k.ends_with("2345678")), "key must end with last 7 digits");
    }

    #[test]
    fn custom_id_suffix_correct_length() {
        let schema = single_field_schema("postcode", FieldKind::Id);
        let blocker = BlockerFactory::from_custom_category(
            &schema,
            CustomSchemaCategory::new().with_id_suffix(4),
        );
        let record = Record::new(1).insert("postcode", text("1011AB"));

        let keys = blocker.blocking_keys(&record, &schema);
        // postcode "1011AB" → digits only = "1011" → last 4 = "1011"
        assert!(keys.iter().any(|k| k.ends_with("1011")), "id suffix must be 4 digits: {keys:?}");
    }

    #[test]
    fn custom_exact_categorical_matches_only_same_value() {
        let schema = single_field_schema("tussenvoegsel", FieldKind::Categorical);
        let blocker = BlockerFactory::from_custom_category(
            &schema,
            CustomSchemaCategory::new().with_exact_categorical(),
        );

        let r1 = Record::new(1).insert("tussenvoegsel", text("van"));
        let r2 = Record::new(2).insert("tussenvoegsel", text("van"));
        let r3 = Record::new(3).insert("tussenvoegsel", text("de"));

        let mut idx = InvertedIndex::new();
        for record in [&r1, &r2, &r3] {
            blocker.index_record(record, &schema, &mut idx);
        }

        let cands = blocker.candidates(&r1, &schema, &idx);
        assert!(cands.contains(&2), "r2 (same tussenvoegsel) must be a candidate");
        assert!(!cands.contains(&3), "r3 (different tussenvoegsel) must NOT be a candidate");
    }

    #[test]
    fn custom_date_fragment_produces_year_month_key() {
        let schema = single_field_schema("geboortedatum", FieldKind::Date);
        let blocker = BlockerFactory::from_custom_category(
            &schema,
            CustomSchemaCategory::new().with_date_fragment(DateGranularity::YearMonth),
        );
        let record = Record::new(1).insert("geboortedatum", text("1990-06-15"));

        let keys = blocker.blocking_keys(&record, &schema);
        assert!(keys.iter().any(|k| k.contains("1990-06")), "key must contain YYYY-MM: {keys:?}");
    }

    #[test]
    fn custom_phonetic_name_dob_links_same_person() {
        let schema = names_and_dob_schema();
        let blocker = BlockerFactory::from_custom_category(
            &schema,
            CustomSchemaCategory::new().with_phonetic_name_dob(),
        );

        let r1 = Record::new(1)
            .insert("achternaam", text("Jansen"))
            .insert("geboortedatum", text("1978-03-15"));
        let r2 = Record::new(2)
            .insert("achternaam", text("Jansen"))
            .insert("geboortedatum", text("1978-03-15"));
        let r3 = Record::new(3)
            .insert("achternaam", text("de Wit"))
            .insert("geboortedatum", text("1990-07-22"));

        let mut idx = InvertedIndex::new();
        for record in [&r1, &r2, &r3] {
            blocker.index_record(record, &schema, &mut idx);
        }

        let cands = blocker.candidates(&r1, &schema, &idx);
        assert!(cands.contains(&2), "same surname+DOB must be a candidate");
        assert!(!cands.contains(&3), "different surname+DOB must NOT be a candidate");
    }

    #[test]
    fn custom_missing_field_kind_produces_no_panic() {
        // Schema has no Phone fields; with_phone_suffix should silently produce nothing.
        let schema = single_field_schema("achternaam", FieldKind::Name);
        let blocker = BlockerFactory::from_custom_category(
            &schema,
            CustomSchemaCategory::new().with_phone_suffix(7),
        );
        let record = Record::new(1).insert("achternaam", text("Jansen"));

        let keys = blocker.blocking_keys(&record, &schema);
        assert!(keys.is_empty(), "no Phone fields → no keys, no panic");
    }

    #[test]
    fn custom_escape_hatch_with_key_works() {
        let schema = single_field_schema("postcode", FieldKind::Id);
        // Provide a SuffixKey(4) via the escape hatch instead of with_id_suffix.
        let blocker = BlockerFactory::from_custom_category(
            &schema,
            CustomSchemaCategory::new().with_key(SuffixKey::new("postcode", 4)),
        );
        let record = Record::new(1).insert("postcode", text("1011AB"));

        let keys = blocker.blocking_keys(&record, &schema);
        assert!(!keys.is_empty(), "escape-hatch key must produce at least one key");
    }

    #[test]
    fn custom_combined_rules_produce_multiple_key_types() {
        let schema = SchemaBuilder::new()
            .field("voornamen", FieldKind::Name)
            .field("achternaam", FieldKind::Name)
            .field("geboortedatum", FieldKind::Date)
            .field("postcode", FieldKind::Id)
            .field("tussenvoegsel", FieldKind::Categorical)
            .build()
            .unwrap();
        let category = CustomSchemaCategory::new()
            .with_phonetic_name_dob()
            .with_id_suffix(4)
            .with_exact_categorical();
        let blocker = BlockerFactory::from_custom_category(&schema, category);
        let record = Record::new(1)
            .insert("achternaam", text("van den Berg"))
            .insert("geboortedatum", text("1978-03-15"))
            .insert("postcode", text("1011AB"))
            .insert("tussenvoegsel", text("van den"));

        let keys = blocker.blocking_keys(&record, &schema);
        // Expect at least a phonetic key and a suffix key and a categorical key.
        assert!(keys.len() >= 3, "combined rules must produce at least 3 keys: {keys:?}");
    }
}