1use rphonetic::{DoubleMetaphone, Encoder, Soundex};
2use zer_core::{record::Record, schema::Schema};
3
4use crate::normalize::{extract_surname_token, normalize_text};
5use super::{BlockingKey, phonetic::PhoneticAlgo};
6
7pub struct PhoneticNameDobInitialKey {
14 algo: PhoneticAlgo,
15 name_field: String,
16 initial_field: String,
17 dob_field: String,
18}
19
20impl PhoneticNameDobInitialKey {
21 pub fn new(name_field: &str, initial_field: &str, dob_field: &str) -> Self {
22 Self {
23 algo: PhoneticAlgo::DoubleMetaphone,
24 name_field: name_field.into(),
25 initial_field: initial_field.into(),
26 dob_field: dob_field.into(),
27 }
28 }
29
30 pub fn with_algo(mut self, algo: PhoneticAlgo) -> Self {
31 self.algo = algo;
32 self
33 }
34
35 fn encode(&self, s: &str) -> String {
36 match self.algo {
37 PhoneticAlgo::DoubleMetaphone => DoubleMetaphone::default().encode(s),
38 PhoneticAlgo::Soundex => Soundex::default().encode(s),
39 }
40 }
41}
42
43impl BlockingKey for PhoneticNameDobInitialKey {
44 fn name(&self) -> &str {
45 "phonetic_initial_dob"
46 }
47
48 fn extract(&self, record: &Record, _schema: &Schema) -> Vec<String> {
49 let surname_cow = record.field_as_str(&self.name_field);
50 let surname_raw = match surname_cow.as_deref() {
51 Some(s) => s,
52 None => return vec![],
53 };
54 let initial_cow = record.field_as_str(&self.initial_field);
55 let initial_raw = match initial_cow.as_deref() {
56 Some(s) => s,
57 None => return vec![],
58 };
59 let dob_cow = record.field_as_str(&self.dob_field);
60 let dob_raw = match dob_cow.as_deref() {
61 Some(s) => s,
62 None => return vec![],
63 };
64
65 let normalized = normalize_text(surname_raw);
66 let surname = extract_surname_token(&normalized);
67 if surname.is_empty() {
68 return vec![];
69 }
70
71 let code = self.encode(surname);
72 if code.is_empty() {
73 return vec![];
74 }
75
76 let initial = initial_raw
77 .trim()
78 .chars()
79 .next()
80 .map(|c| c.to_uppercase().to_string())
81 .unwrap_or_default();
82 if initial.is_empty() {
83 return vec![];
84 }
85
86 let year = dob_raw.trim().get(..4).unwrap_or("").to_string();
87 if year.len() < 4 || !year.chars().all(|c| c.is_ascii_digit()) {
88 return vec![];
89 }
90
91 vec![format!("{}:{}:{}", code, initial, year)]
92 }
93}
94
#[cfg(test)]
mod tests {
    use super::*;
    use zer_core::{record::FieldValue, schema::FieldKind, schema::SchemaBuilder};

    /// Schema declaring the three fields the key under test reads.
    fn make_schema() -> Schema {
        SchemaBuilder::new()
            .field("last_name", FieldKind::Name)
            .field("first_name", FieldKind::Name)
            .field("dob", FieldKind::Date)
            .build()
            .unwrap()
    }

    /// Record with text values for surname, first name and date of birth.
    fn make_record(id: u64, last: &str, first: &str, dob: &str) -> Record {
        Record::new(id)
            .insert("last_name", FieldValue::Text(last.into()))
            .insert("first_name", FieldValue::Text(first.into()))
            .insert("dob", FieldValue::Text(dob.into()))
    }

    /// Key wired to the field names declared by `make_schema`.
    fn make_key() -> PhoneticNameDobInitialKey {
        PhoneticNameDobInitialKey::new("last_name", "first_name", "dob")
    }

    /// Extracts the keys of two records using one schema and one key instance.
    fn extract_pair(first: &Record, second: &Record) -> (Vec<String>, Vec<String>) {
        let schema = make_schema();
        let key = make_key();
        (key.extract(first, &schema), key.extract(second, &schema))
    }

    #[test]
    fn same_surname_same_initial_same_year_collide() {
        let anna = make_record(1, "Jong", "Anna", "1990-03-01");
        let annelies = make_record(2, "Jong", "Annelies", "1990-07-15");

        let (k1, k2) = extract_pair(&anna, &annelies);
        assert!(!k1.is_empty());
        assert_eq!(k1, k2, "Same surname/initial/year should collide");
    }

    #[test]
    fn same_surname_different_initial_no_collision() {
        let anna = make_record(1, "Jong", "Anna", "1990-03-01");
        let pieter = make_record(2, "Jong", "Pieter", "1990-03-01");

        let (k1, k2) = extract_pair(&anna, &pieter);
        assert_ne!(k1, k2, "Different initials should not collide");
    }

    #[test]
    fn different_dob_year_no_collision() {
        let older = make_record(1, "Berg", "Anna", "1970-01-01");
        let younger = make_record(2, "Berg", "Anna", "1985-01-01");

        let (k1, k2) = extract_pair(&older, &younger);
        assert_ne!(k1, k2, "Different DOB years should not collide");
    }

    #[test]
    fn missing_initial_field_returns_empty() {
        let schema = make_schema();
        let key = make_key();

        // No "first_name" entry is inserted, so the initial cannot be derived.
        let record = Record::new(1)
            .insert("last_name", FieldValue::Text("Berg".into()))
            .insert("dob", FieldValue::Text("1990-01-01".into()));

        let keys = key.extract(&record, &schema);
        assert!(keys.is_empty());
    }

    #[test]
    fn tussenvoegsel_stripped_from_surname() {
        let prefixed = make_record(1, "van den Berg", "Anna", "1990-06-15");
        let bare = make_record(2, "Berg", "Anna", "1990-06-15");

        let (k1, k2) = extract_pair(&prefixed, &bare);
        assert!(!k1.is_empty());
        assert_eq!(k1, k2, "van den Berg and Berg should collide after prefix stripping");
    }
}