1use crate::identifiers;
41use crate::models::{Address, PassportBook, Worker};
42use crate::nicknames::NicknameTable;
43use crate::normalizer::Normalizer;
44use crate::scorer::{Scorer, SimilarityAlgorithm};
45use chrono::{Datelike, NaiveDate};
46use serde::{Deserialize, Serialize};
47
48#[derive(Debug, Clone, Serialize, Deserialize)]
133#[serde(default)]
134pub struct MatchConfig {
135 pub match_threshold: f64,
137
138 pub uk_nhs_number_weight: f64,
140
141 pub fr_nir_weight: f64,
143
144 pub es_tsi_weight: f64,
146
147 pub ie_ihi_weight: f64,
149
150 pub uk_hc_number_weight: f64,
152
153 pub us_ssn_weight: f64,
155
156 pub au_ihi_weight: f64,
158
159 pub de_kvnr_weight: f64,
161
162 pub it_cf_weight: f64,
164
165 pub nl_bsn_weight: f64,
167
168 pub se_workernummer_weight: f64,
170
171 pub uk_chi_number_weight: f64,
173
174 pub be_nn_weight: f64,
176 pub bg_egn_weight: f64,
178 pub cz_rc_weight: f64,
180 pub dk_cpr_weight: f64,
182 pub ee_ik_weight: f64,
184 pub es_dni_weight: f64,
186 pub fi_hetu_weight: f64,
188 pub hr_oib_weight: f64,
190 pub is_kt_weight: f64,
192 pub lt_ak_weight: f64,
194 pub lv_pk_weight: f64,
196 pub mt_id_weight: f64,
198 pub no_fnr_weight: f64,
200 pub pl_pesel_weight: f64,
202 pub ro_cnp_weight: f64,
204 pub si_emso_weight: f64,
206 pub sk_rc_weight: f64,
208 pub uk_nino_weight: f64,
210 pub gr_dss_weight: f64,
212 pub li_id_weight: f64,
214 pub nl_id_weight: f64,
216 pub pl_nip_weight: f64,
218 pub pt_nif_weight: f64,
220 pub br_cpf_weight: f64,
222 pub cn_rrn_weight: f64,
224 pub in_aadhaar_weight: f64,
226 pub jp_my_number_weight: f64,
228 pub mx_curp_weight: f64,
230 pub nz_nhi_weight: f64,
232 pub za_id_weight: f64,
234
235 pub passport_book_weight: f64,
239
240 pub given_name_weight: f64,
242
243 pub family_name_weight: f64,
245
246 pub date_of_birth_weight: f64,
248
249 pub gender_weight: f64,
251
252 pub blood_type_weight: f64,
257
258 pub multiple_birth_weight: f64,
264
265 pub address_weight: f64,
267
268 pub birth_place_weight: f64,
274
275 pub death_date_weight: f64,
281
282 pub death_place_weight: f64,
288
289 pub phone_weight: f64,
291
292 pub email_weight: f64,
295
296 pub use_phonetic_matching: bool,
298
299 pub name_algorithm: SimilarityAlgorithm,
301
302 pub strict_mode: bool,
304
305 pub gmail_dot_folding: bool,
312
313 pub nickname_table: NicknameTable,
328
329 pub phone_default_country: Option<String>,
344}
345
346impl Default for MatchConfig {
347 fn default() -> Self {
357 Self {
358 match_threshold: 0.85,
359 uk_nhs_number_weight: 0.30,
360 fr_nir_weight: 0.30,
361 es_tsi_weight: 0.30,
362 ie_ihi_weight: 0.30,
363 uk_hc_number_weight: 0.30,
364 us_ssn_weight: 0.30,
365 au_ihi_weight: 0.30,
366 de_kvnr_weight: 0.30,
367 it_cf_weight: 0.30,
368 nl_bsn_weight: 0.30,
369 se_workernummer_weight: 0.30,
370 uk_chi_number_weight: 0.30,
371 be_nn_weight: 0.30,
372 bg_egn_weight: 0.30,
373 cz_rc_weight: 0.30,
374 dk_cpr_weight: 0.30,
375 ee_ik_weight: 0.30,
376 es_dni_weight: 0.30,
377 fi_hetu_weight: 0.30,
378 hr_oib_weight: 0.30,
379 is_kt_weight: 0.30,
380 lt_ak_weight: 0.30,
381 lv_pk_weight: 0.30,
382 mt_id_weight: 0.30,
383 no_fnr_weight: 0.30,
384 pl_pesel_weight: 0.30,
385 ro_cnp_weight: 0.30,
386 si_emso_weight: 0.30,
387 sk_rc_weight: 0.30,
388 uk_nino_weight: 0.30,
389 gr_dss_weight: 0.30,
390 li_id_weight: 0.30,
391 nl_id_weight: 0.30,
392 pl_nip_weight: 0.30,
393 pt_nif_weight: 0.30,
394 br_cpf_weight: 0.30,
395 cn_rrn_weight: 0.30,
396 in_aadhaar_weight: 0.30,
397 jp_my_number_weight: 0.30,
398 mx_curp_weight: 0.30,
399 nz_nhi_weight: 0.30,
400 za_id_weight: 0.30,
401 passport_book_weight: 0.30,
402 given_name_weight: 0.15,
403 family_name_weight: 0.20,
404 date_of_birth_weight: 0.20,
405 gender_weight: 0.05,
406 blood_type_weight: 0.05,
407 multiple_birth_weight: 0.05,
408 address_weight: 0.05,
409 birth_place_weight: 0.05,
410 death_date_weight: 0.10,
411 death_place_weight: 0.05,
412 phone_weight: 0.05,
413 email_weight: 0.05,
414 use_phonetic_matching: true,
415 name_algorithm: SimilarityAlgorithm::Combined,
416 strict_mode: false,
417 nickname_table: NicknameTable::empty(),
418 gmail_dot_folding: false,
419 phone_default_country: Some("GB".to_string()),
420 }
421 }
422}
423
424impl MatchConfig {
425 pub fn strict() -> Self {
437 Self {
438 match_threshold: 0.95,
439 strict_mode: true,
440 ..Default::default()
441 }
442 }
443
444 pub fn lenient() -> Self {
456 Self {
457 match_threshold: 0.75,
458 use_phonetic_matching: true,
459 ..Default::default()
460 }
461 }
462}
463
464#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
494pub enum Confidence {
495 High,
498 Medium,
501 Low,
504}
505
506impl Confidence {
507 pub fn from_score(score: f64) -> Self {
523 if score >= 0.90 {
524 Confidence::High
525 } else if score >= 0.75 {
526 Confidence::Medium
527 } else {
528 Confidence::Low
529 }
530 }
531}
532
533#[derive(Debug, Clone, Serialize, Deserialize)]
558pub struct MatchResult {
559 pub score: f64,
561
562 pub is_match: bool,
564
565 #[serde(default = "default_confidence")]
569 pub confidence: Confidence,
570
571 pub breakdown: MatchBreakdown,
573}
574
575fn default_confidence() -> Confidence {
580 Confidence::Low
581}
582
583#[derive(Debug, Clone, Serialize, Deserialize)]
594pub struct MatchBreakdown {
595 #[serde(default)]
597 pub uk_nhs_number_score: Option<f64>,
598 #[serde(default)]
600 pub fr_nir_score: Option<f64>,
601 #[serde(default)]
603 pub es_tsi_score: Option<f64>,
604 #[serde(default)]
606 pub ie_ihi_score: Option<f64>,
607 #[serde(default)]
609 pub uk_hc_number_score: Option<f64>,
610 #[serde(default)]
612 pub us_ssn_score: Option<f64>,
613 #[serde(default)]
615 pub au_ihi_score: Option<f64>,
616 #[serde(default)]
618 pub de_kvnr_score: Option<f64>,
619 #[serde(default)]
621 pub it_cf_score: Option<f64>,
622 #[serde(default)]
624 pub nl_bsn_score: Option<f64>,
625 #[serde(default)]
627 pub se_workernummer_score: Option<f64>,
628 #[serde(default)]
630 pub uk_chi_number_score: Option<f64>,
631 #[serde(default)]
633 pub be_nn_score: Option<f64>,
634 #[serde(default)]
636 pub bg_egn_score: Option<f64>,
637 #[serde(default)]
639 pub cz_rc_score: Option<f64>,
640 #[serde(default)]
642 pub dk_cpr_score: Option<f64>,
643 #[serde(default)]
645 pub ee_ik_score: Option<f64>,
646 #[serde(default)]
648 pub es_dni_score: Option<f64>,
649 #[serde(default)]
651 pub fi_hetu_score: Option<f64>,
652 #[serde(default)]
654 pub hr_oib_score: Option<f64>,
655 #[serde(default)]
657 pub is_kt_score: Option<f64>,
658 #[serde(default)]
660 pub lt_ak_score: Option<f64>,
661 #[serde(default)]
663 pub lv_pk_score: Option<f64>,
664 #[serde(default)]
666 pub mt_id_score: Option<f64>,
667 #[serde(default)]
669 pub no_fnr_score: Option<f64>,
670 #[serde(default)]
672 pub pl_pesel_score: Option<f64>,
673 #[serde(default)]
675 pub ro_cnp_score: Option<f64>,
676 #[serde(default)]
678 pub si_emso_score: Option<f64>,
679 #[serde(default)]
681 pub sk_rc_score: Option<f64>,
682 #[serde(default)]
684 pub uk_nino_score: Option<f64>,
685 #[serde(default)]
687 pub gr_dss_score: Option<f64>,
688 #[serde(default)]
690 pub li_id_score: Option<f64>,
691 #[serde(default)]
693 pub nl_id_score: Option<f64>,
694 #[serde(default)]
696 pub pl_nip_score: Option<f64>,
697 #[serde(default)]
699 pub pt_nif_score: Option<f64>,
700 #[serde(default)]
702 pub br_cpf_score: Option<f64>,
703 #[serde(default)]
705 pub cn_rrn_score: Option<f64>,
706 #[serde(default)]
708 pub in_aadhaar_score: Option<f64>,
709 #[serde(default)]
711 pub jp_my_number_score: Option<f64>,
712 #[serde(default)]
714 pub mx_curp_score: Option<f64>,
715 #[serde(default)]
717 pub nz_nhi_score: Option<f64>,
718 #[serde(default)]
720 pub za_id_score: Option<f64>,
721 #[serde(default)]
727 pub passport_book_score: Option<f64>,
728 pub given_name_score: Option<f64>,
730 pub family_name_score: Option<f64>,
732 pub date_of_birth_score: Option<f64>,
734 pub gender_score: Option<f64>,
736 #[serde(default)]
739 pub blood_type_score: Option<f64>,
740 #[serde(default)]
744 pub multiple_birth_score: Option<f64>,
745 pub address_score: Option<f64>,
747 #[serde(default)]
751 pub birth_place_score: Option<f64>,
752 #[serde(default)]
757 pub death_date_score: Option<f64>,
758 #[serde(default)]
763 pub death_place_score: Option<f64>,
764 pub phone_score: Option<f64>,
766 #[serde(default)]
769 pub email_score: Option<f64>,
770 pub phonetic_name_score: Option<f64>,
772}
773
774pub struct MatchingEngine {
788 config: MatchConfig,
789}
790
791impl MatchingEngine {
792 pub fn new(config: MatchConfig) -> Self {
800 Self { config }
801 }
802
803 pub fn default_config() -> Self {
811 Self::new(MatchConfig::default())
812 }
813
814 pub fn match_workers(&self, worker1: &Worker, worker2: &Worker) -> MatchResult {
834 let breakdown = self.calculate_breakdown(worker1, worker2);
835 let score = self.calculate_weighted_score(&breakdown);
836 let above_threshold = score >= self.config.match_threshold;
837 let is_match = if self.config.strict_mode {
843 above_threshold && self.deterministic_match(worker1, worker2)
844 } else {
845 above_threshold
846 };
847 let confidence = Confidence::from_score(score);
848
849 MatchResult {
850 score,
851 is_match,
852 confidence,
853 breakdown,
854 }
855 }
856
857 pub fn match_one_to_many(&self, query: &Worker, candidates: &[Worker]) -> Vec<MatchResult> {
906 candidates
907 .iter()
908 .map(|c| self.match_workers(query, c))
909 .collect()
910 }
911
912 pub fn rank_one_to_many(
942 &self,
943 query: &Worker,
944 candidates: &[Worker],
945 ) -> Vec<(usize, MatchResult)> {
946 let mut indexed: Vec<(usize, MatchResult)> = self
947 .match_one_to_many(query, candidates)
948 .into_iter()
949 .enumerate()
950 .collect();
951 indexed.sort_by(|a, b| {
952 b.1.score
953 .partial_cmp(&a.1.score)
954 .unwrap_or(std::cmp::Ordering::Equal)
955 .then_with(|| a.0.cmp(&b.0))
956 });
957 indexed
958 }
959
960 pub fn deterministic_match(&self, worker1: &Worker, worker2: &Worker) -> bool {
995 if identifier_equal(
996 &worker1.uk_nhs_number,
997 &worker2.uk_nhs_number,
998 identifiers::parse_uk_nhs_number,
999 ) {
1000 return true;
1001 }
1002 if identifier_equal(&worker1.fr_nir, &worker2.fr_nir, identifiers::parse_fr_nir) {
1003 return true;
1004 }
1005 if identifier_equal(&worker1.es_tsi, &worker2.es_tsi, identifiers::parse_es_tsi) {
1006 return true;
1007 }
1008 if identifier_equal(&worker1.ie_ihi, &worker2.ie_ihi, identifiers::parse_ie_ihi) {
1009 return true;
1010 }
1011 if identifier_equal(
1012 &worker1.uk_hc_number,
1013 &worker2.uk_hc_number,
1014 identifiers::parse_uk_hc_number,
1015 ) {
1016 return true;
1017 }
1018 if identifier_equal(&worker1.us_ssn, &worker2.us_ssn, identifiers::parse_us_ssn) {
1019 return true;
1020 }
1021 if identifier_equal(&worker1.au_ihi, &worker2.au_ihi, identifiers::parse_au_ihi) {
1022 return true;
1023 }
1024 if identifier_equal(
1025 &worker1.de_kvnr,
1026 &worker2.de_kvnr,
1027 identifiers::parse_de_kvnr,
1028 ) {
1029 return true;
1030 }
1031 if identifier_equal(&worker1.it_cf, &worker2.it_cf, identifiers::parse_it_cf) {
1032 return true;
1033 }
1034 if identifier_equal(&worker1.nl_bsn, &worker2.nl_bsn, identifiers::parse_nl_bsn) {
1035 return true;
1036 }
1037 if identifier_equal(
1038 &worker1.se_workernummer,
1039 &worker2.se_workernummer,
1040 identifiers::parse_se_workernummer,
1041 ) {
1042 return true;
1043 }
1044 if identifier_equal(
1045 &worker1.uk_chi_number,
1046 &worker2.uk_chi_number,
1047 identifiers::parse_uk_chi_number,
1048 ) {
1049 return true;
1050 }
1051 if identifier_equal(&worker1.be_nn, &worker2.be_nn, identifiers::parse_be_nn) {
1052 return true;
1053 }
1054 if identifier_equal(&worker1.bg_egn, &worker2.bg_egn, identifiers::parse_bg_egn) {
1055 return true;
1056 }
1057 if identifier_equal(&worker1.cz_rc, &worker2.cz_rc, identifiers::parse_cz_rc) {
1058 return true;
1059 }
1060 if identifier_equal(&worker1.dk_cpr, &worker2.dk_cpr, identifiers::parse_dk_cpr) {
1061 return true;
1062 }
1063 if identifier_equal(&worker1.ee_ik, &worker2.ee_ik, identifiers::parse_ee_ik) {
1064 return true;
1065 }
1066 if identifier_equal(&worker1.es_dni, &worker2.es_dni, identifiers::parse_es_dni) {
1067 return true;
1068 }
1069 if identifier_equal(
1070 &worker1.fi_hetu,
1071 &worker2.fi_hetu,
1072 identifiers::parse_fi_hetu,
1073 ) {
1074 return true;
1075 }
1076 if identifier_equal(&worker1.hr_oib, &worker2.hr_oib, identifiers::parse_hr_oib) {
1077 return true;
1078 }
1079 if identifier_equal(&worker1.is_kt, &worker2.is_kt, identifiers::parse_is_kt) {
1080 return true;
1081 }
1082 if identifier_equal(&worker1.lt_ak, &worker2.lt_ak, identifiers::parse_lt_ak) {
1083 return true;
1084 }
1085 if identifier_equal(&worker1.lv_pk, &worker2.lv_pk, identifiers::parse_lv_pk) {
1086 return true;
1087 }
1088 if identifier_equal(&worker1.mt_id, &worker2.mt_id, identifiers::parse_mt_id) {
1089 return true;
1090 }
1091 if identifier_equal(&worker1.no_fnr, &worker2.no_fnr, identifiers::parse_no_fnr) {
1092 return true;
1093 }
1094 if identifier_equal(
1095 &worker1.pl_pesel,
1096 &worker2.pl_pesel,
1097 identifiers::parse_pl_pesel,
1098 ) {
1099 return true;
1100 }
1101 if identifier_equal(&worker1.ro_cnp, &worker2.ro_cnp, identifiers::parse_ro_cnp) {
1102 return true;
1103 }
1104 if identifier_equal(
1105 &worker1.si_emso,
1106 &worker2.si_emso,
1107 identifiers::parse_si_emso,
1108 ) {
1109 return true;
1110 }
1111 if identifier_equal(&worker1.sk_rc, &worker2.sk_rc, identifiers::parse_sk_rc) {
1112 return true;
1113 }
1114 if identifier_equal(
1115 &worker1.uk_nino,
1116 &worker2.uk_nino,
1117 identifiers::parse_uk_nino,
1118 ) {
1119 return true;
1120 }
1121 if identifier_equal(&worker1.gr_dss, &worker2.gr_dss, identifiers::parse_gr_dss) {
1122 return true;
1123 }
1124 if identifier_equal(&worker1.li_id, &worker2.li_id, identifiers::parse_li_id) {
1125 return true;
1126 }
1127 if identifier_equal(&worker1.nl_id, &worker2.nl_id, identifiers::parse_nl_id) {
1128 return true;
1129 }
1130 if identifier_equal(&worker1.pl_nip, &worker2.pl_nip, identifiers::parse_pl_nip) {
1131 return true;
1132 }
1133 if identifier_equal(&worker1.pt_nif, &worker2.pt_nif, identifiers::parse_pt_nif) {
1134 return true;
1135 }
1136 if identifier_equal(&worker1.br_cpf, &worker2.br_cpf, identifiers::parse_br_cpf) {
1137 return true;
1138 }
1139 if identifier_equal(&worker1.cn_rrn, &worker2.cn_rrn, identifiers::parse_cn_rrn) {
1140 return true;
1141 }
1142 if identifier_equal(
1143 &worker1.in_aadhaar,
1144 &worker2.in_aadhaar,
1145 identifiers::parse_in_aadhaar,
1146 ) {
1147 return true;
1148 }
1149 if identifier_equal(
1150 &worker1.jp_my_number,
1151 &worker2.jp_my_number,
1152 identifiers::parse_jp_my_number,
1153 ) {
1154 return true;
1155 }
1156 if identifier_equal(
1157 &worker1.mx_curp,
1158 &worker2.mx_curp,
1159 identifiers::parse_mx_curp,
1160 ) {
1161 return true;
1162 }
1163 if identifier_equal(&worker1.nz_nhi, &worker2.nz_nhi, identifiers::parse_nz_nhi) {
1164 return true;
1165 }
1166 if identifier_equal(&worker1.za_id, &worker2.za_id, identifiers::parse_za_id) {
1167 return true;
1168 }
1169 if passport_books_share_pair(&worker1.passport_books, &worker2.passport_books) {
1170 return true;
1171 }
1172
1173 let name_match = match (&worker1.given_name, &worker2.given_name) {
1174 (Some(f1), Some(f2)) => {
1175 Normalizer::normalize_name(f1) == Normalizer::normalize_name(f2)
1176 }
1177 _ => false,
1178 } && match (&worker1.family_name, &worker2.family_name) {
1179 (Some(l1), Some(l2)) => {
1180 Normalizer::normalize_name(l1) == Normalizer::normalize_name(l2)
1181 }
1182 _ => false,
1183 };
1184
1185 let dob_match = match (worker1.date_of_birth, worker2.date_of_birth) {
1186 (Some(d1), Some(d2)) => d1 == d2,
1187 _ => false,
1188 };
1189
1190 let gender_match = match (worker1.gender, worker2.gender) {
1191 (Some(g1), Some(g2)) => g1 == g2,
1192 _ => true,
1193 };
1194
1195 name_match && dob_match && gender_match
1196 }
1197
1198 fn calculate_breakdown(&self, worker1: &Worker, worker2: &Worker) -> MatchBreakdown {
1199 MatchBreakdown {
1200 uk_nhs_number_score: identifier_score(
1201 &worker1.uk_nhs_number,
1202 &worker2.uk_nhs_number,
1203 identifiers::parse_uk_nhs_number,
1204 ),
1205 fr_nir_score: identifier_score(
1206 &worker1.fr_nir,
1207 &worker2.fr_nir,
1208 identifiers::parse_fr_nir,
1209 ),
1210 es_tsi_score: identifier_score(
1211 &worker1.es_tsi,
1212 &worker2.es_tsi,
1213 identifiers::parse_es_tsi,
1214 ),
1215 ie_ihi_score: identifier_score(
1216 &worker1.ie_ihi,
1217 &worker2.ie_ihi,
1218 identifiers::parse_ie_ihi,
1219 ),
1220 uk_hc_number_score: identifier_score(
1221 &worker1.uk_hc_number,
1222 &worker2.uk_hc_number,
1223 identifiers::parse_uk_hc_number,
1224 ),
1225 us_ssn_score: identifier_score(
1226 &worker1.us_ssn,
1227 &worker2.us_ssn,
1228 identifiers::parse_us_ssn,
1229 ),
1230 au_ihi_score: identifier_score(
1231 &worker1.au_ihi,
1232 &worker2.au_ihi,
1233 identifiers::parse_au_ihi,
1234 ),
1235 de_kvnr_score: identifier_score(
1236 &worker1.de_kvnr,
1237 &worker2.de_kvnr,
1238 identifiers::parse_de_kvnr,
1239 ),
1240 it_cf_score: identifier_score(&worker1.it_cf, &worker2.it_cf, identifiers::parse_it_cf),
1241 nl_bsn_score: identifier_score(
1242 &worker1.nl_bsn,
1243 &worker2.nl_bsn,
1244 identifiers::parse_nl_bsn,
1245 ),
1246 se_workernummer_score: identifier_score(
1247 &worker1.se_workernummer,
1248 &worker2.se_workernummer,
1249 identifiers::parse_se_workernummer,
1250 ),
1251 uk_chi_number_score: identifier_score(
1252 &worker1.uk_chi_number,
1253 &worker2.uk_chi_number,
1254 identifiers::parse_uk_chi_number,
1255 ),
1256 be_nn_score: identifier_score(&worker1.be_nn, &worker2.be_nn, identifiers::parse_be_nn),
1257 bg_egn_score: identifier_score(
1258 &worker1.bg_egn,
1259 &worker2.bg_egn,
1260 identifiers::parse_bg_egn,
1261 ),
1262 cz_rc_score: identifier_score(&worker1.cz_rc, &worker2.cz_rc, identifiers::parse_cz_rc),
1263 dk_cpr_score: identifier_score(
1264 &worker1.dk_cpr,
1265 &worker2.dk_cpr,
1266 identifiers::parse_dk_cpr,
1267 ),
1268 ee_ik_score: identifier_score(&worker1.ee_ik, &worker2.ee_ik, identifiers::parse_ee_ik),
1269 es_dni_score: identifier_score(
1270 &worker1.es_dni,
1271 &worker2.es_dni,
1272 identifiers::parse_es_dni,
1273 ),
1274 fi_hetu_score: identifier_score(
1275 &worker1.fi_hetu,
1276 &worker2.fi_hetu,
1277 identifiers::parse_fi_hetu,
1278 ),
1279 hr_oib_score: identifier_score(
1280 &worker1.hr_oib,
1281 &worker2.hr_oib,
1282 identifiers::parse_hr_oib,
1283 ),
1284 is_kt_score: identifier_score(&worker1.is_kt, &worker2.is_kt, identifiers::parse_is_kt),
1285 lt_ak_score: identifier_score(&worker1.lt_ak, &worker2.lt_ak, identifiers::parse_lt_ak),
1286 lv_pk_score: identifier_score(&worker1.lv_pk, &worker2.lv_pk, identifiers::parse_lv_pk),
1287 mt_id_score: identifier_score(&worker1.mt_id, &worker2.mt_id, identifiers::parse_mt_id),
1288 no_fnr_score: identifier_score(
1289 &worker1.no_fnr,
1290 &worker2.no_fnr,
1291 identifiers::parse_no_fnr,
1292 ),
1293 pl_pesel_score: identifier_score(
1294 &worker1.pl_pesel,
1295 &worker2.pl_pesel,
1296 identifiers::parse_pl_pesel,
1297 ),
1298 ro_cnp_score: identifier_score(
1299 &worker1.ro_cnp,
1300 &worker2.ro_cnp,
1301 identifiers::parse_ro_cnp,
1302 ),
1303 si_emso_score: identifier_score(
1304 &worker1.si_emso,
1305 &worker2.si_emso,
1306 identifiers::parse_si_emso,
1307 ),
1308 sk_rc_score: identifier_score(&worker1.sk_rc, &worker2.sk_rc, identifiers::parse_sk_rc),
1309 uk_nino_score: identifier_score(
1310 &worker1.uk_nino,
1311 &worker2.uk_nino,
1312 identifiers::parse_uk_nino,
1313 ),
1314 gr_dss_score: identifier_score(
1315 &worker1.gr_dss,
1316 &worker2.gr_dss,
1317 identifiers::parse_gr_dss,
1318 ),
1319 li_id_score: identifier_score(&worker1.li_id, &worker2.li_id, identifiers::parse_li_id),
1320 nl_id_score: identifier_score(&worker1.nl_id, &worker2.nl_id, identifiers::parse_nl_id),
1321 pl_nip_score: identifier_score(
1322 &worker1.pl_nip,
1323 &worker2.pl_nip,
1324 identifiers::parse_pl_nip,
1325 ),
1326 pt_nif_score: identifier_score(
1327 &worker1.pt_nif,
1328 &worker2.pt_nif,
1329 identifiers::parse_pt_nif,
1330 ),
1331 br_cpf_score: identifier_score(
1332 &worker1.br_cpf,
1333 &worker2.br_cpf,
1334 identifiers::parse_br_cpf,
1335 ),
1336 cn_rrn_score: identifier_score(
1337 &worker1.cn_rrn,
1338 &worker2.cn_rrn,
1339 identifiers::parse_cn_rrn,
1340 ),
1341 in_aadhaar_score: identifier_score(
1342 &worker1.in_aadhaar,
1343 &worker2.in_aadhaar,
1344 identifiers::parse_in_aadhaar,
1345 ),
1346 jp_my_number_score: identifier_score(
1347 &worker1.jp_my_number,
1348 &worker2.jp_my_number,
1349 identifiers::parse_jp_my_number,
1350 ),
1351 mx_curp_score: identifier_score(
1352 &worker1.mx_curp,
1353 &worker2.mx_curp,
1354 identifiers::parse_mx_curp,
1355 ),
1356 nz_nhi_score: identifier_score(
1357 &worker1.nz_nhi,
1358 &worker2.nz_nhi,
1359 identifiers::parse_nz_nhi,
1360 ),
1361 za_id_score: identifier_score(&worker1.za_id, &worker2.za_id, identifiers::parse_za_id),
1362 passport_book_score: score_passport_books(
1363 &worker1.passport_books,
1364 &worker2.passport_books,
1365 ),
1366 given_name_score: self.score_given_name(worker1, worker2),
1367 family_name_score: self.score_family_name(worker1, worker2),
1368 date_of_birth_score: self.score_date_of_birth(worker1, worker2),
1369 gender_score: self.score_gender(worker1, worker2),
1370 blood_type_score: self.score_blood_type(worker1, worker2),
1371 multiple_birth_score: self.score_multiple_birth(worker1, worker2),
1372 address_score: self.score_address(worker1, worker2),
1373 birth_place_score: self.score_birth_place(worker1, worker2),
1374 death_date_score: self.score_death_date(worker1, worker2),
1375 death_place_score: self.score_death_place(worker1, worker2),
1376 phone_score: self.score_phone(worker1, worker2),
1377 email_score: self.score_email(worker1, worker2),
1378 phonetic_name_score: if self.config.use_phonetic_matching {
1379 self.score_phonetic_names(worker1, worker2)
1380 } else {
1381 None
1382 },
1383 }
1384 }
1385
1386 fn calculate_weighted_score(&self, breakdown: &MatchBreakdown) -> f64 {
1387 let mut total_weight = 0.0;
1388 let mut weighted_sum = 0.0;
1389
1390 if let Some(score) = breakdown.uk_nhs_number_score {
1391 weighted_sum += score * self.config.uk_nhs_number_weight;
1392 total_weight += self.config.uk_nhs_number_weight;
1393 }
1394 if let Some(score) = breakdown.fr_nir_score {
1395 weighted_sum += score * self.config.fr_nir_weight;
1396 total_weight += self.config.fr_nir_weight;
1397 }
1398 if let Some(score) = breakdown.es_tsi_score {
1399 weighted_sum += score * self.config.es_tsi_weight;
1400 total_weight += self.config.es_tsi_weight;
1401 }
1402 if let Some(score) = breakdown.ie_ihi_score {
1403 weighted_sum += score * self.config.ie_ihi_weight;
1404 total_weight += self.config.ie_ihi_weight;
1405 }
1406 if let Some(score) = breakdown.uk_hc_number_score {
1407 weighted_sum += score * self.config.uk_hc_number_weight;
1408 total_weight += self.config.uk_hc_number_weight;
1409 }
1410 if let Some(score) = breakdown.us_ssn_score {
1411 weighted_sum += score * self.config.us_ssn_weight;
1412 total_weight += self.config.us_ssn_weight;
1413 }
1414 if let Some(score) = breakdown.au_ihi_score {
1415 weighted_sum += score * self.config.au_ihi_weight;
1416 total_weight += self.config.au_ihi_weight;
1417 }
1418 if let Some(score) = breakdown.de_kvnr_score {
1419 weighted_sum += score * self.config.de_kvnr_weight;
1420 total_weight += self.config.de_kvnr_weight;
1421 }
1422 if let Some(score) = breakdown.it_cf_score {
1423 weighted_sum += score * self.config.it_cf_weight;
1424 total_weight += self.config.it_cf_weight;
1425 }
1426 if let Some(score) = breakdown.nl_bsn_score {
1427 weighted_sum += score * self.config.nl_bsn_weight;
1428 total_weight += self.config.nl_bsn_weight;
1429 }
1430 if let Some(score) = breakdown.se_workernummer_score {
1431 weighted_sum += score * self.config.se_workernummer_weight;
1432 total_weight += self.config.se_workernummer_weight;
1433 }
1434 if let Some(score) = breakdown.uk_chi_number_score {
1435 weighted_sum += score * self.config.uk_chi_number_weight;
1436 total_weight += self.config.uk_chi_number_weight;
1437 }
1438 if let Some(score) = breakdown.be_nn_score {
1439 weighted_sum += score * self.config.be_nn_weight;
1440 total_weight += self.config.be_nn_weight;
1441 }
1442 if let Some(score) = breakdown.bg_egn_score {
1443 weighted_sum += score * self.config.bg_egn_weight;
1444 total_weight += self.config.bg_egn_weight;
1445 }
1446 if let Some(score) = breakdown.cz_rc_score {
1447 weighted_sum += score * self.config.cz_rc_weight;
1448 total_weight += self.config.cz_rc_weight;
1449 }
1450 if let Some(score) = breakdown.dk_cpr_score {
1451 weighted_sum += score * self.config.dk_cpr_weight;
1452 total_weight += self.config.dk_cpr_weight;
1453 }
1454 if let Some(score) = breakdown.ee_ik_score {
1455 weighted_sum += score * self.config.ee_ik_weight;
1456 total_weight += self.config.ee_ik_weight;
1457 }
1458 if let Some(score) = breakdown.es_dni_score {
1459 weighted_sum += score * self.config.es_dni_weight;
1460 total_weight += self.config.es_dni_weight;
1461 }
1462 if let Some(score) = breakdown.fi_hetu_score {
1463 weighted_sum += score * self.config.fi_hetu_weight;
1464 total_weight += self.config.fi_hetu_weight;
1465 }
1466 if let Some(score) = breakdown.hr_oib_score {
1467 weighted_sum += score * self.config.hr_oib_weight;
1468 total_weight += self.config.hr_oib_weight;
1469 }
1470 if let Some(score) = breakdown.is_kt_score {
1471 weighted_sum += score * self.config.is_kt_weight;
1472 total_weight += self.config.is_kt_weight;
1473 }
1474 if let Some(score) = breakdown.lt_ak_score {
1475 weighted_sum += score * self.config.lt_ak_weight;
1476 total_weight += self.config.lt_ak_weight;
1477 }
1478 if let Some(score) = breakdown.lv_pk_score {
1479 weighted_sum += score * self.config.lv_pk_weight;
1480 total_weight += self.config.lv_pk_weight;
1481 }
1482 if let Some(score) = breakdown.mt_id_score {
1483 weighted_sum += score * self.config.mt_id_weight;
1484 total_weight += self.config.mt_id_weight;
1485 }
1486 if let Some(score) = breakdown.no_fnr_score {
1487 weighted_sum += score * self.config.no_fnr_weight;
1488 total_weight += self.config.no_fnr_weight;
1489 }
1490 if let Some(score) = breakdown.pl_pesel_score {
1491 weighted_sum += score * self.config.pl_pesel_weight;
1492 total_weight += self.config.pl_pesel_weight;
1493 }
1494 if let Some(score) = breakdown.ro_cnp_score {
1495 weighted_sum += score * self.config.ro_cnp_weight;
1496 total_weight += self.config.ro_cnp_weight;
1497 }
1498 if let Some(score) = breakdown.si_emso_score {
1499 weighted_sum += score * self.config.si_emso_weight;
1500 total_weight += self.config.si_emso_weight;
1501 }
1502 if let Some(score) = breakdown.sk_rc_score {
1503 weighted_sum += score * self.config.sk_rc_weight;
1504 total_weight += self.config.sk_rc_weight;
1505 }
1506 if let Some(score) = breakdown.uk_nino_score {
1507 weighted_sum += score * self.config.uk_nino_weight;
1508 total_weight += self.config.uk_nino_weight;
1509 }
1510 if let Some(score) = breakdown.gr_dss_score {
1511 weighted_sum += score * self.config.gr_dss_weight;
1512 total_weight += self.config.gr_dss_weight;
1513 }
1514 if let Some(score) = breakdown.li_id_score {
1515 weighted_sum += score * self.config.li_id_weight;
1516 total_weight += self.config.li_id_weight;
1517 }
1518 if let Some(score) = breakdown.nl_id_score {
1519 weighted_sum += score * self.config.nl_id_weight;
1520 total_weight += self.config.nl_id_weight;
1521 }
1522 if let Some(score) = breakdown.pl_nip_score {
1523 weighted_sum += score * self.config.pl_nip_weight;
1524 total_weight += self.config.pl_nip_weight;
1525 }
1526 if let Some(score) = breakdown.pt_nif_score {
1527 weighted_sum += score * self.config.pt_nif_weight;
1528 total_weight += self.config.pt_nif_weight;
1529 }
1530 if let Some(score) = breakdown.br_cpf_score {
1531 weighted_sum += score * self.config.br_cpf_weight;
1532 total_weight += self.config.br_cpf_weight;
1533 }
1534 if let Some(score) = breakdown.cn_rrn_score {
1535 weighted_sum += score * self.config.cn_rrn_weight;
1536 total_weight += self.config.cn_rrn_weight;
1537 }
1538 if let Some(score) = breakdown.in_aadhaar_score {
1539 weighted_sum += score * self.config.in_aadhaar_weight;
1540 total_weight += self.config.in_aadhaar_weight;
1541 }
1542 if let Some(score) = breakdown.jp_my_number_score {
1543 weighted_sum += score * self.config.jp_my_number_weight;
1544 total_weight += self.config.jp_my_number_weight;
1545 }
1546 if let Some(score) = breakdown.mx_curp_score {
1547 weighted_sum += score * self.config.mx_curp_weight;
1548 total_weight += self.config.mx_curp_weight;
1549 }
1550 if let Some(score) = breakdown.nz_nhi_score {
1551 weighted_sum += score * self.config.nz_nhi_weight;
1552 total_weight += self.config.nz_nhi_weight;
1553 }
1554 if let Some(score) = breakdown.za_id_score {
1555 weighted_sum += score * self.config.za_id_weight;
1556 total_weight += self.config.za_id_weight;
1557 }
1558 if let Some(score) = breakdown.passport_book_score {
1559 weighted_sum += score * self.config.passport_book_weight;
1560 total_weight += self.config.passport_book_weight;
1561 }
1562 if let Some(score) = breakdown.given_name_score {
1563 weighted_sum += score * self.config.given_name_weight;
1564 total_weight += self.config.given_name_weight;
1565 }
1566 if let Some(score) = breakdown.family_name_score {
1567 weighted_sum += score * self.config.family_name_weight;
1568 total_weight += self.config.family_name_weight;
1569 }
1570 if let Some(score) = breakdown.date_of_birth_score {
1571 weighted_sum += score * self.config.date_of_birth_weight;
1572 total_weight += self.config.date_of_birth_weight;
1573 }
1574 if let Some(score) = breakdown.gender_score {
1575 weighted_sum += score * self.config.gender_weight;
1576 total_weight += self.config.gender_weight;
1577 }
1578 if let Some(score) = breakdown.blood_type_score {
1579 weighted_sum += score * self.config.blood_type_weight;
1580 total_weight += self.config.blood_type_weight;
1581 }
1582 if let Some(score) = breakdown.multiple_birth_score {
1583 weighted_sum += score * self.config.multiple_birth_weight;
1584 total_weight += self.config.multiple_birth_weight;
1585 }
1586 if let Some(score) = breakdown.address_score {
1587 weighted_sum += score * self.config.address_weight;
1588 total_weight += self.config.address_weight;
1589 }
1590 if let Some(score) = breakdown.birth_place_score {
1591 weighted_sum += score * self.config.birth_place_weight;
1592 total_weight += self.config.birth_place_weight;
1593 }
1594 if let Some(score) = breakdown.death_date_score {
1595 weighted_sum += score * self.config.death_date_weight;
1596 total_weight += self.config.death_date_weight;
1597 }
1598 if let Some(score) = breakdown.death_place_score {
1599 weighted_sum += score * self.config.death_place_weight;
1600 total_weight += self.config.death_place_weight;
1601 }
1602 if let Some(score) = breakdown.phone_score {
1603 weighted_sum += score * self.config.phone_weight;
1604 total_weight += self.config.phone_weight;
1605 }
1606 if let Some(score) = breakdown.email_score {
1607 weighted_sum += score * self.config.email_weight;
1608 total_weight += self.config.email_weight;
1609 }
1610
1611 if let Some(score) = breakdown.phonetic_name_score
1613 && score > 0.9
1614 {
1615 weighted_sum += score * 0.05;
1616 total_weight += 0.05;
1617 }
1618
1619 if total_weight > 0.0 {
1620 weighted_sum / total_weight
1621 } else {
1622 0.0
1623 }
1624 }
1625
1626 fn score_given_name(&self, worker1: &Worker, worker2: &Worker) -> Option<f64> {
1627 let (g1, g2) = match (&worker1.given_name, &worker2.given_name) {
1628 (Some(a), Some(b)) => (a.as_str(), b.as_str()),
1629 _ => return None,
1630 };
1631 let given = self.score_name(g1, g2);
1632 let blended = match (&worker1.middle_name, &worker2.middle_name) {
1639 (Some(m1), Some(m2)) => {
1640 let middle = self.score_name(m1, m2);
1641 0.95 * given + 0.05 * middle
1642 }
1643 _ => given,
1644 };
1645 Some(blended)
1646 }
1647
1648 fn score_family_name(&self, worker1: &Worker, worker2: &Worker) -> Option<f64> {
1649 match (&worker1.family_name, &worker2.family_name) {
1650 (Some(name1), Some(name2)) => Some(self.score_name(name1, name2)),
1651 _ => None,
1652 }
1653 }
1654
1655 fn score_name(&self, name1: &str, name2: &str) -> f64 {
1656 let norm1 = Normalizer::normalize_name(name1);
1657 let norm2 = Normalizer::normalize_name(name2);
1658 let base = match self.config.name_algorithm {
1659 SimilarityAlgorithm::JaroWinkler => Scorer::jaro_winkler_similarity(&norm1, &norm2),
1660 SimilarityAlgorithm::Levenshtein => Scorer::levenshtein_similarity(&norm1, &norm2),
1661 SimilarityAlgorithm::Exact => Scorer::exact_match(&norm1, &norm2),
1662 SimilarityAlgorithm::Combined => Scorer::combined_similarity(&norm1, &norm2),
1663 };
1664 if !self.config.nickname_table.is_empty()
1670 && self.config.nickname_table.are_equivalent(&norm1, &norm2)
1671 {
1672 base.max(0.9)
1673 } else {
1674 base
1675 }
1676 }
1677
1678 fn score_date_of_birth(&self, worker1: &Worker, worker2: &Worker) -> Option<f64> {
1679 match (worker1.date_of_birth, worker2.date_of_birth) {
1680 (Some(dob1), Some(dob2)) => Some(score_dob_pair(dob1, dob2)),
1681 _ => None,
1682 }
1683 }
1684
1685 fn score_gender(&self, worker1: &Worker, worker2: &Worker) -> Option<f64> {
1686 match (worker1.gender, worker2.gender) {
1687 (Some(g1), Some(g2)) => Some(if g1 == g2 { 1.0 } else { 0.0 }),
1688 _ => None,
1689 }
1690 }
1691
1692 fn score_blood_type(&self, worker1: &Worker, worker2: &Worker) -> Option<f64> {
1693 match (worker1.blood_type, worker2.blood_type) {
1694 (Some(b1), Some(b2)) => Some(if b1 == b2 { 1.0 } else { 0.0 }),
1695 _ => None,
1696 }
1697 }
1698
1699 fn score_multiple_birth(&self, worker1: &Worker, worker2: &Worker) -> Option<f64> {
1700 match (worker1.multiple_birth, worker2.multiple_birth) {
1701 (Some(m1), Some(m2)) => Some(f64::from(m1 == m2)),
1702 _ => None,
1703 }
1704 }
1705
1706 fn score_address(&self, worker1: &Worker, worker2: &Worker) -> Option<f64> {
1707 let all_p1: Vec<&Address> = worker1
1715 .address
1716 .as_ref()
1717 .into_iter()
1718 .chain(worker1.previous_addresses.iter())
1719 .collect();
1720 let all_p2: Vec<&Address> = worker2
1721 .address
1722 .as_ref()
1723 .into_iter()
1724 .chain(worker2.previous_addresses.iter())
1725 .collect();
1726 if all_p1.is_empty() || all_p2.is_empty() {
1727 return None;
1728 }
1729 let mut best = f64::NEG_INFINITY;
1730 for a1 in &all_p1 {
1731 for a2 in &all_p2 {
1732 let s = self.compare_addresses(a1, a2);
1733 if s > best {
1734 best = s;
1735 }
1736 }
1737 }
1738 Some(best)
1739 }
1740
1741 fn score_birth_place(&self, worker1: &Worker, worker2: &Worker) -> Option<f64> {
1747 score_named_place(worker1.birth_place.as_ref()?, worker2.birth_place.as_ref()?)
1748 }
1749
1750 fn score_death_place(&self, worker1: &Worker, worker2: &Worker) -> Option<f64> {
1756 score_named_place(worker1.death_place.as_ref()?, worker2.death_place.as_ref()?)
1757 }
1758
1759 fn score_death_date(&self, worker1: &Worker, worker2: &Worker) -> Option<f64> {
1765 Some(score_dob_pair(worker1.death_date?, worker2.death_date?))
1766 }
1767
1768 fn compare_addresses(&self, addr1: &Address, addr2: &Address) -> f64 {
1769 let mut weighted_sum = 0.0_f64;
1777 let mut total_weight = 0.0_f64;
1778
1779 if let (Some(pc1), Some(pc2)) = (&addr1.postcode, &addr2.postcode) {
1780 let norm1 = Normalizer::normalize_postcode(pc1);
1781 let norm2 = Normalizer::normalize_postcode(pc2);
1782 weighted_sum += f64::from(norm1 == norm2) * 0.5;
1783 total_weight += 0.5;
1784 }
1785
1786 if let (Some(city1), Some(city2)) = (&addr1.city, &addr2.city) {
1787 let norm1 = Normalizer::normalize_name(city1);
1788 let norm2 = Normalizer::normalize_name(city2);
1789 weighted_sum += Scorer::jaro_winkler_similarity(&norm1, &norm2) * 0.3;
1790 total_weight += 0.3;
1791 }
1792
1793 if let (Some(line1), Some(line2)) = (&addr1.line1, &addr2.line1) {
1794 let parsed1 = Normalizer::parse_address_line(line1);
1795 let parsed2 = Normalizer::parse_address_line(line2);
1796 let street_sim = Scorer::jaro_winkler_similarity(&parsed1.street, &parsed2.street);
1797 let house_score = match (&parsed1.house_number, &parsed2.house_number) {
1798 (Some(a), Some(b)) => Some(f64::from(a == b)),
1799 _ => None,
1800 };
1801 let line1_score = match house_score {
1802 Some(h) => 0.6 * street_sim + 0.4 * h,
1803 None => street_sim,
1804 };
1805 weighted_sum += line1_score * 0.2;
1806 total_weight += 0.2;
1807 }
1808
1809 if total_weight == 0.0 {
1810 0.5
1811 } else {
1812 weighted_sum / total_weight
1813 }
1814 }
1815
1816 fn score_phone(&self, worker1: &Worker, worker2: &Worker) -> Option<f64> {
1817 let phone1 = worker1.phone.as_ref().or(worker1.mobile.as_ref())?;
1818 let phone2 = worker2.phone.as_ref().or(worker2.mobile.as_ref())?;
1819
1820 let default = self.config.phone_default_country.as_deref();
1821 let e164_1 = Normalizer::normalize_phone_e164(phone1, default);
1822 let e164_2 = Normalizer::normalize_phone_e164(phone2, default);
1823
1824 if let (Some(a), Some(b)) = (&e164_1, &e164_2) {
1830 return Some(f64::from(a == b));
1831 }
1832
1833 let norm1 = Normalizer::normalize_phone(phone1);
1834 let norm2 = Normalizer::normalize_phone(phone2);
1835 Some(f64::from(norm1 == norm2))
1836 }
1837
1838 fn score_email(&self, worker1: &Worker, worker2: &Worker) -> Option<f64> {
1839 let email1 = worker1.email.as_ref()?;
1840 let email2 = worker2.email.as_ref()?;
1841 let fold = self.config.gmail_dot_folding;
1842 let canonical1 = Normalizer::normalize_email(email1, fold)?;
1843 let canonical2 = Normalizer::normalize_email(email2, fold)?;
1844 Some(f64::from(canonical1 == canonical2))
1845 }
1846
1847 fn score_phonetic_names(&self, worker1: &Worker, worker2: &Worker) -> Option<f64> {
1848 let p1_given_name = worker1.given_name.as_ref()?;
1849 let p1_given_name_phonetic = Normalizer::phonetic_code(p1_given_name);
1850 let p1_family_name = worker1.family_name.as_ref()?;
1851 let p1_family_name_phonetic = Normalizer::phonetic_code(p1_family_name);
1852
1853 let p2_given_name = worker2.given_name.as_ref()?;
1854 let p2_given_name_phonetic = Normalizer::phonetic_code(p2_given_name);
1855 let p2_family_name = worker2.family_name.as_ref()?;
1856 let p2_family_name_phonetic = Normalizer::phonetic_code(p2_family_name);
1857
1858 let given_name_match = f64::from(p1_given_name_phonetic == p2_given_name_phonetic);
1859 let family_name_match = f64::from(p1_family_name_phonetic == p2_family_name_phonetic);
1860 Some((given_name_match + family_name_match) / 2.0)
1861 }
1862}
1863
/// Reports whether two optional identifiers are equal once canonicalised.
///
/// `parser` turns a raw identifier into its canonical form, or `None` when
/// the identifier cannot be parsed. The result is `true` only when both
/// identifiers are present, both parse, and the canonical forms are equal.
/// `parser` is not called unless both identifiers are present.
fn identifier_equal<F>(a: &Option<String>, b: &Option<String>, parser: F) -> bool
where
    F: Fn(&str) -> Option<String>,
{
    a.as_deref()
        .zip(b.as_deref())
        .and_then(|(raw_a, raw_b)| parser(raw_a).zip(parser(raw_b)))
        .is_some_and(|(canonical_a, canonical_b)| canonical_a == canonical_b)
}
1879
/// Scores two optional identifiers after canonicalising them with `parser`.
///
/// Returns `Some(1.0)` when both identifiers parse to the same canonical
/// form and `Some(0.0)` when both parse but differ. Returns `None` when
/// either identifier is absent or fails to parse, so the caller gets no
/// score at all rather than a `0.0`.
fn identifier_score<F>(a: &Option<String>, b: &Option<String>, parser: F) -> Option<f64>
where
    F: Fn(&str) -> Option<String>,
{
    let (Some(raw_a), Some(raw_b)) = (a, b) else {
        return None;
    };

    match (parser(raw_a), parser(raw_b)) {
        (Some(canonical_a), Some(canonical_b)) => {
            Some(if canonical_a == canonical_b { 1.0 } else { 0.0 })
        }
        _ => None,
    }
}
1894
1895fn passport_books_share_pair(a: &[PassportBook], b: &[PassportBook]) -> bool {
1906 for ba in a {
1907 for bb in b {
1908 if ba.country == bb.country && ba.number == bb.number {
1909 return true;
1910 }
1911 }
1912 }
1913 false
1914}
1915
1916fn score_passport_books(a: &[PassportBook], b: &[PassportBook]) -> Option<f64> {
1924 if a.is_empty() || b.is_empty() {
1925 return None;
1926 }
1927 Some(f64::from(passport_books_share_pair(a, b)))
1928}
1929
1930fn score_named_place(a: &Address, b: &Address) -> Option<f64> {
1951 let city = match (&a.city, &b.city) {
1952 (Some(c1), Some(c2)) => Some(Scorer::jaro_winkler_similarity(
1953 &Normalizer::normalize_name(c1),
1954 &Normalizer::normalize_name(c2),
1955 )),
1956 _ => None,
1957 };
1958 let country = match (&a.country, &b.country) {
1959 (Some(c1), Some(c2)) => Some(f64::from(
1960 Normalizer::normalize_name(c1) == Normalizer::normalize_name(c2),
1961 )),
1962 _ => None,
1963 };
1964 match (city, country) {
1965 (Some(c), Some(co)) => Some(0.7 * c + 0.3 * co),
1966 (Some(c), None) => Some(c),
1967 (None, Some(co)) => Some(co),
1968 (None, None) => None,
1969 }
1970}
1971
1972fn score_dob_pair(dob1: NaiveDate, dob2: NaiveDate) -> f64 {
1973 if dob1 == dob2 {
1974 return 1.0;
1975 }
1976 if dob1.year() == dob2.year()
1977 && let Some(swapped) = NaiveDate::from_ymd_opt(dob1.year(), dob1.day(), dob1.month())
1978 && swapped == dob2
1979 {
1980 return 0.5;
1981 }
1982 0.0
1983}
1984
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::Gender;
    use chrono::NaiveDate;

    /// Builds a calendar date, panicking when the components are not a real date.
    fn dob(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).expect("valid date")
    }

    /// Baseline record: John Smith, male, born 1980-05-15.
    fn john_smith() -> Worker {
        Worker::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Male)
            .build()
    }

    /// Same as [`john_smith`] but with the given name spelled "Jon".
    fn jon_smith() -> Worker {
        Worker::builder()
            .given_name("Jon")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Male)
            .build()
    }

    /// Name-only record for Ada Lovelace.
    fn ada_lovelace() -> Worker {
        Worker::builder()
            .given_name("Ada")
            .family_name("Lovelace")
            .build()
    }

    /// Thomas Price, male, with a caller-chosen date of birth.
    fn thomas_price(born: NaiveDate) -> Worker {
        Worker::builder()
            .given_name("Thomas")
            .family_name("Price")
            .date_of_birth(born)
            .gender(Gender::Male)
            .build()
    }

    /// The default configuration has the expected threshold, NHS weight and flags.
    #[test]
    fn config_default_values() {
        let config = MatchConfig::default();
        assert!((config.match_threshold - 0.85).abs() < 1e-9);
        assert!((config.uk_nhs_number_weight - 0.30).abs() < 1e-9);
        assert!(config.use_phonetic_matching);
        assert!(!config.strict_mode);
    }

    /// The strict preset uses a 0.95 threshold and turns strict mode on.
    #[test]
    fn config_strict_raises_threshold_and_sets_flag() {
        let config = MatchConfig::strict();
        assert!((config.match_threshold - 0.95).abs() < 1e-9);
        assert!(config.strict_mode);
    }

    /// Serialising and deserialising the default config preserves its fields.
    #[test]
    fn config_default_round_trips_through_json() {
        let original = MatchConfig::default();
        let json = serde_json::to_string(&original).expect("serialise");
        let restored: MatchConfig = serde_json::from_str(&json).expect("deserialise");
        assert!((original.match_threshold - restored.match_threshold).abs() < 1e-12);
        assert!((original.uk_nhs_number_weight - restored.uk_nhs_number_weight).abs() < 1e-12);
        assert_eq!(original.use_phonetic_matching, restored.use_phonetic_matching);
        assert!(matches!(
            restored.name_algorithm,
            SimilarityAlgorithm::Combined
        ));
        assert_eq!(original.strict_mode, restored.strict_mode);
        assert_eq!(original.nickname_table, restored.nickname_table);
        assert_eq!(original.gmail_dot_folding, restored.gmail_dot_folding);
        assert_eq!(original.phone_default_country, restored.phone_default_country);
    }

    /// The strict preset survives a JSON round trip.
    #[test]
    fn config_strict_round_trips_through_json() {
        let original = MatchConfig::strict();
        let json = serde_json::to_string(&original).expect("serialise");
        let restored: MatchConfig = serde_json::from_str(&json).expect("deserialise");
        assert!((restored.match_threshold - 0.95).abs() < 1e-12);
        assert!(restored.strict_mode);
    }

    /// The lenient preset survives a JSON round trip.
    #[test]
    fn config_lenient_round_trips_through_json() {
        let original = MatchConfig::lenient();
        let json = serde_json::to_string(&original).expect("serialise");
        let restored: MatchConfig = serde_json::from_str(&json).expect("deserialise");
        assert!((restored.match_threshold - 0.75).abs() < 1e-12);
    }

    /// Fields absent from the JSON take their default values.
    #[test]
    fn config_partial_json_fills_missing_fields_from_default() {
        let partial = r#"{"match_threshold": 0.80, "gmail_dot_folding": true}"#;
        let config: MatchConfig = serde_json::from_str(partial).expect("partial json");
        assert!((config.match_threshold - 0.80).abs() < 1e-12);
        assert!(config.gmail_dot_folding);
        assert!((config.uk_nhs_number_weight - 0.30).abs() < 1e-12);
        assert!(matches!(
            config.name_algorithm,
            SimilarityAlgorithm::Combined
        ));
        assert_eq!(config.phone_default_country.as_deref(), Some("GB"));
    }

    /// Every similarity algorithm variant survives a JSON round trip.
    #[test]
    fn similarity_algorithm_round_trips_through_json() {
        let variants = [
            SimilarityAlgorithm::JaroWinkler,
            SimilarityAlgorithm::Levenshtein,
            SimilarityAlgorithm::Exact,
            SimilarityAlgorithm::Combined,
        ];
        for algorithm in variants {
            let json = serde_json::to_string(&algorithm).expect("serialise");
            let restored: SimilarityAlgorithm = serde_json::from_str(&json).expect("deserialise");
            assert_eq!(algorithm, restored);
        }
    }

    /// The lenient preset uses a 0.75 threshold and keeps phonetic matching on.
    #[test]
    fn config_lenient_lowers_threshold() {
        let config = MatchConfig::lenient();
        assert!((config.match_threshold - 0.75).abs() < 1e-9);
        assert!(config.use_phonetic_matching);
    }

    /// A record compared against its own clone is a high-scoring match.
    #[test]
    fn exact_clone_is_a_match() {
        let worker = Worker::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Male)
            .uk_nhs_number("9434765919")
            .build();
        let result = MatchingEngine::default_config().match_workers(&worker, &worker.clone());
        assert!(result.is_match);
        assert!(result.score > 0.95);
    }

    /// "John" vs "Jon" with otherwise identical demographics still matches.
    #[test]
    fn fuzzy_given_name_still_matches() {
        let left = john_smith();
        let right = jon_smith();
        let result = MatchingEngine::default_config().match_workers(&left, &right);
        assert!(result.is_match);
        assert!(result.score > 0.85);
    }

    /// Records that differ in every field score low and do not match.
    #[test]
    fn completely_different_patients_do_not_match() {
        let left = john_smith();
        let right = Worker::builder()
            .given_name("Jane")
            .family_name("Doe")
            .date_of_birth(dob(1990, 3, 20))
            .gender(Gender::Female)
            .build();
        let result = MatchingEngine::default_config().match_workers(&left, &right);
        assert!(!result.is_match);
        assert!(result.score < 0.5);
    }

    /// With no field present on both sides the score is zero.
    #[test]
    fn no_overlapping_fields_returns_zero_score() {
        let given_only = Worker::builder().given_name("Solo").build();
        let family_only = Worker::builder().family_name("Only").build();
        let result = MatchingEngine::default_config().match_workers(&given_only, &family_only);
        assert_eq!(result.score, 0.0);
        assert!(!result.is_match);
    }

    /// Unparseable NHS numbers yield no NHS score and the demographics still match.
    #[test]
    fn unparseable_uk_nhs_number_is_none_not_zero() {
        let left = Worker::builder()
            .uk_nhs_number("not-a-number")
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .build();
        let right = Worker::builder()
            .uk_nhs_number("also-not-a-number")
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .build();
        let result = MatchingEngine::default_config().match_workers(&left, &right);
        assert_eq!(
            result.breakdown.uk_nhs_number_score, None,
            "unparseable NHS numbers should not produce a 0.0 penalty"
        );
        assert!(result.is_match, "should still match on demographics");
    }

    /// A field missing on one side has no entry in the breakdown.
    #[test]
    fn missing_field_yields_none_in_breakdown() {
        let given_only = Worker::builder().given_name("Ada").build();
        let full_name = ada_lovelace();
        let result = MatchingEngine::default_config().match_workers(&given_only, &full_name);
        assert!(result.breakdown.given_name_score.is_some());
        assert!(result.breakdown.family_name_score.is_none());
    }

    /// Enabling phonetic matching never lowers the score of an identical pair.
    #[test]
    fn phonetic_match_is_a_bonus_not_a_penalty() {
        let worker = Worker::builder()
            .given_name("Stephen")
            .family_name("Jones")
            .build();
        let phonetic_on = MatchingEngine::new(MatchConfig {
            use_phonetic_matching: true,
            ..MatchConfig::default()
        })
        .match_workers(&worker, &worker.clone());
        let phonetic_off = MatchingEngine::new(MatchConfig {
            use_phonetic_matching: false,
            ..MatchConfig::default()
        })
        .match_workers(&worker, &worker.clone());
        assert!(phonetic_on.score >= phonetic_off.score);
    }

    /// With phonetic matching off the breakdown has no phonetic score.
    #[test]
    fn phonetic_score_disabled_when_config_off() {
        let left = Worker::builder()
            .given_name("Steven")
            .family_name("Smith")
            .build();
        let right = Worker::builder()
            .given_name("Stephen")
            .family_name("Smyth")
            .build();
        let engine = MatchingEngine::new(MatchConfig {
            use_phonetic_matching: false,
            ..MatchConfig::default()
        });
        let result = engine.match_workers(&left, &right);
        assert_eq!(result.breakdown.phonetic_name_score, None);
    }

    /// Two addresses with no fields compare as the neutral 0.5.
    #[test]
    fn address_with_no_subfields_is_neutral_half() {
        let left = Address::new();
        let right = Address::new();
        let engine = MatchingEngine::default_config();
        let score = engine.compare_addresses(&left, &right);
        assert!(
            (score - 0.5).abs() < 1e-9,
            "empty addresses must be neutral (0.5), got {score}"
        );
    }

    /// Addresses sharing only a postcode get a positive score.
    #[test]
    fn address_postcode_dominates() {
        let left = Address {
            postcode: Some("CF10 1AA".into()),
            ..Address::new()
        };
        let right = Address {
            postcode: Some("CF10 1AA".into()),
            ..Address::new()
        };
        let similarity = MatchingEngine::default_config().compare_addresses(&left, &right);
        assert!(similarity > 0.0);
    }

    /// An NHS number that matches after normalisation is a deterministic match
    /// even when the given names differ.
    #[test]
    fn deterministic_uk_nhs_match_overrides_demographics() {
        let spaced = Worker::builder()
            .uk_nhs_number("943 476 5919")
            .given_name("Bob")
            .build();
        let compact = Worker::builder()
            .uk_nhs_number("9434765919")
            .given_name("Alice")
            .build();
        assert!(MatchingEngine::default_config().deterministic_match(&spaced, &compact));
    }

    /// Identical names, date of birth and gender are a deterministic match.
    #[test]
    fn deterministic_demographics_match_when_all_align() {
        let worker = john_smith();
        assert!(MatchingEngine::default_config().deterministic_match(&worker, &worker.clone()));
    }

    /// Gender missing on one side does not block a deterministic match.
    #[test]
    fn deterministic_demographics_tolerates_missing_gender() {
        let without_gender = Worker::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .build();
        let with_gender = john_smith();
        assert!(
            MatchingEngine::default_config().deterministic_match(&without_gender, &with_gender)
        );
    }

    /// Dates of birth one day apart block a deterministic match.
    #[test]
    fn deterministic_rejects_when_dob_differs() {
        let left = john_smith();
        let right = Worker::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 16))
            .gender(Gender::Male)
            .build();
        assert!(!MatchingEngine::default_config().deterministic_match(&left, &right));
    }

    /// Different genders block a deterministic match.
    #[test]
    fn deterministic_rejects_when_gender_differs() {
        let left = john_smith();
        let right = Worker::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Female)
            .build();
        assert!(!MatchingEngine::default_config().deterministic_match(&left, &right));
    }

    /// In strict mode a fuzzy score above the threshold is not a match without
    /// a deterministic match.
    #[test]
    fn strict_mode_requires_deterministic_for_is_match() {
        let config = MatchConfig {
            match_threshold: 0.85,
            strict_mode: true,
            ..MatchConfig::default()
        };
        let exact = john_smith();
        let fuzzy = jon_smith();
        let engine = MatchingEngine::new(config);
        let result = engine.match_workers(&exact, &fuzzy);
        assert!(
            result.score >= 0.85,
            "fuzzy score should clear lowered threshold"
        );
        assert!(!engine.deterministic_match(&exact, &fuzzy));
        assert!(
            !result.is_match,
            "strict mode must reject fuzzy-only matches even above threshold"
        );
    }

    /// In strict mode an identical record with an NHS number is a match.
    #[test]
    fn strict_mode_accepts_when_deterministic_holds() {
        let config = MatchConfig::strict();
        let worker = Worker::builder()
            .uk_nhs_number("9434765919")
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Male)
            .build();
        let twin = worker.clone();
        let result = MatchingEngine::new(config).match_workers(&worker, &twin);
        assert!(result.is_match);
    }

    /// With the default config a fuzzy-only pair is a match.
    #[test]
    fn non_strict_mode_accepts_fuzzy_match_above_threshold() {
        let exact = john_smith();
        let fuzzy = jon_smith();
        let result = MatchingEngine::default_config().match_workers(&exact, &fuzzy);
        assert!(result.is_match);
    }

    /// Matching against an empty candidate list yields no results.
    #[test]
    fn match_one_to_many_empty_candidates_yields_empty_vec() {
        let engine = MatchingEngine::default_config();
        let query = Worker::builder().given_name("Solo").build();
        assert!(engine.match_one_to_many(&query, &[]).is_empty());
    }

    /// Batch results come back in candidate order.
    #[test]
    fn match_one_to_many_preserves_order() {
        let engine = MatchingEngine::default_config();
        let query = ada_lovelace();
        let candidates = vec![
            Worker::builder()
                .given_name("Grace")
                .family_name("Hopper")
                .build(),
            query.clone(),
            Worker::builder()
                .given_name("Alan")
                .family_name("Turing")
                .build(),
        ];
        let results = engine.match_one_to_many(&query, &candidates);
        assert_eq!(results.len(), 3);
        assert!(results[1].score > results[0].score);
        assert!(results[1].score > results[2].score);
        assert!(results[1].is_match);
    }

    /// Batch results equal the results of matching each candidate individually.
    #[test]
    fn match_one_to_many_matches_individual_scoring() {
        let engine = MatchingEngine::default_config();
        let query = ada_lovelace();
        let candidates = vec![
            Worker::builder()
                .given_name("Ada")
                .family_name("Lovelace")
                .build(),
            Worker::builder()
                .given_name("Alan")
                .family_name("Turing")
                .build(),
        ];
        let batch = engine.match_one_to_many(&query, &candidates);
        for (index, candidate) in candidates.iter().enumerate() {
            let single = engine.match_workers(&query, candidate);
            assert!((batch[index].score - single.score).abs() < 1e-12);
            assert_eq!(batch[index].is_match, single.is_match);
            assert_eq!(batch[index].confidence, single.confidence);
        }
    }

    /// Ranked results are ordered by score, highest first.
    #[test]
    fn rank_one_to_many_sorts_by_score_descending() {
        let engine = MatchingEngine::default_config();
        let query = ada_lovelace();
        let candidates = vec![
            Worker::builder()
                .given_name("Grace")
                .family_name("Hopper")
                .build(),
            query.clone(),
            Worker::builder()
                .given_name("Alan")
                .family_name("Turing")
                .build(),
        ];
        let ranked = engine.rank_one_to_many(&query, &candidates);
        assert_eq!(ranked.len(), 3);
        assert_eq!(ranked[0].0, 1);
        for pair in ranked.windows(2) {
            assert!(pair[0].1.score >= pair[1].1.score);
        }
    }

    /// Candidates with equal scores keep their original index order.
    #[test]
    fn rank_one_to_many_breaks_ties_by_ascending_original_index() {
        let engine = MatchingEngine::default_config();
        let query = ada_lovelace();
        let twin = query.clone();
        let candidates = vec![twin.clone(), twin.clone(), twin.clone()];
        let ranked = engine.rank_one_to_many(&query, &candidates);
        assert_eq!(ranked.len(), 3);
        assert_eq!(ranked[0].0, 0);
        assert_eq!(ranked[1].0, 1);
        assert_eq!(ranked[2].0, 2);
    }

    /// Two identical batch calls produce the same scores.
    #[test]
    fn match_one_to_many_is_deterministic_across_calls() {
        let engine = MatchingEngine::default_config();
        let query = Worker::builder().given_name("X").family_name("Y").build();
        let candidates = vec![
            Worker::builder().given_name("X").family_name("Y").build(),
            Worker::builder().given_name("A").family_name("B").build(),
        ];
        let first_run = engine.match_one_to_many(&query, &candidates);
        let second_run = engine.match_one_to_many(&query, &candidates);
        for (index, first) in first_run.iter().enumerate() {
            assert!((first.score - second_run[index].score).abs() < 1e-12);
        }
    }

    /// Identical dates score 1.0.
    #[test]
    fn dob_pair_exact_equal_scores_one() {
        assert_eq!(score_dob_pair(dob(1995, 1, 10), dob(1995, 1, 10)), 1.0);
    }

    /// A day/month transposition scores 0.5 in both argument orders.
    #[test]
    fn dob_pair_day_month_swap_scores_half() {
        let january_tenth = dob(1995, 1, 10);
        let october_first = dob(1995, 10, 1);
        assert_eq!(score_dob_pair(january_tenth, october_first), 0.5);
        assert_eq!(score_dob_pair(october_first, january_tenth), 0.5);
    }

    /// A transposition in a different year scores 0.0.
    #[test]
    fn dob_pair_swap_requires_year_to_match() {
        assert_eq!(score_dob_pair(dob(1995, 1, 10), dob(1996, 10, 1)), 0.0);
    }

    /// A day above 12 cannot be swapped into a month, so no partial credit.
    #[test]
    fn dob_pair_swap_skipped_when_day_exceeds_12() {
        let base = dob(1995, 1, 25);
        assert_eq!(score_dob_pair(base, dob(1995, 1, 26)), 0.0);
        assert_eq!(score_dob_pair(base, dob(1995, 2, 25)), 0.0);
    }

    /// Dates that are neither equal nor transposed score 0.0.
    #[test]
    fn dob_pair_unrelated_dates_score_zero() {
        let base = dob(1995, 1, 10);
        assert_eq!(score_dob_pair(base, dob(1980, 6, 30)), 0.0);
        assert_eq!(score_dob_pair(base, dob(1995, 1, 11)), 0.0);
    }

    /// When day equals month the swap is the same date, so only exact equality scores.
    #[test]
    fn dob_pair_day_equals_month_collapses_to_exact() {
        let fifth_of_may = dob(1995, 5, 5);
        assert_eq!(score_dob_pair(fifth_of_may, dob(1995, 5, 5)), 1.0);
        assert_eq!(score_dob_pair(fifth_of_may, dob(1995, 5, 6)), 0.0);
    }

    /// Leap-day and February inputs are scored without panicking.
    #[test]
    fn dob_pair_invalid_swap_target_does_not_panic() {
        assert_eq!(score_dob_pair(dob(2000, 2, 29), dob(2000, 2, 29)), 1.0);
        assert_eq!(score_dob_pair(dob(2000, 2, 12), dob(2000, 12, 2)), 0.5);
    }

    /// A transposed date of birth is not a deterministic match.
    #[test]
    fn deterministic_match_still_rejects_transposed_dob() {
        let left = thomas_price(dob(1995, 1, 10));
        let right = thomas_price(dob(1995, 10, 1));
        assert!(!MatchingEngine::default_config().deterministic_match(&left, &right));
    }

    /// A transposed date of birth gets a 0.5 date score in the breakdown.
    #[test]
    fn transposed_dob_lifts_probabilistic_score_above_zero() {
        let left = thomas_price(dob(1995, 1, 10));
        let right = thomas_price(dob(1995, 10, 1));
        let result = MatchingEngine::default_config().match_workers(&left, &right);
        assert_eq!(result.breakdown.date_of_birth_score, Some(0.5));
        assert!(result.score > 0.6);
    }

    /// A score exactly on a band boundary belongs to the higher band.
    #[test]
    fn confidence_band_boundaries_are_inclusive_on_the_low_side() {
        assert_eq!(Confidence::from_score(0.90), Confidence::High);
        assert_eq!(Confidence::from_score(0.89), Confidence::Medium);
        assert_eq!(Confidence::from_score(0.75), Confidence::Medium);
        assert_eq!(Confidence::from_score(0.74), Confidence::Low);
    }

    /// NaN and out-of-range scores map to a band without panicking.
    #[test]
    fn confidence_handles_degenerate_inputs_gracefully() {
        assert_eq!(Confidence::from_score(f64::NAN), Confidence::Low);
        assert_eq!(Confidence::from_score(-1.0), Confidence::Low);
        assert_eq!(Confidence::from_score(2.0), Confidence::High);
    }

    /// The strict and lenient presets give the same confidence for the same pair.
    #[test]
    fn confidence_is_independent_of_match_threshold() {
        let worker = ada_lovelace();
        let strict =
            MatchingEngine::new(MatchConfig::strict()).match_workers(&worker, &worker.clone());
        let lenient =
            MatchingEngine::new(MatchConfig::lenient()).match_workers(&worker, &worker.clone());
        assert_eq!(strict.confidence, lenient.confidence);
        assert_eq!(strict.confidence, Confidence::High);
    }

    /// An identical pair has high confidence in the match result.
    #[test]
    fn match_result_carries_confidence() {
        let worker = ada_lovelace();
        let result = MatchingEngine::default_config().match_workers(&worker, &worker.clone());
        assert_eq!(result.confidence, Confidence::High);
    }

    /// The confidence field survives a JSON round trip of the match result.
    #[test]
    fn match_result_confidence_round_trips_via_serde() {
        let worker = ada_lovelace();
        let result = MatchingEngine::default_config().match_workers(&worker, &worker.clone());
        let json = serde_json::to_string(&result).unwrap();
        let restored: MatchResult = serde_json::from_str(&json).unwrap();
        assert_eq!(result.confidence, restored.confidence);
    }

    /// Records with no names are never a deterministic match.
    #[test]
    fn deterministic_rejects_when_names_missing() {
        let nameless = Worker::builder()
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Male)
            .build();
        let twin = nameless.clone();
        assert!(!MatchingEngine::default_config().deterministic_match(&nameless, &twin));
    }

    /// Identical city and country score 1.0.
    #[test]
    fn score_named_place_both_subfields_blend_seven_three() {
        let left = Address::new().with_city("Paris").with_country("France");
        let right = Address::new().with_city("Paris").with_country("France");
        assert_eq!(score_named_place(&left, &right), Some(1.0));
    }

    /// With only cities present, identical cities score 1.0.
    #[test]
    fn score_named_place_city_only_matches_returns_city_score() {
        let left = Address::new().with_city("Cardiff");
        let right = Address::new().with_city("Cardiff");
        assert_eq!(score_named_place(&left, &right), Some(1.0));
    }

    /// With only countries present, identical countries score 1.0.
    #[test]
    fn score_named_place_country_only_matches_returns_country_score() {
        let left = Address::new().with_country("Wales");
        let right = Address::new().with_country("Wales");
        assert_eq!(score_named_place(&left, &right), Some(1.0));
    }

    /// Places with neither city nor country have no score.
    #[test]
    fn score_named_place_empty_returns_none() {
        let left = Address::new();
        let right = Address::new();
        assert_eq!(score_named_place(&left, &right), None);
    }

    /// Same city with different countries scores the city share, 0.7.
    #[test]
    fn score_named_place_city_partial_country_mismatch_blends() {
        let left = Address::new().with_city("Paris").with_country("France");
        let right = Address::new().with_city("Paris").with_country("USA");
        let blended = score_named_place(&left, &right).unwrap();
        assert!((blended - 0.7).abs() < 1e-9);
    }

    /// The default config has the expected death-date and death-place weights.
    #[test]
    fn match_config_default_carries_death_weights() {
        let config = MatchConfig::default();
        assert!((config.death_date_weight - 0.10).abs() < 1e-9);
        assert!((config.death_place_weight - 0.05).abs() < 1e-9);
    }

    /// Equal death dates on both sides give a 1.0 death-date score.
    #[test]
    fn breakdown_carries_death_date_score_when_both_sides_present() {
        let left = Worker::builder()
            .given_name("X")
            .family_name("Y")
            .death_date(dob(2020, 3, 14))
            .build();
        let right = Worker::builder()
            .given_name("X")
            .family_name("Y")
            .death_date(dob(2020, 3, 14))
            .build();
        let result = MatchingEngine::default_config().match_workers(&left, &right);
        assert_eq!(result.breakdown.death_date_score, Some(1.0));
    }

    /// Same postcode and city with a slightly different street scores at least 0.7.
    #[test]
    fn address_subscore_exact_postcode_plus_slightly_different_street_clears_seven_tenths() {
        let engine = MatchingEngine::default_config();
        let left = Address {
            line1: Some("10 High Street".into()),
            postcode: Some("CF10 1AA".into()),
            city: Some("Cardiff".into()),
            ..Address::new()
        };
        let right = Address {
            line1: Some("10 High Road".into()),
            postcode: Some("CF10 1AA".into()),
            city: Some("Cardiff".into()),
            ..Address::new()
        };
        let s = engine.compare_addresses(&left, &right);
        assert!(
            s >= 0.7,
            "exact postcode + slight street typo should score ≥ 0.7: {s}"
        );
    }

    /// Addresses with only a matching postcode score exactly 1.0.
    #[test]
    fn address_subscore_postcode_only_match_returns_one() {
        let engine = MatchingEngine::default_config();
        let left = Address {
            postcode: Some("CF10 1AA".into()),
            ..Address::new()
        };
        let right = Address {
            postcode: Some("CF10 1AA".into()),
            ..Address::new()
        };
        let s = engine.compare_addresses(&left, &right);
        assert!((s - 1.0).abs() < 1e-9, "postcode-only match: {s}");
    }

    /// Addresses with nothing comparable score the neutral 0.5.
    #[test]
    fn address_subscore_no_comparable_fields_returns_neutral_half() {
        let engine = MatchingEngine::default_config();
        let s = engine.compare_addresses(&Address::new(), &Address::new());
        assert!((s - 0.5).abs() < 1e-9, "neutral fallback: {s}");
    }

    /// A matching postcode keeps the score at 0.5 or above despite different streets.
    #[test]
    fn address_subscore_postcode_match_plus_street_mismatch_dominated_by_postcode() {
        let engine = MatchingEngine::default_config();
        let left = Address {
            postcode: Some("CF10 1AA".into()),
            line1: Some("Wholly Different".into()),
            ..Address::new()
        };
        let right = Address {
            postcode: Some("CF10 1AA".into()),
            line1: Some("Completely Other".into()),
            ..Address::new()
        };
        let s = engine.compare_addresses(&left, &right);
        assert!(s >= 0.5, "postcode should still dominate: {s}");
    }

    /// A death place on only one side leaves the death-place score empty.
    #[test]
    fn breakdown_omits_death_place_score_when_one_side_absent() {
        let with_place = Worker::builder()
            .given_name("X")
            .family_name("Y")
            .death_place(Address::new().with_city("Cambridge"))
            .build();
        let without_place = Worker::builder().given_name("X").family_name("Y").build();
        let result = MatchingEngine::default_config().match_workers(&with_place, &without_place);
        assert_eq!(result.breakdown.death_place_score, None);
    }
}