use crate::models::{Address, Patient};
use crate::normalizer::Normalizer;
use crate::scorer::{Scorer, SimilarityAlgorithm};
use nhs_number::NHSNumber;
use serde::{Deserialize, Serialize};
use std::str::FromStr;
/// Tunable settings for the matching engine: the acceptance threshold, the
/// per-field weights used when averaging field scores, and name-comparison options.
#[derive(Debug, Clone)]
pub struct MatchConfig {
/// Minimum overall score at which `MatchingEngine::match_patients` reports a match.
pub match_threshold: f64,
/// Weight of the NHS number score in the weighted average.
pub nhs_number_weight: f64,
/// Weight of the given-name similarity score in the weighted average.
pub given_name_weight: f64,
/// Weight of the family-name similarity score in the weighted average.
pub family_name_weight: f64,
/// Weight of the date-of-birth score in the weighted average.
pub date_of_birth_weight: f64,
/// Weight of the gender score in the weighted average.
pub gender_weight: f64,
/// Weight of the address score in the weighted average.
pub address_weight: f64,
/// Weight of the phone score in the weighted average.
pub phone_weight: f64,
/// When `true`, a phonetic name score is computed and recorded in the breakdown.
pub use_phonetic_matching: bool,
/// Similarity algorithm applied to normalised given and family names.
pub name_algorithm: SimilarityAlgorithm,
/// Set to `true` by `MatchConfig::strict`.
/// NOTE(review): none of the matching code visible here reads this flag — confirm where it takes effect.
pub strict_mode: bool,
}
impl Default for MatchConfig {
    /// Returns the baseline configuration.
    ///
    /// The acceptance threshold is 0.85, the seven field weights add up to 1.0,
    /// phonetic matching is enabled, names are compared with the combined
    /// algorithm and strict mode is off.
    fn default() -> Self {
        Self {
            // Decision rule.
            strict_mode: false,
            match_threshold: 0.85,

            // How names are compared.
            name_algorithm: SimilarityAlgorithm::Combined,
            use_phonetic_matching: true,

            // Field weights, heaviest first.
            nhs_number_weight: 0.30,
            date_of_birth_weight: 0.20,
            family_name_weight: 0.20,
            given_name_weight: 0.15,
            phone_weight: 0.05,
            address_weight: 0.05,
            gender_weight: 0.05,
        }
    }
}
impl MatchConfig {
    /// Preset with a 0.95 acceptance threshold and `strict_mode` switched on;
    /// every other setting keeps its default value.
    pub fn strict() -> Self {
        let baseline = Self::default();
        Self {
            strict_mode: true,
            match_threshold: 0.95,
            ..baseline
        }
    }

    /// Preset with a 0.75 acceptance threshold and phonetic matching enabled;
    /// every other setting keeps its default value.
    pub fn lenient() -> Self {
        let baseline = Self::default();
        Self {
            use_phonetic_matching: true,
            match_threshold: 0.75,
            ..baseline
        }
    }
}
/// Outcome of comparing two patients with `MatchingEngine::match_patients`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MatchResult {
/// Weighted average of the field scores that could be computed
/// (0.0 when no weight was accumulated).
pub score: f64,
/// `true` when `score` is at least the configured `match_threshold`.
pub is_match: bool,
/// The individual field scores that `score` was derived from.
pub breakdown: MatchBreakdown,
}
/// Per-field scores behind a `MatchResult`.
///
/// A field is `None` when it could not be compared, so it is left out of the
/// weighted average instead of counting as a mismatch.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MatchBreakdown {
/// 1.0 / 0.0 for equal / different NHS numbers; `None` if either is absent or fails to parse.
pub nhs_number_score: Option<f64>,
/// Similarity of the normalised given names; `None` if either is absent.
pub given_name_score: Option<f64>,
/// Similarity of the normalised family names; `None` if either is absent.
pub family_name_score: Option<f64>,
/// 1.0 / 0.0 for equal / different dates of birth; `None` if either is absent.
pub date_of_birth_score: Option<f64>,
/// 1.0 / 0.0 for equal / different genders; `None` if either is absent.
pub gender_score: Option<f64>,
/// Address comparison score; `None` if either patient has no address.
pub address_score: Option<f64>,
/// Phone comparison score; `None` if either patient has neither phone nor mobile.
pub phone_score: Option<f64>,
/// Fraction of the two names (given, family) whose phonetic codes agree;
/// `None` if phonetic matching is disabled or any of the four names is absent.
pub phonetic_name_score: Option<f64>,
}
/// Compares pairs of patients, either by a weighted similarity score
/// (`match_patients`) or by exact rules (`deterministic_match`).
pub struct MatchingEngine {
// Threshold, weights and name-comparison options used for scoring.
config: MatchConfig,
}
impl MatchingEngine {
pub fn new(config: MatchConfig) -> Self {
Self { config }
}
pub fn default_config() -> Self {
Self::new(MatchConfig::default())
}
/// Scores how likely two records describe the same patient.
///
/// Computes the per-field breakdown, combines it into one weighted score and
/// flags a match when that score reaches the configured `match_threshold`.
pub fn match_patients(&self, patient1: &Patient, patient2: &Patient) -> MatchResult {
    let field_scores = self.calculate_breakdown(patient1, patient2);
    let overall = self.calculate_weighted_score(&field_scores);
    MatchResult {
        score: overall,
        is_match: overall >= self.config.match_threshold,
        breakdown: field_scores,
    }
}
/// Rule-based identity check that uses no fuzzy scoring.
///
/// Returns `true` when both records carry NHS numbers that parse successfully
/// and are equal. Otherwise the demographics must line up exactly: normalised
/// given names and family names must be present on both sides and equal, dates
/// of birth must be present on both sides and equal, and genders must agree
/// when both are recorded (a gender missing on either side is tolerated).
pub fn deterministic_match(&self, patient1: &Patient, patient2: &Patient) -> bool {
    // A shared, parseable NHS number decides the outcome on its own.
    if let (Some(raw1), Some(raw2)) = (&patient1.nhs_number, &patient2.nhs_number)
        && let (Ok(nhs1), Ok(nhs2)) = (NHSNumber::from_str(raw1), NHSNumber::from_str(raw2))
        && nhs1 == nhs2
    {
        return true;
    }

    // Given names are mandatory on both records.
    let (Some(given1), Some(given2)) = (&patient1.given_name, &patient2.given_name) else {
        return false;
    };
    if Normalizer::normalize_name(given1) != Normalizer::normalize_name(given2) {
        return false;
    }

    // So are family names.
    let (Some(family1), Some(family2)) = (&patient1.family_name, &patient2.family_name) else {
        return false;
    };
    if Normalizer::normalize_name(family1) != Normalizer::normalize_name(family2) {
        return false;
    }

    // And dates of birth.
    let (Some(born1), Some(born2)) = (patient1.date_of_birth, patient2.date_of_birth) else {
        return false;
    };
    if born1 != born2 {
        return false;
    }

    // Gender only vetoes the match when both sides record one and they differ.
    !matches!(
        (patient1.gender, patient2.gender),
        (Some(g1), Some(g2)) if g1 != g2
    )
}
/// Runs every field comparison and collects the results.
///
/// The phonetic score is only computed when `use_phonetic_matching` is set;
/// otherwise it is recorded as `None`.
fn calculate_breakdown(&self, patient1: &Patient, patient2: &Patient) -> MatchBreakdown {
    let phonetic_name_score = self
        .config
        .use_phonetic_matching
        .then(|| self.score_phonetic_names(patient1, patient2))
        .flatten();

    let nhs_number_score = self.score_nhs_number(patient1, patient2);
    let given_name_score = self.score_given_name(patient1, patient2);
    let family_name_score = self.score_family_name(patient1, patient2);
    let date_of_birth_score = self.score_date_of_birth(patient1, patient2);
    let gender_score = self.score_gender(patient1, patient2);
    let address_score = self.score_address(patient1, patient2);
    let phone_score = self.score_phone(patient1, patient2);

    MatchBreakdown {
        nhs_number_score,
        given_name_score,
        family_name_score,
        date_of_birth_score,
        gender_score,
        address_score,
        phone_score,
        phonetic_name_score,
    }
}
fn calculate_weighted_score(&self, breakdown: &MatchBreakdown) -> f64 {
let mut total_weight = 0.0;
let mut weighted_sum = 0.0;
if let Some(score) = breakdown.nhs_number_score {
weighted_sum += score * self.config.nhs_number_weight;
total_weight += self.config.nhs_number_weight;
}
if let Some(score) = breakdown.given_name_score {
weighted_sum += score * self.config.given_name_weight;
total_weight += self.config.given_name_weight;
}
if let Some(score) = breakdown.family_name_score {
weighted_sum += score * self.config.family_name_weight;
total_weight += self.config.family_name_weight;
}
if let Some(score) = breakdown.date_of_birth_score {
weighted_sum += score * self.config.date_of_birth_weight;
total_weight += self.config.date_of_birth_weight;
}
if let Some(score) = breakdown.gender_score {
weighted_sum += score * self.config.gender_weight;
total_weight += self.config.gender_weight;
}
if let Some(score) = breakdown.address_score {
weighted_sum += score * self.config.address_weight;
total_weight += self.config.address_weight;
}
if let Some(score) = breakdown.phone_score {
weighted_sum += score * self.config.phone_weight;
total_weight += self.config.phone_weight;
}
if let Some(score) = breakdown.phonetic_name_score
&& score > 0.9
{
weighted_sum += score * 0.05;
total_weight += 0.05;
}
if total_weight > 0.0 {
weighted_sum / total_weight
} else {
0.0
}
}
/// Compares NHS numbers: 1.0 when equal, 0.0 when different.
///
/// Returns `None` when either number is absent or cannot be parsed, so an
/// unusable identifier is not counted as a mismatch.
fn score_nhs_number(&self, patient1: &Patient, patient2: &Patient) -> Option<f64> {
    let raw1 = patient1.nhs_number.as_ref()?;
    let raw2 = patient2.nhs_number.as_ref()?;
    let parsed1 = NHSNumber::from_str(raw1).ok()?;
    let parsed2 = NHSNumber::from_str(raw2).ok()?;
    Some(f64::from(parsed1 == parsed2))
}
/// Similarity of the two given names, or `None` when either is absent.
fn score_given_name(&self, patient1: &Patient, patient2: &Patient) -> Option<f64> {
    patient1
        .given_name
        .as_ref()
        .zip(patient2.given_name.as_ref())
        .map(|(left, right)| self.score_name(left, right))
}
/// Similarity of the two family names, or `None` when either is absent.
fn score_family_name(&self, patient1: &Patient, patient2: &Patient) -> Option<f64> {
    let left = patient1.family_name.as_ref()?;
    let right = patient2.family_name.as_ref()?;
    Some(self.score_name(left, right))
}
/// Normalises both names and compares them with the configured
/// `name_algorithm`.
fn score_name(&self, name1: &str, name2: &str) -> f64 {
    let (left, right) = (
        Normalizer::normalize_name(name1),
        Normalizer::normalize_name(name2),
    );
    match self.config.name_algorithm {
        SimilarityAlgorithm::Exact => Scorer::exact_match(&left, &right),
        SimilarityAlgorithm::Levenshtein => Scorer::levenshtein_similarity(&left, &right),
        SimilarityAlgorithm::JaroWinkler => Scorer::jaro_winkler_similarity(&left, &right),
        SimilarityAlgorithm::Combined => Scorer::combined_similarity(&left, &right),
    }
}
/// 1.0 when the dates of birth are equal, 0.0 when they differ, `None` when
/// either is absent.
fn score_date_of_birth(&self, patient1: &Patient, patient2: &Patient) -> Option<f64> {
    patient1
        .date_of_birth
        .zip(patient2.date_of_birth)
        .map(|(left, right)| f64::from(left == right))
}
/// 1.0 when the genders are equal, 0.0 when they differ, `None` when either
/// is absent.
fn score_gender(&self, patient1: &Patient, patient2: &Patient) -> Option<f64> {
    let left = patient1.gender?;
    let right = patient2.gender?;
    Some(f64::from(left == right))
}
/// Address comparison score, or `None` when either patient has no address.
fn score_address(&self, patient1: &Patient, patient2: &Patient) -> Option<f64> {
    let left = patient1.address.as_ref()?;
    let right = patient2.address.as_ref()?;
    Some(self.compare_addresses(left, right))
}
/// Scores two addresses as the weighted average of their comparable parts.
///
/// Components and weights: postcode (exact match after normalisation, 0.5),
/// city (Jaro-Winkler on normalised text, 0.3) and first address line
/// (Jaro-Winkler on normalised text, 0.2). A component is only considered when
/// both addresses provide it. The sum of weighted scores is divided by the sum
/// of the weights actually used, so two identical addresses score 1.0 however
/// many components they carry.
///
/// Returns a neutral 0.5 when no component can be compared.
fn compare_addresses(&self, addr1: &Address, addr2: &Address) -> f64 {
    const POSTCODE_WEIGHT: f64 = 0.5;
    const CITY_WEIGHT: f64 = 0.3;
    const LINE1_WEIGHT: f64 = 0.2;
    const NEUTRAL_SCORE: f64 = 0.5;

    let mut weighted_sum = 0.0;
    let mut total_weight = 0.0;

    if let (Some(pc1), Some(pc2)) = (&addr1.postcode, &addr2.postcode) {
        let same_postcode =
            Normalizer::normalize_postcode(pc1) == Normalizer::normalize_postcode(pc2);
        weighted_sum += f64::from(same_postcode) * POSTCODE_WEIGHT;
        total_weight += POSTCODE_WEIGHT;
    }

    if let (Some(city1), Some(city2)) = (&addr1.city, &addr2.city) {
        let norm1 = Normalizer::normalize_name(city1);
        let norm2 = Normalizer::normalize_name(city2);
        weighted_sum += Scorer::jaro_winkler_similarity(&norm1, &norm2) * CITY_WEIGHT;
        total_weight += CITY_WEIGHT;
    }

    if let (Some(line1), Some(line2)) = (&addr1.line1, &addr2.line1) {
        let norm1 = Normalizer::normalize_name(line1);
        let norm2 = Normalizer::normalize_name(line2);
        weighted_sum += Scorer::jaro_winkler_similarity(&norm1, &norm2) * LINE1_WEIGHT;
        total_weight += LINE1_WEIGHT;
    }

    // Divide by the weight actually used, not by the number of components:
    // dividing by the count capped a perfect three-part match at about 0.33.
    if total_weight > 0.0 {
        weighted_sum / total_weight
    } else {
        NEUTRAL_SCORE
    }
}
/// Compares contact numbers: 1.0 when the patients share at least one
/// normalised number, 0.0 when they share none.
///
/// Both `phone` and `mobile` are considered on each side, so a shared mobile
/// is recognised even when one record also lists a different landline.
/// Returns `None` when either patient has neither a phone nor a mobile number.
fn score_phone(&self, patient1: &Patient, patient2: &Patient) -> Option<f64> {
    // Borrow the stored numbers; normalisation produces the values we compare.
    let numbers1: Vec<_> = [patient1.phone.as_ref(), patient1.mobile.as_ref()]
        .into_iter()
        .flatten()
        .map(|number| Normalizer::normalize_phone(number))
        .collect();
    let numbers2: Vec<_> = [patient2.phone.as_ref(), patient2.mobile.as_ref()]
        .into_iter()
        .flatten()
        .map(|number| Normalizer::normalize_phone(number))
        .collect();

    if numbers1.is_empty() || numbers2.is_empty() {
        return None;
    }

    let shares_number = numbers1
        .iter()
        .any(|left| numbers2.iter().any(|right| left == right));
    Some(f64::from(shares_number))
}
/// Fraction of the two name parts (given, family) whose phonetic codes agree:
/// 0.0, 0.5 or 1.0.
///
/// Returns `None` unless both patients have a given name and a family name.
fn score_phonetic_names(&self, patient1: &Patient, patient2: &Patient) -> Option<f64> {
    // All four names are required before anything is encoded.
    let given1 = patient1.given_name.as_ref()?;
    let family1 = patient1.family_name.as_ref()?;
    let given2 = patient2.given_name.as_ref()?;
    let family2 = patient2.family_name.as_ref()?;

    let given_agrees = Normalizer::phonetic_code(given1) == Normalizer::phonetic_code(given2);
    let family_agrees = Normalizer::phonetic_code(family1) == Normalizer::phonetic_code(family2);

    Some((f64::from(given_agrees) + f64::from(family_agrees)) / 2.0)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::Gender;
    use chrono::NaiveDate;

    /// Shorthand for a calendar date; panics if the date does not exist.
    fn dob(y: i32, m: u32, d: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(y, m, d).expect("valid date")
    }

    // ---- MatchConfig presets ----

    #[test]
    fn config_default_values() {
        let config = MatchConfig::default();
        assert!((config.match_threshold - 0.85).abs() < 1e-9);
        assert!((config.nhs_number_weight - 0.30).abs() < 1e-9);
        assert!(config.use_phonetic_matching);
        assert!(!config.strict_mode);
    }

    #[test]
    fn config_strict_raises_threshold_and_sets_flag() {
        let config = MatchConfig::strict();
        assert!((config.match_threshold - 0.95).abs() < 1e-9);
        assert!(config.strict_mode);
    }

    #[test]
    fn config_lenient_lowers_threshold() {
        let config = MatchConfig::lenient();
        assert!((config.match_threshold - 0.75).abs() < 1e-9);
        assert!(config.use_phonetic_matching);
    }

    // ---- Weighted matching ----

    #[test]
    fn exact_clone_is_a_match() {
        let original = Patient::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Male)
            .nhs_number("9434765919")
            .build();
        let copy = original.clone();
        let engine = MatchingEngine::default_config();
        let outcome = engine.match_patients(&original, &copy);
        assert!(outcome.is_match);
        assert!(outcome.score > 0.95);
    }

    #[test]
    fn fuzzy_given_name_still_matches() {
        let john = Patient::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Male)
            .build();
        let jon = Patient::builder()
            .given_name("Jon")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Male)
            .build();
        let engine = MatchingEngine::default_config();
        let outcome = engine.match_patients(&john, &jon);
        assert!(outcome.is_match);
        assert!(outcome.score > 0.85);
    }

    #[test]
    fn completely_different_patients_do_not_match() {
        let john = Patient::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Male)
            .build();
        let jane = Patient::builder()
            .given_name("Jane")
            .family_name("Doe")
            .date_of_birth(dob(1990, 3, 20))
            .gender(Gender::Female)
            .build();
        let engine = MatchingEngine::default_config();
        let outcome = engine.match_patients(&john, &jane);
        assert!(!outcome.is_match);
        assert!(outcome.score < 0.5);
    }

    #[test]
    fn no_overlapping_fields_returns_zero_score() {
        // One record has only a given name, the other only a family name.
        let given_only = Patient::builder().given_name("Solo").build();
        let family_only = Patient::builder().family_name("Only").build();
        let engine = MatchingEngine::default_config();
        let outcome = engine.match_patients(&given_only, &family_only);
        assert_eq!(outcome.score, 0.0);
        assert!(!outcome.is_match);
    }

    #[test]
    fn unparseable_nhs_number_is_none_not_zero() {
        let first = Patient::builder()
            .nhs_number("not-a-number")
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .build();
        let second = Patient::builder()
            .nhs_number("also-not-a-number")
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .build();
        let engine = MatchingEngine::default_config();
        let outcome = engine.match_patients(&first, &second);
        assert_eq!(
            outcome.breakdown.nhs_number_score, None,
            "unparseable NHS numbers should not produce a 0.0 penalty"
        );
        assert!(outcome.is_match, "should still match on demographics");
    }

    #[test]
    fn missing_field_yields_none_in_breakdown() {
        let partial = Patient::builder().given_name("Ada").build();
        let complete = Patient::builder()
            .given_name("Ada")
            .family_name("Lovelace")
            .build();
        let engine = MatchingEngine::default_config();
        let outcome = engine.match_patients(&partial, &complete);
        assert!(outcome.breakdown.given_name_score.is_some());
        assert!(outcome.breakdown.family_name_score.is_none());
    }

    // ---- Phonetic matching ----

    #[test]
    fn phonetic_match_is_a_bonus_not_a_penalty() {
        let patient = Patient::builder()
            .given_name("Stephen")
            .family_name("Jones")
            .build();
        let copy = patient.clone();

        let phonetic_on = MatchingEngine::new(MatchConfig {
            use_phonetic_matching: true,
            ..MatchConfig::default()
        });
        let phonetic_off = MatchingEngine::new(MatchConfig {
            use_phonetic_matching: false,
            ..MatchConfig::default()
        });

        let with_phon = phonetic_on.match_patients(&patient, &copy);
        let without_phon = phonetic_off.match_patients(&patient, &copy);
        assert!(with_phon.score >= without_phon.score);
    }

    #[test]
    fn phonetic_score_disabled_when_config_off() {
        let steven = Patient::builder()
            .given_name("Steven")
            .family_name("Smith")
            .build();
        let stephen = Patient::builder()
            .given_name("Stephen")
            .family_name("Smyth")
            .build();
        let engine = MatchingEngine::new(MatchConfig {
            use_phonetic_matching: false,
            ..MatchConfig::default()
        });
        let outcome = engine.match_patients(&steven, &stephen);
        assert_eq!(outcome.breakdown.phonetic_name_score, None);
    }

    // ---- Address comparison ----

    #[test]
    fn address_with_no_subfields_is_neutral_half() {
        let engine = MatchingEngine::default_config();
        let left = Address::new();
        let right = Address::new();
        let score = engine.compare_addresses(&left, &right);
        assert!(
            (score - 0.5).abs() < 1e-9,
            "empty addresses must be neutral (0.5), got {score}"
        );
    }

    #[test]
    fn address_postcode_dominates() {
        let mut left = Address::new();
        left.postcode = Some("CF10 1AA".into());
        let mut right = Address::new();
        right.postcode = Some("CF10 1AA".into());
        let engine = MatchingEngine::default_config();
        let score = engine.compare_addresses(&left, &right);
        assert!(score > 0.0);
    }

    // ---- Deterministic matching ----

    #[test]
    fn deterministic_nhs_match_overrides_demographics() {
        // Same NHS number (one written with spaces) but different given names.
        let bob = Patient::builder()
            .nhs_number("943 476 5919")
            .given_name("Bob")
            .build();
        let alice = Patient::builder()
            .nhs_number("9434765919")
            .given_name("Alice")
            .build();
        let engine = MatchingEngine::default_config();
        assert!(engine.deterministic_match(&bob, &alice));
    }

    #[test]
    fn deterministic_demographics_match_when_all_align() {
        let patient = Patient::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Male)
            .build();
        let copy = patient.clone();
        let engine = MatchingEngine::default_config();
        assert!(engine.deterministic_match(&patient, &copy));
    }

    #[test]
    fn deterministic_demographics_tolerates_missing_gender() {
        let without_gender = Patient::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .build();
        let with_gender = Patient::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Male)
            .build();
        let engine = MatchingEngine::default_config();
        assert!(engine.deterministic_match(&without_gender, &with_gender));
    }

    #[test]
    fn deterministic_rejects_when_dob_differs() {
        let first = Patient::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Male)
            .build();
        // Born one day later.
        let second = Patient::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 16))
            .gender(Gender::Male)
            .build();
        let engine = MatchingEngine::default_config();
        assert!(!engine.deterministic_match(&first, &second));
    }

    #[test]
    fn deterministic_rejects_when_gender_differs() {
        let male = Patient::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Male)
            .build();
        let female = Patient::builder()
            .given_name("John")
            .family_name("Smith")
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Female)
            .build();
        let engine = MatchingEngine::default_config();
        assert!(!engine.deterministic_match(&male, &female));
    }

    #[test]
    fn deterministic_rejects_when_names_missing() {
        let nameless = Patient::builder()
            .date_of_birth(dob(1980, 5, 15))
            .gender(Gender::Male)
            .build();
        let copy = nameless.clone();
        let engine = MatchingEngine::default_config();
        assert!(!engine.deterministic_match(&nameless, &copy));
    }
}