use bumpalo::Bump;
use tracing::instrument;
use crate::{
matching::{
Feature, MatchingAlgorithm,
matchers::{jaro_winkler::JaroNameParts, soundex::SoundexNameParts},
run_features,
},
model::{Entity, SearchEntity},
};
pub struct NameBased;
impl MatchingAlgorithm for NameBased {
fn name() -> &'static str {
"name-based"
}
#[instrument(name = "score_hit", skip_all)]
fn score(bump: &Bump, lhs: &SearchEntity, rhs: &Entity, cutoff: f64) -> (f64, Vec<(&'static str, f64)>) {
let features: &[(&dyn Feature, f64)] = &[(&SoundexNameParts, 0.5), (&JaroNameParts, 0.5)];
let mut results = Vec::with_capacity(features.len());
let score = run_features(bump, lhs, rhs, 0.0, cutoff, features, &mut results);
(score.clamp(0.0, 1.0), results)
}
}
#[cfg(test)]
mod tests {
use bumpalo::Bump;
use float_cmp::approx_eq;
use crate::{
matching::{Algorithm, MatchingAlgorithm, name_based::NameBased},
model::{Entity, SearchEntity},
tests::python::nomenklatura_score,
};
#[test]
fn name_based() {
let e1 = SearchEntity::builder("Person").properties(&[("name", &["Vladimir Putin"])]).build();
let e2 = Entity::builder("Person").properties(&[("name", &["Vladimir Putin"])]).build();
let (score, _) = NameBased::score(&Bump::new(), &e1, &e2, 0.0);
assert_eq!(score, 1.0);
}
#[test]
#[serial_test::serial]
fn against_nomenklatura() {
pyo3::prepare_freethreaded_python();
let query = SearchEntity::builder("Person").properties(&[("name", &["Vladimir Putin"])]).build();
let results = vec![
Entity::builder("Person")
.id("Q7747")
.properties(&[("name", &["PUTIN, Vladimir Vladimirovich", "Владимир Владимирович Путин", "Vladimir Vladimirovich Putin"])])
.build(),
Entity::builder("Person")
.id("NK-5dEHMo3SqLdUgnTVvTtejp")
.properties(&[("name", &["Vladimir Nikitovich Skoch", "SKOCH, Vladimir Nikitovich", "Владимир Никитович Скоч"])])
.build(),
Entity::builder("Person")
.id("NK-8bMT7hixpkpiKCpEHUupAp")
.properties(&[("name", &["POLIN, Vladimir Anatolevich", "Владимир Анатольевич Полин", "Vladimir Anatolevich Polin"])])
.build(),
Entity::builder("Person")
.id("Q108898811")
.properties(&[("name", &["PLYAKIN, Vladimir Vladimirovich", "Vladimir Vladimirovich Plyakin", "Владимир Владимирович Плякин"])])
.build(),
];
let nscores = nomenklatura_score(Algorithm::NameBased, &query, results.clone()).unwrap();
for (index, (_, nscore)) in nscores.into_iter().enumerate() {
let (score, _) = NameBased::score(&Bump::new(), &query, results.get(index).unwrap(), 0.0);
assert!(approx_eq!(f64, score, nscore, epsilon = 0.05));
}
}
}