use crate::models::{Address, Event, Location};
use crate::normalizer::Normalizer;
use crate::scorer::{Scorer, SimilarityAlgorithm};
use serde::{Deserialize, Serialize};
/// Tunable parameters for [`MatchingEngine`]: the decision threshold, the
/// per-component weights and the options that control name comparison.
///
/// Because of `#[serde(default)]`, any field absent from serialized input is
/// filled in from [`MatchConfig::default`].
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct MatchConfig {
/// Weighted score at or above which two events are reported as a match
/// (in `strict_mode` a deterministic match is required as well).
pub match_threshold: f64,
/// Weight of the name similarity component.
pub name_weight: f64,
/// Weight of the start-date component.
pub start_date_weight: f64,
/// Scale, in seconds, handed to `Scorer::start_date_score`. The engine
/// uses it for both the start-date and the end-date comparison.
pub start_date_scale_seconds: f64,
/// Weight of the end-date component.
pub end_date_weight: f64,
/// Weight of the location component.
pub location_weight: f64,
/// Scale, in metres, handed to `Scorer::coordinates_score` when both
/// locations carry valid coordinates.
pub coordinates_scale_metres: f64,
/// Weight of the category component.
pub category_weight: f64,
/// Weight of the country-code component.
pub country_code_weight: f64,
/// Weight of the shared-event-id component.
pub event_ids_weight: f64,
/// Weight of the organizer component.
pub organizer_weight: f64,
/// Weight of the performers component.
pub performers_weight: f64,
/// Weight of the URL component.
pub url_weight: f64,
/// When `true`, a phonetic name score is computed and can contribute a
/// small bonus to the weighted score.
pub use_phonetic_matching: bool,
/// String-similarity algorithm applied to normalised event names.
pub name_algorithm: SimilarityAlgorithm,
/// When `true`, `is_match` additionally requires
/// `MatchingEngine::deterministic_match` to hold.
pub strict_mode: bool,
}
impl Default for MatchConfig {
    /// Returns the baseline configuration: a 0.80 match threshold, the
    /// `Combined` name algorithm, phonetic matching and strict mode both off.
    fn default() -> Self {
        Self {
            // Decision rule.
            match_threshold: 0.80,
            strict_mode: false,

            // Name comparison.
            name_algorithm: SimilarityAlgorithm::Combined,
            use_phonetic_matching: false,

            // Component weights; the ten values below add up to 1.0.
            start_date_weight: 0.25,
            name_weight: 0.20,
            location_weight: 0.15,
            event_ids_weight: 0.15,
            category_weight: 0.08,
            end_date_weight: 0.05,
            country_code_weight: 0.04,
            organizer_weight: 0.04,
            performers_weight: 0.02,
            url_weight: 0.02,

            // Scales handed to the date and coordinate scorers.
            start_date_scale_seconds: 3600.0,
            coordinates_scale_metres: 100.0,
        }
    }
}
impl MatchConfig {
    /// Conservative preset: raises the threshold to 0.95 and turns on
    /// `strict_mode`; every other field keeps its default value.
    #[must_use]
    pub fn strict() -> Self {
        Self {
            strict_mode: true,
            match_threshold: 0.95,
            ..Self::default()
        }
    }

    /// Permissive preset: lowers the threshold to 0.65 and enables phonetic
    /// name matching; every other field keeps its default value.
    #[must_use]
    pub fn lenient() -> Self {
        Self {
            use_phonetic_matching: true,
            match_threshold: 0.65,
            ..Self::default()
        }
    }
}
/// Coarse confidence band derived from a match score by
/// [`Confidence::from_score`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Confidence {
/// Score of at least 0.90.
High,
/// Score of at least 0.75 but below 0.90.
Medium,
/// Any other score.
Low,
}
impl Confidence {
#[must_use]
pub fn from_score(score: f64) -> Self {
if score >= 0.90 {
Confidence::High
} else if score >= 0.75 {
Confidence::Medium
} else {
Confidence::Low
}
}
}
/// Outcome of comparing two events with [`MatchingEngine::match_events`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MatchResult {
/// Weighted average of the components that could be scored.
pub score: f64,
/// Whether the pair counts as a match under the engine's configuration.
pub is_match: bool,
/// Confidence band for `score`; falls back to `Confidence::Low` when the
/// field is absent from deserialized input.
#[serde(default = "default_confidence")]
pub confidence: Confidence,
/// Per-component scores that produced `score`.
pub breakdown: MatchBreakdown,
}
/// Serde fallback for `MatchResult::confidence` when the field is missing.
fn default_confidence() -> Confidence {
Confidence::Low
}
/// Per-component similarity scores behind a [`MatchResult`].
///
/// A field is `None` when that component could not be scored for the pair;
/// such components are left out of the weighted average instead of counting
/// as zero.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MatchBreakdown {
/// Best name similarity over the primary and alternate names of both events.
pub name_score: Option<f64>,
/// Phonetic name agreement; only computed when phonetic matching is enabled.
pub name_phonetic_score: Option<f64>,
/// Start-date closeness.
pub start_date_score: Option<f64>,
/// End-date closeness.
pub end_date_score: Option<f64>,
/// Location similarity (coordinates, address, venue name, virtual URL).
pub location_score: Option<f64>,
/// 1.0 when both categories are equal, 0.0 when they differ.
pub category_score: Option<f64>,
/// 1.0 when the country codes agree ignoring ASCII case and surrounding
/// whitespace, 0.0 otherwise.
pub country_code_score: Option<f64>,
/// 1.0 when the events share at least one event id, 0.0 otherwise.
pub event_ids_score: Option<f64>,
/// Organizer name similarity.
pub organizer_score: Option<f64>,
/// Best similarity over all performer pairs.
pub performers_score: Option<f64>,
/// 1.0 when the trimmed URLs are identical, 0.0 otherwise.
pub url_score: Option<f64>,
}
/// Scores pairs of events against each other according to a [`MatchConfig`].
pub struct MatchingEngine {
// Configuration read by every scoring method.
config: MatchConfig,
}
impl MatchingEngine {
/// Creates an engine that scores events according to `config`.
#[must_use]
pub fn new(config: MatchConfig) -> Self {
Self { config }
}
#[must_use]
pub fn default_config() -> Self {
Self::new(MatchConfig::default())
}
/// Compares two events and returns the weighted score, the match decision,
/// the confidence band and the per-component breakdown.
///
/// The pair is a match when the score reaches `match_threshold`; with
/// `strict_mode` enabled it must also pass `deterministic_match`.
#[must_use]
pub fn match_events(&self, event1: &Event, event2: &Event) -> MatchResult {
    let breakdown = self.calculate_breakdown(event1, event2);
    let score = self.calculate_weighted_score(&breakdown);

    // The deterministic check is only consulted in strict mode, and only
    // once the threshold has already been met.
    let is_match = score >= self.config.match_threshold
        && (!self.config.strict_mode || self.deterministic_match(event1, event2));

    MatchResult {
        confidence: Confidence::from_score(score),
        is_match,
        score,
        breakdown,
    }
}
/// Scores `query` against every candidate.
///
/// The returned vector is parallel to `candidates`: element `i` is the
/// result of matching `query` with `candidates[i]`.
#[must_use]
pub fn match_one_to_many(&self, query: &Event, candidates: &[Event]) -> Vec<MatchResult> {
    let mut results = Vec::with_capacity(candidates.len());
    for candidate in candidates {
        results.push(self.match_events(query, candidate));
    }
    results
}
/// Scores `query` against every candidate and returns the results ordered
/// from best to worst.
///
/// Each entry pairs the candidate's index in `candidates` with its
/// [`MatchResult`]. Equal scores keep ascending index order. A NaN score,
/// should one ever be produced, is ranked after every real score.
#[must_use]
pub fn rank_one_to_many(
    &self,
    query: &Event,
    candidates: &[Event],
) -> Vec<(usize, MatchResult)> {
    let mut indexed: Vec<(usize, MatchResult)> = self
        .match_one_to_many(query, candidates)
        .into_iter()
        .enumerate()
        .collect();
    indexed.sort_by(|a, b| {
        let (score_a, score_b) = (a.1.score, b.1.score);
        score_b
            .partial_cmp(&score_a)
            // `partial_cmp` only fails when a NaN is involved. Treating that
            // case as "equal" would break the total order `sort_by` requires
            // (it may panic or return an unspecified order), so NaN is
            // ordered explicitly: non-NaN first, NaN last.
            .unwrap_or_else(|| score_a.is_nan().cmp(&score_b.is_nan()))
            .then_with(|| a.0.cmp(&b.0))
    });
    indexed
}
/// Returns `true` when the two events are the same by a hard rule rather
/// than by score: they share an event id, or their normalised primary names
/// are equal and their start dates parse to the same instant.
#[must_use]
pub fn deterministic_match(&self, event1: &Event, event2: &Event) -> bool {
    shares_event_id(event1, event2) || name_and_start_date_match(event1, event2)
}
/// Runs every component scorer on the pair and collects the results.
///
/// The phonetic score is only computed when `use_phonetic_matching` is set;
/// otherwise it is `None`.
fn calculate_breakdown(&self, event1: &Event, event2: &Event) -> MatchBreakdown {
    let name_phonetic_score = self
        .config
        .use_phonetic_matching
        .then(|| Self::score_phonetic_names(event1, event2))
        .flatten();

    MatchBreakdown {
        name_score: self.score_name(event1, event2),
        name_phonetic_score,
        start_date_score: self.score_start_date(event1, event2),
        end_date_score: self.score_end_date(event1, event2),
        location_score: self.score_location(event1, event2),
        category_score: score_category(event1, event2),
        country_code_score: score_country_code(event1, event2),
        event_ids_score: score_event_ids(event1, event2),
        organizer_score: Self::score_organizer(event1, event2),
        performers_score: Self::score_performers(event1, event2),
        url_score: score_url(event1, event2),
    }
}
fn calculate_weighted_score(&self, breakdown: &MatchBreakdown) -> f64 {
let mut total_weight = 0.0;
let mut weighted_sum = 0.0;
let mut accumulate = |opt: Option<f64>, weight: f64| {
if let Some(score) = opt {
weighted_sum += score * weight;
total_weight += weight;
}
};
accumulate(breakdown.name_score, self.config.name_weight);
accumulate(breakdown.start_date_score, self.config.start_date_weight);
accumulate(breakdown.end_date_score, self.config.end_date_weight);
accumulate(breakdown.location_score, self.config.location_weight);
accumulate(breakdown.category_score, self.config.category_weight);
accumulate(
breakdown.country_code_score,
self.config.country_code_weight,
);
accumulate(breakdown.event_ids_score, self.config.event_ids_weight);
accumulate(breakdown.organizer_score, self.config.organizer_weight);
accumulate(breakdown.performers_score, self.config.performers_weight);
accumulate(breakdown.url_score, self.config.url_weight);
if let Some(score) = breakdown.name_phonetic_score
&& score > 0.9
{
weighted_sum += score * 0.05;
total_weight += 0.05;
}
if total_weight > 0.0 {
weighted_sum / total_weight
} else {
0.0
}
}
/// Best name similarity over every pairing of the two events' names
/// (primary plus alternates, blank names ignored).
///
/// Returns `None` when either event has no usable name.
fn score_name(&self, e1: &Event, e2: &Event) -> Option<f64> {
    let names1 = collect_names(e1);
    let names2 = collect_names(e2);
    if names1.is_empty() || names2.is_empty() {
        return None;
    }

    let best = names1
        .iter()
        .flat_map(|n1| names2.iter().map(move |n2| self.score_name_pair(n1, n2)))
        // Strict `>` keeps the running best on ties.
        .fold(f64::NEG_INFINITY, |best, s| if s > best { s } else { best });
    Some(best)
}
fn score_name_pair(&self, name1: &str, name2: &str) -> f64 {
let norm1 = Normalizer::normalize_name(name1);
let norm2 = Normalizer::normalize_name(name2);
match self.config.name_algorithm {
SimilarityAlgorithm::JaroWinkler => Scorer::jaro_winkler_similarity(&norm1, &norm2),
SimilarityAlgorithm::Levenshtein => Scorer::levenshtein_similarity(&norm1, &norm2),
SimilarityAlgorithm::Exact => Scorer::exact_match(&norm1, &norm2),
SimilarityAlgorithm::Combined => Scorer::combined_similarity(&norm1, &norm2),
}
}
/// Phonetic agreement between the two events' names.
///
/// Returns `Some(1.0)` when any name of `e1` and any name of `e2` produce
/// the same non-empty phonetic code, `Some(0.0)` when none do, and `None`
/// when either event has no usable name.
fn score_phonetic_names(e1: &Event, e2: &Event) -> Option<f64> {
    let names1 = collect_names(e1);
    let names2 = collect_names(e2);
    if names1.is_empty() || names2.is_empty() {
        return None;
    }

    let codes2: Vec<String> = names2
        .iter()
        .map(|n| Normalizer::phonetic_code(n))
        .collect();
    // An empty code carries no phonetic information, so it never counts as
    // agreement even if both sides are empty.
    let agree = names1
        .iter()
        .map(|n| Normalizer::phonetic_code(n))
        .any(|code| !code.is_empty() && codes2.contains(&code));

    Some(if agree { 1.0 } else { 0.0 })
}
/// Start-date closeness of the two events.
///
/// Returns `None` when either start date is missing or
/// `Scorer::seconds_between` cannot produce a gap for the pair.
// The integer-to-float cast of the gap is deliberate.
#[allow(clippy::cast_precision_loss)]
fn score_start_date(&self, e1: &Event, e2: &Event) -> Option<f64> {
    let first = e1.start_date.as_deref()?;
    let second = e2.start_date.as_deref()?;
    let gap = Scorer::seconds_between(first, second)?;
    let scale = self.config.start_date_scale_seconds;
    Some(Scorer::start_date_score(gap as f64, scale))
}
/// End-date closeness of the two events.
///
/// Uses the same scorer and the same `start_date_scale_seconds` as the
/// start-date comparison; `MatchConfig` has no separate end-date scale.
/// Returns `None` when either end date is missing or
/// `Scorer::seconds_between` cannot produce a gap for the pair.
// The integer-to-float cast of the gap is deliberate.
#[allow(clippy::cast_precision_loss)]
fn score_end_date(&self, e1: &Event, e2: &Event) -> Option<f64> {
    let first = e1.end_date.as_deref()?;
    let second = e2.end_date.as_deref()?;
    let gap = Scorer::seconds_between(first, second)?;
    let scale = self.config.start_date_scale_seconds;
    Some(Scorer::start_date_score(gap as f64, scale))
}
/// Location similarity, or `None` unless both events carry a location.
fn score_location(&self, e1: &Event, e2: &Event) -> Option<f64> {
    let first = e1.location.as_ref()?;
    let second = e2.location.as_ref()?;
    Some(self.compare_locations(first, second))
}
/// Weighted similarity of two locations.
///
/// Up to four parts are compared, each only when present on both sides:
/// coordinates (weight 0.5, both pairs must pass `valid_coords`), address
/// (0.3), venue name (0.15) and virtual URL (0.05, exact match after
/// trimming). The result is the weighted average of the parts that were
/// compared, or the neutral value 0.5 when none could be.
fn compare_locations(&self, l1: &Location, l2: &Location) -> f64 {
    let mut weighted_sum = 0.0_f64;
    let mut total_weight = 0.0_f64;
    let mut add = |score: f64, weight: f64| {
        weighted_sum += score * weight;
        total_weight += weight;
    };

    // `valid_coords` already yields `None` when a latitude or longitude is
    // absent, so no separate presence check is needed.
    let here = valid_coords(l1.latitude, l1.longitude);
    let there = valid_coords(l2.latitude, l2.longitude);
    if let (Some((la1, lo1)), Some((la2, lo2))) = (here, there) {
        let metres = Scorer::haversine_metres(la1, lo1, la2, lo2);
        add(
            Scorer::coordinates_score(metres, self.config.coordinates_scale_metres),
            0.5,
        );
    }

    if let Some((a1, a2)) = l1.address.as_ref().zip(l2.address.as_ref()) {
        add(compare_addresses(a1, a2), 0.3);
    }

    if let Some((v1, v2)) = l1.venue_name.as_deref().zip(l2.venue_name.as_deref()) {
        let left = Normalizer::normalize_name(v1);
        let right = Normalizer::normalize_name(v2);
        add(Scorer::combined_similarity(&left, &right), 0.15);
    }

    if let Some((u1, u2)) = l1.virtual_url.as_deref().zip(l2.virtual_url.as_deref()) {
        add(f64::from(u1.trim() == u2.trim()), 0.05);
    }

    if total_weight == 0.0 {
        return 0.5;
    }
    weighted_sum / total_weight
}
/// Similarity of the normalised organizer names, or `None` unless both
/// events name an organizer.
fn score_organizer(e1: &Event, e2: &Event) -> Option<f64> {
    let (first, second) = e1.organizer.as_deref().zip(e2.organizer.as_deref())?;
    let left = Normalizer::normalize_name(first);
    let right = Normalizer::normalize_name(second);
    Some(Scorer::combined_similarity(&left, &right))
}
/// Best similarity over every pairing of the two events' performers.
///
/// Returns `None` when either event lists no performers; otherwise the
/// highest `Scorer::combined_similarity` of any normalised pair, with 0.0 as
/// the floor.
fn score_performers(e1: &Event, e2: &Event) -> Option<f64> {
    if e1.performers.is_empty() || e2.performers.is_empty() {
        return None;
    }

    // Normalise each name once up front instead of once per pairing.
    let normalized2: Vec<_> = e2
        .performers
        .iter()
        .map(|b| Normalizer::normalize_name(b))
        .collect();

    let mut best = 0.0_f64;
    for a in &e1.performers {
        let na = Normalizer::normalize_name(a);
        for nb in &normalized2 {
            let s = Scorer::combined_similarity(&na, nb);
            if s > best {
                best = s;
            }
        }
    }
    Some(best)
}
}
/// Gathers the event's primary name followed by its alternate names,
/// skipping any that are empty or whitespace-only.
fn collect_names(event: &Event) -> Vec<&String> {
    let mut names = Vec::new();
    for candidate in event.name.iter().chain(event.alternate_names.iter()) {
        if !candidate.trim().is_empty() {
            names.push(candidate);
        }
    }
    names
}
/// Returns the coordinate pair only when both values are present, finite
/// and within range: latitude in `[-90, 90]`, longitude in `[-180, 180]`.
fn valid_coords(lat: Option<f64>, lon: Option<f64>) -> Option<(f64, f64)> {
    let (lat, lon) = lat.zip(lon)?;
    let finite = lat.is_finite() && lon.is_finite();
    let in_range = lat.abs() <= 90.0 && lon.abs() <= 180.0;
    (finite && in_range).then_some((lat, lon))
}
/// 1.0 when both events have the same category, 0.0 when they differ, and
/// `None` unless both events have a category.
fn score_category(e1: &Event, e2: &Event) -> Option<f64> {
    let first = e1.category.as_ref()?;
    let second = e2.category.as_ref()?;
    Some(f64::from(first == second))
}
/// 1.0 when the two country codes are equal after trimming whitespace and
/// ignoring ASCII case, 0.0 when they differ, and `None` unless both events
/// carry a country code.
fn score_country_code(e1: &Event, e2: &Event) -> Option<f64> {
    let first = e1.country_code_as_iso_3166_1_alpha_2.as_ref()?;
    let second = e2.country_code_as_iso_3166_1_alpha_2.as_ref()?;
    let same = first.trim().eq_ignore_ascii_case(second.trim());
    Some(if same { 1.0 } else { 0.0 })
}
/// Returns `true` when at least one event id of `e1` equals an event id of
/// `e2`. With no ids on either side there is nothing to share, so the
/// result is `false`.
fn shares_event_id(e1: &Event, e2: &Event) -> bool {
    e1.event_ids
        .iter()
        .any(|id1| e2.event_ids.iter().any(|id2| id1 == id2))
}
/// 1.0 when the events share an event id, 0.0 when both have ids but none
/// in common, and `None` when either event has no ids at all.
fn score_event_ids(e1: &Event, e2: &Event) -> Option<f64> {
    let comparable = !e1.event_ids.is_empty() && !e2.event_ids.is_empty();
    comparable.then(|| f64::from(shares_event_id(e1, e2)))
}
/// 1.0 when the two URLs are identical after trimming surrounding
/// whitespace, 0.0 when they differ, and `None` unless both events have a
/// URL.
fn score_url(e1: &Event, e2: &Event) -> Option<f64> {
    e1.url
        .as_deref()
        .zip(e2.url.as_deref())
        .map(|(first, second)| {
            if first.trim() == second.trim() {
                1.0
            } else {
                0.0
            }
        })
}
/// Returns `true` when both events have a primary name and a start date,
/// the normalised names are equal, and both start dates parse to the same
/// Unix-seconds value.
///
/// Only the primary `name` is compared; alternate names are not consulted.
/// A missing or unparsable value on either side yields `false`.
fn name_and_start_date_match(e1: &Event, e2: &Event) -> bool {
    let names_agree = match (e1.name.as_ref(), e2.name.as_ref()) {
        (Some(a), Some(b)) => Normalizer::normalize_name(a) == Normalizer::normalize_name(b),
        _ => false,
    };
    if !names_agree {
        return false;
    }

    let instant = |event: &Event| {
        event
            .start_date
            .as_ref()
            .and_then(|raw| Normalizer::parse_iso8601_unix_seconds(raw))
    };
    matches!((instant(e1), instant(e2)), (Some(a), Some(b)) if a == b)
}
/// Weighted similarity of two addresses.
///
/// Up to three parts are compared, each only when present on both sides:
/// postcode (weight 0.5, exact match after normalisation), city (0.3,
/// Jaro-Winkler on normalised names) and first address line (0.2). For the
/// address line, street similarity is blended 60/40 with house-number
/// equality when both house numbers were parsed; otherwise street similarity
/// is used alone. Returns the weighted average of the compared parts, or the
/// neutral value 0.5 when nothing could be compared.
fn compare_addresses(addr1: &Address, addr2: &Address) -> f64 {
    let mut weighted_sum = 0.0_f64;
    let mut total_weight = 0.0_f64;
    let mut add = |score: f64, weight: f64| {
        weighted_sum += score * weight;
        total_weight += weight;
    };

    if let Some((pc1, pc2)) = addr1.postcode.as_ref().zip(addr2.postcode.as_ref()) {
        let same = Normalizer::normalize_postcode(pc1) == Normalizer::normalize_postcode(pc2);
        add(f64::from(same), 0.5);
    }

    if let Some((city1, city2)) = addr1.city.as_ref().zip(addr2.city.as_ref()) {
        let left = Normalizer::normalize_name(city1);
        let right = Normalizer::normalize_name(city2);
        add(Scorer::jaro_winkler_similarity(&left, &right), 0.3);
    }

    if let Some((line1, line2)) = addr1.line1.as_ref().zip(addr2.line1.as_ref()) {
        let parsed1 = Normalizer::parse_address_line(line1);
        let parsed2 = Normalizer::parse_address_line(line2);
        let street_sim = Scorer::jaro_winkler_similarity(&parsed1.street, &parsed2.street);
        let house_agreement = parsed1
            .house_number
            .as_ref()
            .zip(parsed2.house_number.as_ref())
            .map(|(a, b)| f64::from(a == b));
        let line_score = house_agreement.map_or(street_sim, |h| 0.6 * street_sim + 0.4 * h);
        add(line_score, 0.2);
    }

    if total_weight == 0.0 {
        return 0.5;
    }
    weighted_sum / total_weight
}
/// Unit tests for the matching configuration, the engine and its scorers.
#[cfg(test)]
// Several tests compare scores that are exactly 0.0 or 1.0 by construction.
#[allow(clippy::float_cmp)]
mod tests {
    use super::*;
    use crate::models::{EventCategory, EventId, EventIdScheme};

    // ---- MatchConfig ----------------------------------------------------

    #[test]
    fn config_default_values() {
        let c = MatchConfig::default();
        assert!((c.match_threshold - 0.80).abs() < 1e-9);
        assert!(!c.strict_mode);
    }

    #[test]
    fn config_strict_raises_threshold_and_sets_flag() {
        let c = MatchConfig::strict();
        assert!((c.match_threshold - 0.95).abs() < 1e-9);
        assert!(c.strict_mode);
    }

    #[test]
    fn config_lenient_lowers_threshold() {
        let c = MatchConfig::lenient();
        assert!((c.match_threshold - 0.65).abs() < 1e-9);
        assert!(c.use_phonetic_matching);
    }

    #[test]
    fn config_default_round_trips_through_json() {
        let cfg = MatchConfig::default();
        let json = serde_json::to_string(&cfg).expect("serialise");
        let back: MatchConfig = serde_json::from_str(&json).expect("deserialise");
        assert!((cfg.match_threshold - back.match_threshold).abs() < 1e-12);
        assert!((cfg.name_weight - back.name_weight).abs() < 1e-12);
        assert!((cfg.start_date_weight - back.start_date_weight).abs() < 1e-12);
        assert!(matches!(back.name_algorithm, SimilarityAlgorithm::Combined));
        assert_eq!(cfg.strict_mode, back.strict_mode);
    }

    #[test]
    fn config_partial_json_fills_missing_fields_from_default() {
        // The supplied threshold deliberately differs from the default so the
        // test fails if the explicit value were ignored in favour of it.
        let partial = r#"{"match_threshold": 0.70}"#;
        let cfg: MatchConfig = serde_json::from_str(partial).expect("partial json");
        let defaults = MatchConfig::default();
        assert!((cfg.match_threshold - 0.70).abs() < 1e-12);
        assert!((cfg.name_weight - defaults.name_weight).abs() < 1e-12);
        assert!(matches!(cfg.name_algorithm, SimilarityAlgorithm::Combined));
        assert_eq!(cfg.strict_mode, defaults.strict_mode);
    }

    // ---- Overall matching -----------------------------------------------

    #[test]
    fn exact_clone_is_a_match() {
        let e = Event::builder()
            .name("RustConf 2024")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        let result = MatchingEngine::default_config().match_events(&e, &e.clone());
        assert!(result.is_match);
        assert!(result.score > 0.95);
    }

    #[test]
    fn name_match_takes_best_of_cartesian_product() {
        let p1 = Event::builder().name("RustConf 2024").build();
        let p2 = Event::builder()
            .name("Rust Conference 2024")
            .add_alternate_name("RustConf 2024")
            .build();
        let r = MatchingEngine::default_config().match_events(&p1, &p2);
        let s = r.breakdown.name_score.expect("scored");
        assert!(s > 0.99, "got {s}");
    }

    #[test]
    fn unrelated_events_do_not_match() {
        let a = Event::builder()
            .name("RustConf 2024")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        let b = Event::builder()
            .name("Sydney Opera Concert")
            .start_date("2025-03-15T20:00:00Z")
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(!r.is_match);
        assert!(r.score < 0.5);
    }

    #[test]
    fn no_overlapping_fields_returns_zero_score() {
        let a = Event::builder().url("https://example.org/a").build();
        let b = Event::builder().url("https://example.org/b").build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert_eq!(r.score, 0.0);
    }

    // ---- Start date -----------------------------------------------------

    #[test]
    fn start_date_score_one_when_identical() {
        let a = Event::builder()
            .name("X")
            .start_date("2024-06-26T09:00:00Z")
            .build();
        let b = a.clone();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!((r.breakdown.start_date_score.unwrap() - 1.0).abs() < 1e-9);
    }

    #[test]
    fn start_date_score_decays_with_time_gap() {
        let a = Event::builder()
            .name("X")
            .start_date("2024-06-26T09:00:00Z")
            .build();
        let b = Event::builder()
            .name("X")
            .start_date("2024-07-26T09:00:00Z")
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.start_date_score.unwrap() < 1e-3);
    }

    #[test]
    fn start_date_score_none_when_one_side_missing() {
        let a = Event::builder().name("X").start_date("2024-06-26").build();
        let b = Event::builder().name("X").build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.start_date_score.is_none());
    }

    #[test]
    fn start_date_score_none_when_garbage() {
        let a = Event::builder().name("X").start_date("not-a-date").build();
        let b = Event::builder().name("X").start_date("2024-06-26").build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.start_date_score.is_none());
    }

    // ---- Category and country code --------------------------------------

    #[test]
    fn category_equality_scores_one_else_zero() {
        let a = Event::builder()
            .name("X")
            .category(EventCategory::MusicEvent)
            .build();
        let b = Event::builder()
            .name("X")
            .category(EventCategory::MusicEvent)
            .build();
        let c = Event::builder()
            .name("X")
            .category(EventCategory::ComedyEvent)
            .build();
        let engine = MatchingEngine::default_config();
        assert_eq!(
            engine.match_events(&a, &b).breakdown.category_score,
            Some(1.0)
        );
        assert_eq!(
            engine.match_events(&a, &c).breakdown.category_score,
            Some(0.0)
        );
    }

    #[test]
    fn category_score_none_when_either_missing() {
        let a = Event::builder()
            .name("X")
            .category(EventCategory::MusicEvent)
            .build();
        let b = Event::builder().name("X").build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.category_score.is_none());
    }

    #[test]
    fn country_code_case_insensitive_equality() {
        let a = Event::builder()
            .name("X")
            .country_code_as_iso_3166_1_alpha_2("gb")
            .build();
        let b = Event::builder()
            .name("X")
            .country_code_as_iso_3166_1_alpha_2("GB")
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert_eq!(r.breakdown.country_code_score, Some(1.0));
    }

    #[test]
    fn country_code_mismatch_scores_zero() {
        let a = Event::builder()
            .name("X")
            .country_code_as_iso_3166_1_alpha_2("GB")
            .build();
        let b = Event::builder()
            .name("X")
            .country_code_as_iso_3166_1_alpha_2("FR")
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert_eq!(r.breakdown.country_code_score, Some(0.0));
    }

    // ---- Event ids ------------------------------------------------------

    #[test]
    fn event_ids_shared_scores_one() {
        let id = EventId::new(EventIdScheme::Eventbrite, "12345").unwrap();
        let a = Event::builder().name("X").add_event_id(id.clone()).build();
        let b = Event::builder().name("X").add_event_id(id).build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert_eq!(r.breakdown.event_ids_score, Some(1.0));
    }

    #[test]
    fn event_ids_scheme_scoped_no_cross_match() {
        let a = Event::builder()
            .name("X")
            .add_event_id(EventId::new(EventIdScheme::Eventbrite, "X").unwrap())
            .build();
        let b = Event::builder()
            .name("X")
            .add_event_id(EventId::new(EventIdScheme::Meetup, "X").unwrap())
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert_eq!(r.breakdown.event_ids_score, Some(0.0));
    }

    #[test]
    fn event_ids_none_when_either_side_empty() {
        let a = Event::builder().name("X").build();
        let b = Event::builder()
            .name("X")
            .add_event_id(EventId::new(EventIdScheme::Eventbrite, "Q1").unwrap())
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.event_ids_score.is_none());
    }

    // ---- Deterministic matching and strict mode -------------------------

    #[test]
    fn deterministic_via_shared_event_id() {
        let id = EventId::new(EventIdScheme::Eventbrite, "12345").unwrap();
        let a = Event::builder()
            .name("RustConf 2024")
            .add_event_id(id.clone())
            .build();
        let b = Event::builder()
            .name("Wholly Different")
            .add_event_id(id)
            .build();
        assert!(MatchingEngine::default_config().deterministic_match(&a, &b));
    }

    #[test]
    fn deterministic_via_name_and_start_date() {
        let a = Event::builder()
            .name("RustConf 2024")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        let b = Event::builder()
            .name("RustConf 2024")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        assert!(MatchingEngine::default_config().deterministic_match(&a, &b));
    }

    #[test]
    fn deterministic_via_name_and_start_date_accepts_equivalent_offsets() {
        let a = Event::builder()
            .name("RustConf 2024")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        let b = Event::builder()
            .name("RustConf 2024")
            .start_date("2024-09-10T11:00:00+02:00")
            .build();
        assert!(MatchingEngine::default_config().deterministic_match(&a, &b));
    }

    #[test]
    fn deterministic_rejects_when_name_differs_and_no_shared_id() {
        let a = Event::builder()
            .name("X")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        let b = Event::builder()
            .name("Y")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        assert!(!MatchingEngine::default_config().deterministic_match(&a, &b));
    }

    #[test]
    fn deterministic_rejects_when_start_date_missing_and_no_shared_id() {
        let a = Event::builder().name("X").build();
        let b = Event::builder().name("X").build();
        assert!(!MatchingEngine::default_config().deterministic_match(&a, &b));
    }

    #[test]
    fn strict_mode_requires_deterministic_for_is_match() {
        let cfg = MatchConfig {
            match_threshold: 0.50,
            strict_mode: true,
            ..MatchConfig::default()
        };
        // Near-identical names: similar enough to clear the threshold, but
        // not equal after normalisation, so the deterministic rule rejects.
        let e1 = Event::builder()
            .name("Cafe Centrale Concert")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        let e2 = Event::builder()
            .name("Cafe Central Concert")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        let engine = MatchingEngine::new(cfg);
        let r = engine.match_events(&e1, &e2);
        assert!(r.score >= 0.50);
        assert!(!engine.deterministic_match(&e1, &e2));
        assert!(!r.is_match);
    }

    // ---- One-to-many ----------------------------------------------------

    #[test]
    fn match_one_to_many_empty_candidates_yields_empty_vec() {
        let engine = MatchingEngine::default_config();
        let q = Event::builder().name("Solo").build();
        assert!(engine.match_one_to_many(&q, &[]).is_empty());
    }

    #[test]
    fn rank_one_to_many_sorts_by_score_descending() {
        let engine = MatchingEngine::default_config();
        let q = Event::builder().name("RustConf 2024").build();
        let candidates = vec![
            Event::builder().name("PyConf 2024").build(),
            q.clone(),
            Event::builder().name("GoConf 2024").build(),
        ];
        let ranked = engine.rank_one_to_many(&q, &candidates);
        assert_eq!(ranked[0].0, 1);
        for w in ranked.windows(2) {
            assert!(w[0].1.score >= w[1].1.score);
        }
    }

    // ---- Confidence -----------------------------------------------------

    #[test]
    fn confidence_band_boundaries_are_inclusive_on_the_low_side() {
        assert_eq!(Confidence::from_score(0.90), Confidence::High);
        assert_eq!(Confidence::from_score(0.89), Confidence::Medium);
        assert_eq!(Confidence::from_score(0.75), Confidence::Medium);
        assert_eq!(Confidence::from_score(0.74), Confidence::Low);
    }

    // ---- Location, organizer, performers, URL ---------------------------

    #[test]
    fn location_postcode_match_dominates() {
        let l1 = Location::new().with_address(Address::new().with_postcode("BA4 4BY"));
        let l2 = Location::new().with_address(Address::new().with_postcode("BA4 4BY"));
        let s = MatchingEngine::default_config().compare_locations(&l1, &l2);
        assert!((s - 1.0).abs() < 1e-9, "got {s}");
    }

    #[test]
    fn location_score_none_when_either_side_absent() {
        let a = Event::builder()
            .name("X")
            .location(Location::new().with_venue_name("Worthy Farm"))
            .build();
        let b = Event::builder().name("X").build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.location_score.is_none());
    }

    #[test]
    fn organizer_match_after_normalisation() {
        let a = Event::builder()
            .name("X")
            .organizer("Rust Foundation")
            .build();
        let b = Event::builder()
            .name("X")
            .organizer("rust foundation")
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.organizer_score.unwrap() > 0.99);
    }

    #[test]
    fn performers_match_takes_best_of_cartesian_product() {
        let a = Event::builder()
            .name("X")
            .add_performer("Niko Matsakis")
            .add_performer("Tyler Mandry")
            .build();
        let b = Event::builder()
            .name("X")
            .add_performer("Carol Nichols")
            .add_performer("Niko Matsakis")
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.performers_score.unwrap() > 0.99);
    }

    #[test]
    fn url_match_is_exact_after_trim() {
        let a = Event::builder()
            .name("X")
            .url("https://rustconf.com")
            .build();
        let b = Event::builder()
            .name("X")
            .url(" https://rustconf.com ")
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert_eq!(r.breakdown.url_score, Some(1.0));
    }

    // ---- Phonetic matching ----------------------------------------------

    #[test]
    fn phonetic_score_none_when_off() {
        let p = Event::builder().name("Stephen Concert").build();
        let q = Event::builder().name("Steven Concert").build();
        let r = MatchingEngine::new(MatchConfig {
            use_phonetic_matching: false,
            ..MatchConfig::default()
        })
        .match_events(&p, &q);
        assert!(r.breakdown.name_phonetic_score.is_none());
    }

    #[test]
    fn phonetic_score_some_when_on() {
        let p = Event::builder().name("Stephen").build();
        let q = Event::builder().name("Steven").build();
        let r = MatchingEngine::new(MatchConfig {
            use_phonetic_matching: true,
            ..MatchConfig::default()
        })
        .match_events(&p, &q);
        assert!(r.breakdown.name_phonetic_score.is_some());
    }
}