1use crate::models::{Address, Event, Location};
39use crate::normalizer::Normalizer;
40use crate::scorer::{Scorer, SimilarityAlgorithm};
41use serde::{Deserialize, Serialize};
42
/// Tunable parameters for event matching: the decision threshold, one weight
/// per compared field, two decay scales, and behaviour switches.
///
/// Because of `#[serde(default)]`, any field missing from serialized input is
/// filled from [`MatchConfig::default`].
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct MatchConfig {
    /// Minimum weighted score at which two events are reported as a match.
    pub match_threshold: f64,

    /// Weight of the name similarity score.
    pub name_weight: f64,

    /// Weight of the start-date score.
    pub start_date_weight: f64,

    /// Scale (in seconds) handed to `Scorer::start_date_score`; the engine
    /// uses it for both the start-date and the end-date comparison.
    pub start_date_scale_seconds: f64,

    /// Weight of the end-date score.
    pub end_date_weight: f64,

    /// Weight of the combined location score.
    pub location_weight: f64,

    /// Scale (in metres) handed to `Scorer::coordinates_score` together with
    /// the haversine distance between the two coordinate pairs.
    pub coordinates_scale_metres: f64,

    /// Weight of the category equality score.
    pub category_weight: f64,

    /// Weight of the country-code equality score.
    pub country_code_weight: f64,

    /// Weight of the shared-event-id score.
    pub event_ids_weight: f64,

    /// Weight of the organizer similarity score.
    pub organizer_weight: f64,

    /// Weight of the performers similarity score.
    pub performers_weight: f64,

    /// Weight of the URL equality score.
    pub url_weight: f64,

    /// When `true`, the engine also computes a phonetic name score.
    pub use_phonetic_matching: bool,

    /// Similarity algorithm used when comparing event names.
    pub name_algorithm: SimilarityAlgorithm,

    /// When `true`, a pair is only a match if it is above the threshold *and*
    /// passes `MatchingEngine::deterministic_match`.
    pub strict_mode: bool,
}
147
impl Default for MatchConfig {
    /// Baseline configuration: threshold 0.80, phonetic matching and strict
    /// mode off, and the `Combined` name algorithm.
    fn default() -> Self {
        // The ten `*_weight` values below add up to 1.00.
        Self {
            match_threshold: 0.80,
            name_weight: 0.20,
            start_date_weight: 0.25,
            start_date_scale_seconds: 3600.0,
            end_date_weight: 0.05,
            location_weight: 0.15,
            coordinates_scale_metres: 100.0,
            category_weight: 0.08,
            country_code_weight: 0.04,
            event_ids_weight: 0.15,
            organizer_weight: 0.04,
            performers_weight: 0.02,
            url_weight: 0.02,
            use_phonetic_matching: false,
            name_algorithm: SimilarityAlgorithm::Combined,
            strict_mode: false,
        }
    }
}
178
179impl MatchConfig {
180 #[must_use]
192 pub fn strict() -> Self {
193 Self {
194 match_threshold: 0.95,
195 strict_mode: true,
196 ..Default::default()
197 }
198 }
199
200 #[must_use]
212 pub fn lenient() -> Self {
213 Self {
214 match_threshold: 0.65,
215 use_phonetic_matching: true,
216 ..Default::default()
217 }
218 }
219}
220
/// Coarse confidence band assigned to a match score by
/// [`Confidence::from_score`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Confidence {
    /// Score of at least 0.90.
    High,
    /// Score of at least 0.75 but below 0.90.
    Medium,
    /// Any other score.
    Low,
}
245
246impl Confidence {
247 #[must_use]
260 pub fn from_score(score: f64) -> Self {
261 if score >= 0.90 {
262 Confidence::High
263 } else if score >= 0.75 {
264 Confidence::Medium
265 } else {
266 Confidence::Low
267 }
268 }
269}
270
/// Outcome of comparing two events.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MatchResult {
    /// Weighted overall score computed from the breakdown.
    pub score: f64,

    /// Whether the pair is considered a match under the engine's config.
    pub is_match: bool,

    /// Confidence band for `score`; if the field is absent when
    /// deserializing, `default_confidence()` supplies the value.
    #[serde(default = "default_confidence")]
    pub confidence: Confidence,

    /// Per-field scores that went into `score`.
    pub breakdown: MatchBreakdown,
}
295
/// Serde fallback for `MatchResult::confidence` when the field is missing
/// from the input: the lowest band.
fn default_confidence() -> Confidence {
    Confidence::Low
}
299
/// Per-field scores behind a [`MatchResult`].
///
/// A field is `None` when the engine could not compare it (for example when
/// one of the two events lacks the value); such fields are left out of the
/// weighted score entirely.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MatchBreakdown {
    /// Best name similarity over all primary/alternate name pairs.
    pub name_score: Option<f64>,
    /// Phonetic name score; always `None` when phonetic matching is disabled.
    pub name_phonetic_score: Option<f64>,
    /// Start-date score.
    pub start_date_score: Option<f64>,
    /// End-date score.
    pub end_date_score: Option<f64>,
    /// Combined location score.
    pub location_score: Option<f64>,
    /// 1.0 when the categories are equal, 0.0 otherwise.
    pub category_score: Option<f64>,
    /// 1.0 when the country codes are equal ignoring ASCII case and
    /// surrounding whitespace, 0.0 otherwise.
    pub country_code_score: Option<f64>,
    /// 1.0 when the events share at least one event id, 0.0 otherwise.
    pub event_ids_score: Option<f64>,
    /// Organizer name similarity.
    pub organizer_score: Option<f64>,
    /// Best performer-name similarity over all performer pairs.
    pub performers_score: Option<f64>,
    /// 1.0 when the trimmed URLs are identical, 0.0 otherwise.
    pub url_score: Option<f64>,
}
346
347pub struct MatchingEngine {
361 config: MatchConfig,
362}
363
364impl MatchingEngine {
365 #[must_use]
367 pub fn new(config: MatchConfig) -> Self {
368 Self { config }
369 }
370
371 #[must_use]
373 pub fn default_config() -> Self {
374 Self::new(MatchConfig::default())
375 }
376
377 #[must_use]
395 pub fn match_events(&self, event1: &Event, event2: &Event) -> MatchResult {
396 let breakdown = self.calculate_breakdown(event1, event2);
397 let score = self.calculate_weighted_score(&breakdown);
398 let above_threshold = score >= self.config.match_threshold;
399 let is_match = if self.config.strict_mode {
400 above_threshold && self.deterministic_match(event1, event2)
401 } else {
402 above_threshold
403 };
404 let confidence = Confidence::from_score(score);
405
406 MatchResult {
407 score,
408 is_match,
409 confidence,
410 breakdown,
411 }
412 }
413
414 #[must_use]
432 pub fn match_one_to_many(&self, query: &Event, candidates: &[Event]) -> Vec<MatchResult> {
433 candidates
434 .iter()
435 .map(|c| self.match_events(query, c))
436 .collect()
437 }
438
439 #[must_use]
443 pub fn rank_one_to_many(
444 &self,
445 query: &Event,
446 candidates: &[Event],
447 ) -> Vec<(usize, MatchResult)> {
448 let mut indexed: Vec<(usize, MatchResult)> = self
449 .match_one_to_many(query, candidates)
450 .into_iter()
451 .enumerate()
452 .collect();
453 indexed.sort_by(|a, b| {
454 b.1.score
455 .partial_cmp(&a.1.score)
456 .unwrap_or(std::cmp::Ordering::Equal)
457 .then_with(|| a.0.cmp(&b.0))
458 });
459 indexed
460 }
461
462 #[must_use]
480 pub fn deterministic_match(&self, event1: &Event, event2: &Event) -> bool {
481 if shares_event_id(event1, event2) {
482 return true;
483 }
484 name_and_start_date_match(event1, event2)
485 }
486
487 fn calculate_breakdown(&self, event1: &Event, event2: &Event) -> MatchBreakdown {
488 MatchBreakdown {
489 name_score: self.score_name(event1, event2),
490 name_phonetic_score: if self.config.use_phonetic_matching {
491 Self::score_phonetic_names(event1, event2)
492 } else {
493 None
494 },
495 start_date_score: self.score_start_date(event1, event2),
496 end_date_score: self.score_end_date(event1, event2),
497 location_score: self.score_location(event1, event2),
498 category_score: score_category(event1, event2),
499 country_code_score: score_country_code(event1, event2),
500 event_ids_score: score_event_ids(event1, event2),
501 organizer_score: Self::score_organizer(event1, event2),
502 performers_score: Self::score_performers(event1, event2),
503 url_score: score_url(event1, event2),
504 }
505 }
506
507 fn calculate_weighted_score(&self, breakdown: &MatchBreakdown) -> f64 {
508 let mut total_weight = 0.0;
509 let mut weighted_sum = 0.0;
510
511 let mut accumulate = |opt: Option<f64>, weight: f64| {
512 if let Some(score) = opt {
513 weighted_sum += score * weight;
514 total_weight += weight;
515 }
516 };
517
518 accumulate(breakdown.name_score, self.config.name_weight);
519 accumulate(breakdown.start_date_score, self.config.start_date_weight);
520 accumulate(breakdown.end_date_score, self.config.end_date_weight);
521 accumulate(breakdown.location_score, self.config.location_weight);
522 accumulate(breakdown.category_score, self.config.category_weight);
523 accumulate(
524 breakdown.country_code_score,
525 self.config.country_code_weight,
526 );
527 accumulate(breakdown.event_ids_score, self.config.event_ids_weight);
528 accumulate(breakdown.organizer_score, self.config.organizer_weight);
529 accumulate(breakdown.performers_score, self.config.performers_weight);
530 accumulate(breakdown.url_score, self.config.url_weight);
531
532 if let Some(score) = breakdown.name_phonetic_score
534 && score > 0.9
535 {
536 weighted_sum += score * 0.05;
537 total_weight += 0.05;
538 }
539
540 if total_weight > 0.0 {
541 weighted_sum / total_weight
542 } else {
543 0.0
544 }
545 }
546
547 fn score_name(&self, e1: &Event, e2: &Event) -> Option<f64> {
548 let names1 = collect_names(e1);
549 let names2 = collect_names(e2);
550 if names1.is_empty() || names2.is_empty() {
551 return None;
552 }
553 let mut best = f64::NEG_INFINITY;
554 for n1 in &names1 {
555 for n2 in &names2 {
556 let s = self.score_name_pair(n1, n2);
557 if s > best {
558 best = s;
559 }
560 }
561 }
562 Some(best)
563 }
564
565 fn score_name_pair(&self, name1: &str, name2: &str) -> f64 {
566 let norm1 = Normalizer::normalize_name(name1);
567 let norm2 = Normalizer::normalize_name(name2);
568 match self.config.name_algorithm {
569 SimilarityAlgorithm::JaroWinkler => Scorer::jaro_winkler_similarity(&norm1, &norm2),
570 SimilarityAlgorithm::Levenshtein => Scorer::levenshtein_similarity(&norm1, &norm2),
571 SimilarityAlgorithm::Exact => Scorer::exact_match(&norm1, &norm2),
572 SimilarityAlgorithm::Combined => Scorer::combined_similarity(&norm1, &norm2),
573 }
574 }
575
576 fn score_phonetic_names(e1: &Event, e2: &Event) -> Option<f64> {
577 let names1 = collect_names(e1);
578 let names2 = collect_names(e2);
579 if names1.is_empty() || names2.is_empty() {
580 return None;
581 }
582 let codes1: Vec<String> = names1
583 .iter()
584 .map(|n| Normalizer::phonetic_code(n))
585 .collect();
586 let codes2: Vec<String> = names2
587 .iter()
588 .map(|n| Normalizer::phonetic_code(n))
589 .collect();
590 let mut best = 0.0_f64;
591 for c1 in &codes1 {
592 for c2 in &codes2 {
593 if !c1.is_empty() && c1 == c2 {
594 best = 1.0;
595 }
596 }
597 }
598 Some(best)
599 }
600
601 #[allow(clippy::cast_precision_loss)]
605 fn score_start_date(&self, e1: &Event, e2: &Event) -> Option<f64> {
606 let d = Scorer::seconds_between(e1.start_date.as_deref()?, e2.start_date.as_deref()?)?;
607 Some(Scorer::start_date_score(
608 d as f64,
609 self.config.start_date_scale_seconds,
610 ))
611 }
612
613 #[allow(clippy::cast_precision_loss)]
614 fn score_end_date(&self, e1: &Event, e2: &Event) -> Option<f64> {
615 let d = Scorer::seconds_between(e1.end_date.as_deref()?, e2.end_date.as_deref()?)?;
616 Some(Scorer::start_date_score(
617 d as f64,
618 self.config.start_date_scale_seconds,
619 ))
620 }
621
622 fn score_location(&self, e1: &Event, e2: &Event) -> Option<f64> {
623 match (e1.location.as_ref(), e2.location.as_ref()) {
624 (Some(l1), Some(l2)) => Some(self.compare_locations(l1, l2)),
625 _ => None,
626 }
627 }
628
629 fn compare_locations(&self, l1: &Location, l2: &Location) -> f64 {
630 let mut weighted_sum = 0.0_f64;
636 let mut total_weight = 0.0_f64;
637
638 if let (Some(lat1), Some(lon1), Some(lat2), Some(lon2)) =
639 (l1.latitude, l1.longitude, l2.latitude, l2.longitude)
640 && let (Some((la1, lo1)), Some((la2, lo2))) = (
641 valid_coords(Some(lat1), Some(lon1)),
642 valid_coords(Some(lat2), Some(lon2)),
643 )
644 {
645 let d = Scorer::haversine_metres(la1, lo1, la2, lo2);
646 weighted_sum +=
647 Scorer::coordinates_score(d, self.config.coordinates_scale_metres) * 0.5;
648 total_weight += 0.5;
649 }
650
651 if let (Some(a1), Some(a2)) = (l1.address.as_ref(), l2.address.as_ref()) {
652 weighted_sum += compare_addresses(a1, a2) * 0.3;
653 total_weight += 0.3;
654 }
655
656 if let (Some(v1), Some(v2)) = (l1.venue_name.as_deref(), l2.venue_name.as_deref()) {
657 let n1 = Normalizer::normalize_name(v1);
658 let n2 = Normalizer::normalize_name(v2);
659 weighted_sum += Scorer::combined_similarity(&n1, &n2) * 0.15;
660 total_weight += 0.15;
661 }
662
663 if let (Some(u1), Some(u2)) = (l1.virtual_url.as_deref(), l2.virtual_url.as_deref()) {
664 weighted_sum += f64::from(u1.trim() == u2.trim()) * 0.05;
665 total_weight += 0.05;
666 }
667
668 if total_weight == 0.0 {
669 0.5
670 } else {
671 weighted_sum / total_weight
672 }
673 }
674
675 fn score_organizer(e1: &Event, e2: &Event) -> Option<f64> {
676 let o1 = e1.organizer.as_deref()?;
677 let o2 = e2.organizer.as_deref()?;
678 let n1 = Normalizer::normalize_name(o1);
679 let n2 = Normalizer::normalize_name(o2);
680 Some(Scorer::combined_similarity(&n1, &n2))
681 }
682
683 fn score_performers(e1: &Event, e2: &Event) -> Option<f64> {
684 if e1.performers.is_empty() || e2.performers.is_empty() {
685 return None;
686 }
687 let mut best = 0.0_f64;
688 for a in &e1.performers {
689 for b in &e2.performers {
690 let na = Normalizer::normalize_name(a);
691 let nb = Normalizer::normalize_name(b);
692 let s = Scorer::combined_similarity(&na, &nb);
693 if s > best {
694 best = s;
695 }
696 }
697 }
698 Some(best)
699 }
700}
701
702fn collect_names(event: &Event) -> Vec<&String> {
707 event
708 .name
709 .iter()
710 .chain(event.alternate_names.iter())
711 .filter(|s| !s.trim().is_empty())
712 .collect()
713}
714
/// Returns `(lat, lon)` only when both values are present, finite, and
/// within `[-90, 90]` latitude and `[-180, 180]` longitude (bounds
/// inclusive); otherwise `None`.
fn valid_coords(lat: Option<f64>, lon: Option<f64>) -> Option<(f64, f64)> {
    lat.zip(lon).filter(|&(lat, lon)| {
        let finite = lat.is_finite() && lon.is_finite();
        let in_range = (-90.0..=90.0).contains(&lat) && (-180.0..=180.0).contains(&lon);
        finite && in_range
    })
}
727
728fn score_category(e1: &Event, e2: &Event) -> Option<f64> {
729 match (&e1.category, &e2.category) {
730 (Some(a), Some(b)) => Some(if a == b { 1.0 } else { 0.0 }),
731 _ => None,
732 }
733}
734
735fn score_country_code(e1: &Event, e2: &Event) -> Option<f64> {
736 let a = e1.country_code_as_iso_3166_1_alpha_2.as_ref()?;
737 let b = e2.country_code_as_iso_3166_1_alpha_2.as_ref()?;
738 let na = a.trim().to_ascii_lowercase();
739 let nb = b.trim().to_ascii_lowercase();
740 Some(if na == nb { 1.0 } else { 0.0 })
741}
742
743fn shares_event_id(e1: &Event, e2: &Event) -> bool {
744 if e1.event_ids.is_empty() || e2.event_ids.is_empty() {
745 return false;
746 }
747 for id1 in &e1.event_ids {
748 for id2 in &e2.event_ids {
749 if id1 == id2 {
750 return true;
751 }
752 }
753 }
754 false
755}
756
757fn score_event_ids(e1: &Event, e2: &Event) -> Option<f64> {
758 if e1.event_ids.is_empty() || e2.event_ids.is_empty() {
759 return None;
760 }
761 Some(if shares_event_id(e1, e2) { 1.0 } else { 0.0 })
762}
763
764fn score_url(e1: &Event, e2: &Event) -> Option<f64> {
765 let u1 = e1.url.as_deref()?;
766 let u2 = e2.url.as_deref()?;
767 Some(f64::from(u1.trim() == u2.trim()))
768}
769
770fn name_and_start_date_match(e1: &Event, e2: &Event) -> bool {
771 let (Some(n1), Some(n2)) = (&e1.name, &e2.name) else {
772 return false;
773 };
774 if Normalizer::normalize_name(n1) != Normalizer::normalize_name(n2) {
775 return false;
776 }
777 let (Some(sd1), Some(sd2)) = (&e1.start_date, &e2.start_date) else {
778 return false;
779 };
780 match (
781 Normalizer::parse_iso8601_unix_seconds(sd1),
782 Normalizer::parse_iso8601_unix_seconds(sd2),
783 ) {
784 (Some(a), Some(b)) => a == b,
785 _ => false,
786 }
787}
788
789fn compare_addresses(addr1: &Address, addr2: &Address) -> f64 {
793 let mut weighted_sum = 0.0_f64;
794 let mut total_weight = 0.0_f64;
795
796 if let (Some(pc1), Some(pc2)) = (&addr1.postcode, &addr2.postcode) {
797 let norm1 = Normalizer::normalize_postcode(pc1);
798 let norm2 = Normalizer::normalize_postcode(pc2);
799 weighted_sum += f64::from(norm1 == norm2) * 0.5;
800 total_weight += 0.5;
801 }
802
803 if let (Some(city1), Some(city2)) = (&addr1.city, &addr2.city) {
804 let norm1 = Normalizer::normalize_name(city1);
805 let norm2 = Normalizer::normalize_name(city2);
806 weighted_sum += Scorer::jaro_winkler_similarity(&norm1, &norm2) * 0.3;
807 total_weight += 0.3;
808 }
809
810 if let (Some(line1), Some(line2)) = (&addr1.line1, &addr2.line1) {
811 let parsed1 = Normalizer::parse_address_line(line1);
812 let parsed2 = Normalizer::parse_address_line(line2);
813 let street_sim = Scorer::jaro_winkler_similarity(&parsed1.street, &parsed2.street);
814 let house_score = match (&parsed1.house_number, &parsed2.house_number) {
815 (Some(a), Some(b)) => Some(f64::from(a == b)),
816 _ => None,
817 };
818 let line1_score = match house_score {
819 Some(h) => 0.6 * street_sim + 0.4 * h,
820 None => street_sim,
821 };
822 weighted_sum += line1_score * 0.2;
823 total_weight += 0.2;
824 }
825
826 if total_weight == 0.0 {
827 0.5
828 } else {
829 weighted_sum / total_weight
830 }
831}
832
#[cfg(test)]
// Several tests assert scores that are exactly 0.0 or 1.0 by construction,
// so direct float equality is intended here.
#[allow(clippy::float_cmp)]
mod tests {
    use super::*;
    use crate::models::{EventCategory, EventId, EventIdScheme};

    // ---- MatchConfig ----------------------------------------------------

    #[test]
    fn config_default_values() {
        let c = MatchConfig::default();
        assert!((c.match_threshold - 0.80).abs() < 1e-9);
        assert!(!c.strict_mode);
    }

    #[test]
    fn config_strict_raises_threshold_and_sets_flag() {
        let c = MatchConfig::strict();
        assert!((c.match_threshold - 0.95).abs() < 1e-9);
        assert!(c.strict_mode);
    }

    #[test]
    fn config_lenient_lowers_threshold() {
        let c = MatchConfig::lenient();
        assert!((c.match_threshold - 0.65).abs() < 1e-9);
        assert!(c.use_phonetic_matching);
    }

    #[test]
    fn config_default_round_trips_through_json() {
        let cfg = MatchConfig::default();
        let json = serde_json::to_string(&cfg).expect("serialise");
        let back: MatchConfig = serde_json::from_str(&json).expect("deserialise");
        assert!((cfg.match_threshold - back.match_threshold).abs() < 1e-12);
        assert!((cfg.name_weight - back.name_weight).abs() < 1e-12);
        assert!((cfg.start_date_weight - back.start_date_weight).abs() < 1e-12);
        assert!(matches!(back.name_algorithm, SimilarityAlgorithm::Combined));
        assert_eq!(cfg.strict_mode, back.strict_mode);
    }

    #[test]
    fn config_partial_json_fills_missing_fields_from_default() {
        // The supplied threshold deliberately differs from the default so the
        // test can tell "value honoured" apart from "value ignored".
        let partial = r#"{"match_threshold": 0.42}"#;
        let cfg: MatchConfig = serde_json::from_str(partial).expect("partial json");
        let defaults = MatchConfig::default();
        assert!((cfg.match_threshold - 0.42).abs() < 1e-12);
        assert!((cfg.name_weight - defaults.name_weight).abs() < 1e-12);
        assert!(matches!(cfg.name_algorithm, SimilarityAlgorithm::Combined));
        assert_eq!(cfg.strict_mode, defaults.strict_mode);
    }

    // ---- Overall matching -----------------------------------------------

    #[test]
    fn exact_clone_is_a_match() {
        let e = Event::builder()
            .name("RustConf 2024")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        let result = MatchingEngine::default_config().match_events(&e, &e.clone());
        assert!(result.is_match);
        assert!(result.score > 0.95);
    }

    #[test]
    fn name_match_takes_best_of_cartesian_product() {
        let p1 = Event::builder().name("RustConf 2024").build();
        let p2 = Event::builder()
            .name("Rust Conference 2024")
            .add_alternate_name("RustConf 2024")
            .build();
        let r = MatchingEngine::default_config().match_events(&p1, &p2);
        let s = r.breakdown.name_score.expect("scored");
        assert!(s > 0.99, "got {s}");
    }

    #[test]
    fn unrelated_events_do_not_match() {
        let a = Event::builder()
            .name("RustConf 2024")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        let b = Event::builder()
            .name("Sydney Opera Concert")
            .start_date("2025-03-15T20:00:00Z")
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(!r.is_match);
        assert!(r.score < 0.5);
    }

    #[test]
    fn no_overlapping_fields_returns_zero_score() {
        // The two events populate disjoint fields, so nothing is comparable
        // and the weighted score falls back to 0.0.
        let a = Event::builder().url("https://example.org/a").build();
        let b = Event::builder().name("Only A Name").build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.name_score.is_none());
        assert!(r.breakdown.url_score.is_none());
        assert_eq!(r.score, 0.0);
        assert!(!r.is_match);
    }

    #[test]
    fn mismatched_url_only_scores_zero() {
        // Only the URL is comparable, and it differs.
        let a = Event::builder().url("https://example.org/a").build();
        let b = Event::builder().url("https://example.org/b").build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert_eq!(r.breakdown.url_score, Some(0.0));
        assert_eq!(r.score, 0.0);
    }

    // ---- Start date -----------------------------------------------------

    #[test]
    fn start_date_score_one_when_identical() {
        let a = Event::builder()
            .name("X")
            .start_date("2024-06-26T09:00:00Z")
            .build();
        let b = a.clone();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!((r.breakdown.start_date_score.unwrap() - 1.0).abs() < 1e-9);
    }

    #[test]
    fn start_date_score_decays_with_time_gap() {
        let a = Event::builder()
            .name("X")
            .start_date("2024-06-26T09:00:00Z")
            .build();
        let b = Event::builder()
            .name("X")
            .start_date("2024-07-26T09:00:00Z")
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.start_date_score.unwrap() < 1e-3);
    }

    #[test]
    fn start_date_score_none_when_one_side_missing() {
        let a = Event::builder().name("X").start_date("2024-06-26").build();
        let b = Event::builder().name("X").build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.start_date_score.is_none());
    }

    #[test]
    fn start_date_score_none_when_garbage() {
        let a = Event::builder().name("X").start_date("not-a-date").build();
        let b = Event::builder().name("X").start_date("2024-06-26").build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.start_date_score.is_none());
    }

    // ---- Category -------------------------------------------------------

    #[test]
    fn category_equality_scores_one_else_zero() {
        let a = Event::builder()
            .name("X")
            .category(EventCategory::MusicEvent)
            .build();
        let b = Event::builder()
            .name("X")
            .category(EventCategory::MusicEvent)
            .build();
        let c = Event::builder()
            .name("X")
            .category(EventCategory::ComedyEvent)
            .build();
        let engine = MatchingEngine::default_config();
        assert_eq!(
            engine.match_events(&a, &b).breakdown.category_score,
            Some(1.0)
        );
        assert_eq!(
            engine.match_events(&a, &c).breakdown.category_score,
            Some(0.0)
        );
    }

    #[test]
    fn category_score_none_when_either_missing() {
        let a = Event::builder()
            .name("X")
            .category(EventCategory::MusicEvent)
            .build();
        let b = Event::builder().name("X").build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.category_score.is_none());
    }

    // ---- Country code ---------------------------------------------------

    #[test]
    fn country_code_case_insensitive_equality() {
        let a = Event::builder()
            .name("X")
            .country_code_as_iso_3166_1_alpha_2("gb")
            .build();
        let b = Event::builder()
            .name("X")
            .country_code_as_iso_3166_1_alpha_2("GB")
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert_eq!(r.breakdown.country_code_score, Some(1.0));
    }

    #[test]
    fn country_code_mismatch_scores_zero() {
        let a = Event::builder()
            .name("X")
            .country_code_as_iso_3166_1_alpha_2("GB")
            .build();
        let b = Event::builder()
            .name("X")
            .country_code_as_iso_3166_1_alpha_2("FR")
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert_eq!(r.breakdown.country_code_score, Some(0.0));
    }

    // ---- Event ids ------------------------------------------------------

    #[test]
    fn event_ids_shared_scores_one() {
        let id = EventId::new(EventIdScheme::Eventbrite, "12345").unwrap();
        let a = Event::builder().name("X").add_event_id(id.clone()).build();
        let b = Event::builder().name("X").add_event_id(id).build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert_eq!(r.breakdown.event_ids_score, Some(1.0));
    }

    #[test]
    fn event_ids_scheme_scoped_no_cross_match() {
        let a = Event::builder()
            .name("X")
            .add_event_id(EventId::new(EventIdScheme::Eventbrite, "X").unwrap())
            .build();
        let b = Event::builder()
            .name("X")
            .add_event_id(EventId::new(EventIdScheme::Meetup, "X").unwrap())
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert_eq!(r.breakdown.event_ids_score, Some(0.0));
    }

    #[test]
    fn event_ids_none_when_either_side_empty() {
        let a = Event::builder().name("X").build();
        let b = Event::builder()
            .name("X")
            .add_event_id(EventId::new(EventIdScheme::Eventbrite, "Q1").unwrap())
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.event_ids_score.is_none());
    }

    // ---- Deterministic matching -----------------------------------------

    #[test]
    fn deterministic_via_shared_event_id() {
        let id = EventId::new(EventIdScheme::Eventbrite, "12345").unwrap();
        let a = Event::builder()
            .name("RustConf 2024")
            .add_event_id(id.clone())
            .build();
        let b = Event::builder()
            .name("Wholly Different")
            .add_event_id(id)
            .build();
        assert!(MatchingEngine::default_config().deterministic_match(&a, &b));
    }

    #[test]
    fn deterministic_via_name_and_start_date() {
        let a = Event::builder()
            .name("RustConf 2024")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        let b = Event::builder()
            .name("RustConf 2024")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        assert!(MatchingEngine::default_config().deterministic_match(&a, &b));
    }

    #[test]
    fn deterministic_via_name_and_start_date_accepts_equivalent_offsets() {
        let a = Event::builder()
            .name("RustConf 2024")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        let b = Event::builder()
            .name("RustConf 2024")
            .start_date("2024-09-10T11:00:00+02:00")
            .build();
        assert!(MatchingEngine::default_config().deterministic_match(&a, &b));
    }

    #[test]
    fn deterministic_rejects_when_name_differs_and_no_shared_id() {
        let a = Event::builder()
            .name("X")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        let b = Event::builder()
            .name("Y")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        assert!(!MatchingEngine::default_config().deterministic_match(&a, &b));
    }

    #[test]
    fn deterministic_rejects_when_start_date_missing_and_no_shared_id() {
        let a = Event::builder().name("X").build();
        let b = Event::builder().name("X").build();
        assert!(!MatchingEngine::default_config().deterministic_match(&a, &b));
    }

    // ---- Strict mode ----------------------------------------------------

    #[test]
    fn strict_mode_requires_deterministic_for_is_match() {
        let cfg = MatchConfig {
            match_threshold: 0.50,
            strict_mode: true,
            ..MatchConfig::default()
        };
        let e1 = Event::builder()
            .name("Cafe Centrale Concert")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        // Near-identical name: scores above the threshold but is not a
        // deterministic match.
        let e2 = Event::builder()
            .name("Cafe Central Concert")
            .start_date("2024-09-10T09:00:00Z")
            .build();
        let engine = MatchingEngine::new(cfg);
        let r = engine.match_events(&e1, &e2);
        assert!(r.score >= 0.50);
        assert!(!engine.deterministic_match(&e1, &e2));
        assert!(!r.is_match);
    }

    // ---- One-to-many ----------------------------------------------------

    #[test]
    fn match_one_to_many_empty_candidates_yields_empty_vec() {
        let engine = MatchingEngine::default_config();
        let q = Event::builder().name("Solo").build();
        assert!(engine.match_one_to_many(&q, &[]).is_empty());
    }

    #[test]
    fn rank_one_to_many_sorts_by_score_descending() {
        let engine = MatchingEngine::default_config();
        let q = Event::builder().name("RustConf 2024").build();
        let candidates = vec![
            Event::builder().name("PyConf 2024").build(),
            q.clone(),
            Event::builder().name("GoConf 2024").build(),
        ];
        let ranked = engine.rank_one_to_many(&q, &candidates);
        assert_eq!(ranked[0].0, 1);
        for w in ranked.windows(2) {
            assert!(w[0].1.score >= w[1].1.score);
        }
    }

    // ---- Confidence -----------------------------------------------------

    #[test]
    fn confidence_band_boundaries_are_inclusive_on_the_low_side() {
        assert_eq!(Confidence::from_score(0.90), Confidence::High);
        assert_eq!(Confidence::from_score(0.89), Confidence::Medium);
        assert_eq!(Confidence::from_score(0.75), Confidence::Medium);
        assert_eq!(Confidence::from_score(0.74), Confidence::Low);
    }

    // ---- Location -------------------------------------------------------

    #[test]
    fn location_postcode_match_dominates() {
        let l1 = Location::new().with_address(Address::new().with_postcode("BA4 4BY"));
        let l2 = Location::new().with_address(Address::new().with_postcode("BA4 4BY"));
        let s = MatchingEngine::default_config().compare_locations(&l1, &l2);
        assert!((s - 1.0).abs() < 1e-9, "got {s}");
    }

    #[test]
    fn location_score_none_when_either_side_absent() {
        let a = Event::builder()
            .name("X")
            .location(Location::new().with_venue_name("Worthy Farm"))
            .build();
        let b = Event::builder().name("X").build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.location_score.is_none());
    }

    // ---- Organizer, performers, URL -------------------------------------

    #[test]
    fn organizer_match_after_normalisation() {
        let a = Event::builder()
            .name("X")
            .organizer("Rust Foundation")
            .build();
        let b = Event::builder()
            .name("X")
            .organizer("rust foundation")
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.organizer_score.unwrap() > 0.99);
    }

    #[test]
    fn performers_match_takes_best_of_cartesian_product() {
        let a = Event::builder()
            .name("X")
            .add_performer("Niko Matsakis")
            .add_performer("Tyler Mandry")
            .build();
        let b = Event::builder()
            .name("X")
            .add_performer("Carol Nichols")
            .add_performer("Niko Matsakis")
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert!(r.breakdown.performers_score.unwrap() > 0.99);
    }

    #[test]
    fn url_match_is_exact_after_trim() {
        let a = Event::builder()
            .name("X")
            .url("https://rustconf.com")
            .build();
        let b = Event::builder()
            .name("X")
            .url(" https://rustconf.com ")
            .build();
        let r = MatchingEngine::default_config().match_events(&a, &b);
        assert_eq!(r.breakdown.url_score, Some(1.0));
    }

    // ---- Phonetic matching ----------------------------------------------

    #[test]
    fn phonetic_score_none_when_off() {
        let p = Event::builder().name("Stephen Concert").build();
        let q = Event::builder().name("Steven Concert").build();
        let r = MatchingEngine::new(MatchConfig {
            use_phonetic_matching: false,
            ..MatchConfig::default()
        })
        .match_events(&p, &q);
        assert!(r.breakdown.name_phonetic_score.is_none());
    }

    #[test]
    fn phonetic_score_some_when_on() {
        let p = Event::builder().name("Stephen").build();
        let q = Event::builder().name("Steven").build();
        let r = MatchingEngine::new(MatchConfig {
            use_phonetic_matching: true,
            ..MatchConfig::default()
        })
        .match_events(&p, &q);
        assert!(r.breakdown.name_phonetic_score.is_some());
    }
}