1use crate::model::Component;
7
8#[derive(Debug, Clone)]
10#[must_use]
11pub struct MatchResult {
12 pub score: f64,
14 pub tier: MatchTier,
16 pub metadata: MatchMetadata,
18}
19
20impl MatchResult {
21 pub fn new(score: f64, tier: MatchTier) -> Self {
23 Self {
24 score,
25 tier,
26 metadata: MatchMetadata::default(),
27 }
28 }
29
30 pub const fn with_metadata(score: f64, tier: MatchTier, metadata: MatchMetadata) -> Self {
32 Self {
33 score,
34 tier,
35 metadata,
36 }
37 }
38
39 pub fn no_match() -> Self {
41 Self {
42 score: 0.0,
43 tier: MatchTier::None,
44 metadata: MatchMetadata::default(),
45 }
46 }
47
48 #[must_use]
50 pub fn is_match(&self) -> bool {
51 self.score > 0.0 && self.tier != MatchTier::None
52 }
53}
54
55#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
57#[non_exhaustive]
58pub enum MatchTier {
59 None,
61 ExactIdentifier,
63 Alias,
65 EcosystemRule,
67 Fuzzy,
69 CustomRule,
71}
72
73impl MatchTier {
74 #[must_use]
76 pub const fn default_score(&self) -> f64 {
77 match self {
78 Self::None => 0.0,
79 Self::ExactIdentifier => 1.0,
80 Self::Alias => 0.95,
81 Self::EcosystemRule => 0.90,
82 Self::CustomRule => 0.92,
83 Self::Fuzzy => 0.80,
84 }
85 }
86}
87
88#[derive(Debug, Clone, Default)]
90pub struct MatchMetadata {
91 pub matched_fields: Vec<String>,
93 pub normalization: Option<String>,
95 pub rule_id: Option<String>,
97}
98
99#[derive(Debug, Clone)]
103pub struct MatchExplanation {
104 pub tier: MatchTier,
106 pub score: f64,
108 pub reason: String,
110 pub score_breakdown: Vec<ScoreComponent>,
112 pub normalizations_applied: Vec<String>,
114 pub is_match: bool,
116}
117
118#[derive(Debug, Clone)]
120pub struct ScoreComponent {
121 pub name: String,
123 pub weight: f64,
125 pub raw_score: f64,
127 pub weighted_score: f64,
129 pub description: String,
131}
132
133impl MatchExplanation {
134 pub fn matched(tier: MatchTier, score: f64, reason: impl Into<String>) -> Self {
136 Self {
137 tier,
138 score,
139 reason: reason.into(),
140 score_breakdown: Vec::new(),
141 normalizations_applied: Vec::new(),
142 is_match: true,
143 }
144 }
145
146 pub fn no_match(reason: impl Into<String>) -> Self {
148 Self {
149 tier: MatchTier::None,
150 score: 0.0,
151 reason: reason.into(),
152 score_breakdown: Vec::new(),
153 normalizations_applied: Vec::new(),
154 is_match: false,
155 }
156 }
157
158 #[must_use]
160 pub fn with_score_component(mut self, component: ScoreComponent) -> Self {
161 self.score_breakdown.push(component);
162 self
163 }
164
165 #[must_use]
167 pub fn with_normalization(mut self, normalization: impl Into<String>) -> Self {
168 self.normalizations_applied.push(normalization.into());
169 self
170 }
171
172 #[must_use]
174 pub fn summary(&self) -> String {
175 if self.is_match {
176 format!(
177 "MATCH ({:.0}% confidence via {:?}): {}",
178 self.score * 100.0,
179 self.tier,
180 self.reason
181 )
182 } else {
183 format!("NO MATCH: {}", self.reason)
184 }
185 }
186
187 #[must_use]
189 pub fn detailed(&self) -> String {
190 let mut lines = vec![self.summary()];
191
192 if !self.score_breakdown.is_empty() {
193 lines.push("Score breakdown:".to_string());
194 for component in &self.score_breakdown {
195 lines.push(format!(
196 " - {}: {:.2} × {:.2} = {:.2} ({})",
197 component.name,
198 component.raw_score,
199 component.weight,
200 component.weighted_score,
201 component.description
202 ));
203 }
204 }
205
206 if !self.normalizations_applied.is_empty() {
207 lines.push(format!(
208 "Normalizations: {}",
209 self.normalizations_applied.join(", ")
210 ));
211 }
212
213 lines.join("\n")
214 }
215}
216
217impl std::fmt::Display for MatchExplanation {
218 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
219 write!(f, "{}", self.summary())
220 }
221}
222
223pub trait ComponentMatcher: Send + Sync {
237 fn match_score(&self, a: &Component, b: &Component) -> f64;
241
242 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
246 let score = self.match_score(a, b);
247 if score > 0.0 {
248 MatchResult::new(score, MatchTier::Fuzzy)
249 } else {
250 MatchResult::no_match()
251 }
252 }
253
254 fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
258 let result = self.match_detailed(a, b);
259 if result.is_match() {
260 MatchExplanation::matched(
261 result.tier,
262 result.score,
263 format!("'{}' matches '{}' via {:?}", a.name, b.name, result.tier),
264 )
265 } else {
266 MatchExplanation::no_match(format!(
267 "'{}' does not match '{}' (score {:.2} below threshold)",
268 a.name, b.name, result.score
269 ))
270 }
271 }
272
273 fn find_best_match<'a>(
277 &self,
278 target: &Component,
279 candidates: &'a [&Component],
280 threshold: f64,
281 ) -> Option<(&'a Component, f64)> {
282 candidates
283 .iter()
284 .map(|c| (*c, self.match_score(target, c)))
285 .filter(|(_, score)| *score >= threshold)
286 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
287 }
288
289 fn name(&self) -> &'static str {
291 "ComponentMatcher"
292 }
293
294 fn threshold(&self) -> f64 {
296 0.0
297 }
298}
299
300#[derive(Debug, Clone)]
302pub struct CacheConfig {
303 pub max_entries: usize,
305 pub cache_detailed: bool,
307}
308
309impl Default for CacheConfig {
310 fn default() -> Self {
311 Self {
312 max_entries: 100_000,
313 cache_detailed: false,
314 }
315 }
316}
317
318impl CacheConfig {
319 #[must_use]
321 pub const fn small() -> Self {
322 Self {
323 max_entries: 10_000,
324 cache_detailed: true,
325 }
326 }
327
328 #[must_use]
330 pub const fn large() -> Self {
331 Self {
332 max_entries: 500_000,
333 cache_detailed: false,
334 }
335 }
336}
337
338#[derive(Hash, Eq, PartialEq, Clone)]
340struct CacheKey {
341 hash: u64,
342}
343
344impl CacheKey {
345 fn new(a_id: &str, b_id: &str) -> Self {
346 use xxhash_rust::xxh3::xxh3_64;
347
348 let (first, second) = if a_id < b_id {
350 (a_id, b_id)
351 } else {
352 (b_id, a_id)
353 };
354
355 let combined = format!("{first}|{second}");
356 Self {
357 hash: xxh3_64(combined.as_bytes()),
358 }
359 }
360}
361
362#[derive(Clone)]
364struct CacheEntry {
365 score: f64,
366 detailed: Option<MatchResult>,
367}
368
369pub struct CachedMatcher<M: ComponentMatcher> {
386 inner: M,
387 config: CacheConfig,
388 cache: std::sync::RwLock<std::collections::HashMap<CacheKey, CacheEntry>>,
389 stats: std::sync::atomic::AtomicUsize,
390 hits: std::sync::atomic::AtomicUsize,
391}
392
393impl<M: ComponentMatcher> CachedMatcher<M> {
394 pub fn new(inner: M) -> Self {
396 Self::with_config(inner, CacheConfig::default())
397 }
398
399 pub fn with_config(inner: M, config: CacheConfig) -> Self {
401 Self {
402 inner,
403 config,
404 cache: std::sync::RwLock::new(std::collections::HashMap::new()),
405 stats: std::sync::atomic::AtomicUsize::new(0),
406 hits: std::sync::atomic::AtomicUsize::new(0),
407 }
408 }
409
410 pub const fn inner(&self) -> &M {
412 &self.inner
413 }
414
415 pub fn cache_stats(&self) -> CacheStats {
417 let total = self.stats.load(std::sync::atomic::Ordering::Relaxed);
418 let hits = self.hits.load(std::sync::atomic::Ordering::Relaxed);
419 let size = self.cache.read().map(|c| c.len()).unwrap_or(0);
420 CacheStats {
421 total_lookups: total,
422 cache_hits: hits,
423 cache_misses: total.saturating_sub(hits),
424 hit_rate: if total > 0 {
425 hits as f64 / total as f64
426 } else {
427 0.0
428 },
429 cache_size: size,
430 }
431 }
432
433 pub fn clear_cache(&self) {
435 if let Ok(mut cache) = self.cache.write() {
436 cache.clear();
437 }
438 self.stats.store(0, std::sync::atomic::Ordering::Relaxed);
439 self.hits.store(0, std::sync::atomic::Ordering::Relaxed);
440 }
441
442 fn get_cached(&self, key: &CacheKey) -> Option<CacheEntry> {
444 self.stats
445 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
446 if let Ok(cache) = self.cache.read()
447 && let Some(entry) = cache.get(key)
448 {
449 self.hits.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
450 return Some(entry.clone());
451 }
452 None
453 }
454
455 fn store_cached(&self, key: CacheKey, entry: CacheEntry) {
457 if let Ok(mut cache) = self.cache.write() {
458 if cache.len() >= self.config.max_entries {
460 let to_remove: Vec<CacheKey> = cache
461 .keys()
462 .take(self.config.max_entries / 2)
463 .cloned()
464 .collect();
465 for k in to_remove {
466 cache.remove(&k);
467 }
468 }
469 cache.insert(key, entry);
470 }
471 }
472}
473
474#[derive(Debug, Clone)]
476pub struct CacheStats {
477 pub total_lookups: usize,
479 pub cache_hits: usize,
481 pub cache_misses: usize,
483 pub hit_rate: f64,
485 pub cache_size: usize,
487}
488
489impl<M: ComponentMatcher> ComponentMatcher for CachedMatcher<M> {
490 fn match_score(&self, a: &Component, b: &Component) -> f64 {
491 let key = CacheKey::new(a.canonical_id.value(), b.canonical_id.value());
492
493 if let Some(entry) = self.get_cached(&key) {
495 return entry.score;
496 }
497
498 let score = self.inner.match_score(a, b);
500 self.store_cached(
501 key,
502 CacheEntry {
503 score,
504 detailed: None,
505 },
506 );
507 score
508 }
509
510 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
511 if !self.config.cache_detailed {
512 return self.inner.match_detailed(a, b);
513 }
514
515 let key = CacheKey::new(a.canonical_id.value(), b.canonical_id.value());
516
517 if let Some(entry) = self.get_cached(&key)
519 && let Some(detailed) = entry.detailed
520 {
521 return detailed;
522 }
523
524 let result = self.inner.match_detailed(a, b);
526 self.store_cached(
527 key,
528 CacheEntry {
529 score: result.score,
530 detailed: Some(result.clone()),
531 },
532 );
533 result
534 }
535
536 fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
537 self.inner.explain_match(a, b)
539 }
540
541 fn name(&self) -> &'static str {
542 "CachedMatcher"
543 }
544
545 fn threshold(&self) -> f64 {
546 self.inner.threshold()
547 }
548}
549
550#[must_use]
552pub struct CompositeMatcherBuilder {
553 matchers: Vec<Box<dyn ComponentMatcher>>,
554}
555
556impl CompositeMatcherBuilder {
557 pub fn new() -> Self {
559 Self {
560 matchers: Vec::new(),
561 }
562 }
563
564 pub fn with_matcher(mut self, matcher: Box<dyn ComponentMatcher>) -> Self {
566 self.matchers.push(matcher);
567 self
568 }
569
570 #[must_use]
572 pub fn build(self) -> CompositeMatcher {
573 CompositeMatcher {
574 matchers: self.matchers,
575 }
576 }
577}
578
579impl Default for CompositeMatcherBuilder {
580 fn default() -> Self {
581 Self::new()
582 }
583}
584
585pub struct CompositeMatcher {
587 matchers: Vec<Box<dyn ComponentMatcher>>,
588}
589
590impl ComponentMatcher for CompositeMatcher {
591 fn match_score(&self, a: &Component, b: &Component) -> f64 {
592 self.matchers
594 .iter()
595 .map(|m| m.match_score(a, b))
596 .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
597 .unwrap_or(0.0)
598 }
599
600 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
601 self.matchers
603 .iter()
604 .map(|m| m.match_detailed(a, b))
605 .max_by(|a, b| {
606 a.score
607 .partial_cmp(&b.score)
608 .unwrap_or(std::cmp::Ordering::Equal)
609 })
610 .unwrap_or_else(MatchResult::no_match)
611 }
612
613 fn name(&self) -> &'static str {
614 "CompositeMatcher"
615 }
616}
617
618#[cfg(test)]
619mod tests {
620 use super::*;
621
622 struct FixedScoreMatcher(f64);
624
625 impl ComponentMatcher for FixedScoreMatcher {
626 fn match_score(&self, _a: &Component, _b: &Component) -> f64 {
627 self.0
628 }
629
630 fn name(&self) -> &'static str {
631 "FixedScoreMatcher"
632 }
633 }
634
635 #[test]
636 fn test_match_result_creation() {
637 let result = MatchResult::new(0.95, MatchTier::Alias);
638 assert_eq!(result.score, 0.95);
639 assert_eq!(result.tier, MatchTier::Alias);
640 assert!(result.is_match());
641 }
642
643 #[test]
644 fn test_no_match_result() {
645 let result = MatchResult::no_match();
646 assert_eq!(result.score, 0.0);
647 assert_eq!(result.tier, MatchTier::None);
648 assert!(!result.is_match());
649 }
650
651 #[test]
652 fn test_match_tier_default_scores() {
653 assert_eq!(MatchTier::ExactIdentifier.default_score(), 1.0);
654 assert_eq!(MatchTier::Alias.default_score(), 0.95);
655 assert_eq!(MatchTier::EcosystemRule.default_score(), 0.90);
656 assert_eq!(MatchTier::None.default_score(), 0.0);
657 }
658
659 #[test]
660 fn test_composite_matcher() {
661 let matcher = CompositeMatcherBuilder::new()
662 .with_matcher(Box::new(FixedScoreMatcher(0.5)))
663 .with_matcher(Box::new(FixedScoreMatcher(0.8)))
664 .with_matcher(Box::new(FixedScoreMatcher(0.3)))
665 .build();
666
667 let comp_a = Component::new("test".to_string(), "id-1".to_string());
668 let comp_b = Component::new("test".to_string(), "id-2".to_string());
669
670 assert_eq!(matcher.match_score(&comp_a, &comp_b), 0.8);
672 }
673
674 #[test]
675 fn test_find_best_match() {
676 let matcher = FixedScoreMatcher(0.85);
677 let target = Component::new("target".to_string(), "id-0".to_string());
678 let candidates: Vec<Component> = vec![
679 Component::new("candidate1".to_string(), "id-1".to_string()),
680 Component::new("candidate2".to_string(), "id-2".to_string()),
681 ];
682 let candidate_refs: Vec<&Component> = candidates.iter().collect();
683
684 let result = matcher.find_best_match(&target, &candidate_refs, 0.8);
686 assert!(result.is_some());
687
688 let result = matcher.find_best_match(&target, &candidate_refs, 0.9);
690 assert!(result.is_none());
691 }
692
693 #[test]
694 fn test_match_explanation_matched() {
695 let explanation =
696 MatchExplanation::matched(MatchTier::ExactIdentifier, 1.0, "Test match reason");
697
698 assert!(explanation.is_match);
699 assert_eq!(explanation.score, 1.0);
700 assert_eq!(explanation.tier, MatchTier::ExactIdentifier);
701 assert!(explanation.summary().contains("MATCH"));
702 assert!(explanation.summary().contains("100%"));
703 }
704
705 #[test]
706 fn test_match_explanation_no_match() {
707 let explanation = MatchExplanation::no_match("Components are too different");
708
709 assert!(!explanation.is_match);
710 assert_eq!(explanation.score, 0.0);
711 assert_eq!(explanation.tier, MatchTier::None);
712 assert!(explanation.summary().contains("NO MATCH"));
713 }
714
715 #[test]
716 fn test_match_explanation_with_breakdown() {
717 let explanation = MatchExplanation::matched(MatchTier::Fuzzy, 0.85, "Fuzzy match")
718 .with_score_component(ScoreComponent {
719 name: "Jaro-Winkler".to_string(),
720 weight: 0.7,
721 raw_score: 0.9,
722 weighted_score: 0.63,
723 description: "name similarity".to_string(),
724 })
725 .with_score_component(ScoreComponent {
726 name: "Levenshtein".to_string(),
727 weight: 0.3,
728 raw_score: 0.73,
729 weighted_score: 0.22,
730 description: "edit distance".to_string(),
731 })
732 .with_normalization("lowercase");
733
734 assert_eq!(explanation.score_breakdown.len(), 2);
735 assert_eq!(explanation.normalizations_applied.len(), 1);
736
737 let detailed = explanation.detailed();
738 assert!(detailed.contains("Score breakdown:"));
739 assert!(detailed.contains("Jaro-Winkler"));
740 assert!(detailed.contains("Normalizations: lowercase"));
741 }
742
743 #[test]
744 fn test_match_explanation_display() {
745 let explanation = MatchExplanation::matched(MatchTier::Alias, 0.95, "Known alias");
746 let display = format!("{}", explanation);
747 assert!(display.contains("MATCH"));
748 assert!(display.contains("95%"));
749 }
750}