1use crate::model::Component;
7
8#[derive(Debug, Clone)]
10#[must_use]
11pub struct MatchResult {
12 pub score: f64,
14 pub tier: MatchTier,
16 pub metadata: MatchMetadata,
18}
19
20impl MatchResult {
21 pub fn new(score: f64, tier: MatchTier) -> Self {
23 Self {
24 score,
25 tier,
26 metadata: MatchMetadata::default(),
27 }
28 }
29
30 pub const fn with_metadata(score: f64, tier: MatchTier, metadata: MatchMetadata) -> Self {
32 Self {
33 score,
34 tier,
35 metadata,
36 }
37 }
38
39 pub fn no_match() -> Self {
41 Self {
42 score: 0.0,
43 tier: MatchTier::None,
44 metadata: MatchMetadata::default(),
45 }
46 }
47
48 #[must_use]
50 pub fn is_match(&self) -> bool {
51 self.score > 0.0 && self.tier != MatchTier::None
52 }
53}
54
55#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
57#[non_exhaustive]
58pub enum MatchTier {
59 None,
61 ExactIdentifier,
63 Alias,
65 EcosystemRule,
67 Fuzzy,
69 CustomRule,
71}
72
73impl MatchTier {
74 #[must_use]
76 pub const fn default_score(&self) -> f64 {
77 match self {
78 Self::None => 0.0,
79 Self::ExactIdentifier => 1.0,
80 Self::Alias => 0.95,
81 Self::EcosystemRule => 0.90,
82 Self::CustomRule => 0.92,
83 Self::Fuzzy => 0.80,
84 }
85 }
86}
87
88#[derive(Debug, Clone, Default)]
90pub struct MatchMetadata {
91 pub matched_fields: Vec<String>,
93 pub normalization: Option<String>,
95 pub rule_id: Option<String>,
97}
98
99#[derive(Debug, Clone)]
103pub struct MatchExplanation {
104 pub tier: MatchTier,
106 pub score: f64,
108 pub reason: String,
110 pub score_breakdown: Vec<ScoreComponent>,
112 pub normalizations_applied: Vec<String>,
114 pub is_match: bool,
116}
117
118#[derive(Debug, Clone)]
120pub struct ScoreComponent {
121 pub name: String,
123 pub weight: f64,
125 pub raw_score: f64,
127 pub weighted_score: f64,
129 pub description: String,
131}
132
133impl MatchExplanation {
134 pub fn matched(tier: MatchTier, score: f64, reason: impl Into<String>) -> Self {
136 Self {
137 tier,
138 score,
139 reason: reason.into(),
140 score_breakdown: Vec::new(),
141 normalizations_applied: Vec::new(),
142 is_match: true,
143 }
144 }
145
146 pub fn no_match(reason: impl Into<String>) -> Self {
148 Self {
149 tier: MatchTier::None,
150 score: 0.0,
151 reason: reason.into(),
152 score_breakdown: Vec::new(),
153 normalizations_applied: Vec::new(),
154 is_match: false,
155 }
156 }
157
158 #[must_use]
160 pub fn with_score_component(mut self, component: ScoreComponent) -> Self {
161 self.score_breakdown.push(component);
162 self
163 }
164
165 #[must_use]
167 pub fn with_normalization(mut self, normalization: impl Into<String>) -> Self {
168 self.normalizations_applied.push(normalization.into());
169 self
170 }
171
172 #[must_use]
174 pub fn summary(&self) -> String {
175 if self.is_match {
176 format!(
177 "MATCH ({:.0}% confidence via {:?}): {}",
178 self.score * 100.0,
179 self.tier,
180 self.reason
181 )
182 } else {
183 format!("NO MATCH: {}", self.reason)
184 }
185 }
186
187 #[must_use]
189 pub fn detailed(&self) -> String {
190 let mut lines = vec![self.summary()];
191
192 if !self.score_breakdown.is_empty() {
193 lines.push("Score breakdown:".to_string());
194 for component in &self.score_breakdown {
195 lines.push(format!(
196 " - {}: {:.2} × {:.2} = {:.2} ({})",
197 component.name,
198 component.raw_score,
199 component.weight,
200 component.weighted_score,
201 component.description
202 ));
203 }
204 }
205
206 if !self.normalizations_applied.is_empty() {
207 lines.push(format!(
208 "Normalizations: {}",
209 self.normalizations_applied.join(", ")
210 ));
211 }
212
213 lines.join("\n")
214 }
215}
216
217impl std::fmt::Display for MatchExplanation {
218 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
219 write!(f, "{}", self.summary())
220 }
221}
222
223pub trait ComponentMatcher: Send + Sync {
237 fn match_score(&self, a: &Component, b: &Component) -> f64;
241
242 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
246 let score = self.match_score(a, b);
247 if score > 0.0 {
248 MatchResult::new(score, MatchTier::Fuzzy)
249 } else {
250 MatchResult::no_match()
251 }
252 }
253
254 fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
258 let result = self.match_detailed(a, b);
259 if result.is_match() {
260 MatchExplanation::matched(
261 result.tier,
262 result.score,
263 format!("'{}' matches '{}' via {:?}", a.name, b.name, result.tier),
264 )
265 } else {
266 MatchExplanation::no_match(format!(
267 "'{}' does not match '{}' (score {:.2} below threshold)",
268 a.name, b.name, result.score
269 ))
270 }
271 }
272
273 fn find_best_match<'a>(
277 &self,
278 target: &Component,
279 candidates: &'a [&Component],
280 threshold: f64,
281 ) -> Option<(&'a Component, f64)> {
282 candidates
283 .iter()
284 .map(|c| (*c, self.match_score(target, c)))
285 .filter(|(_, score)| *score >= threshold)
286 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
287 }
288
289 fn name(&self) -> &'static str {
291 "ComponentMatcher"
292 }
293
294 fn threshold(&self) -> f64 {
296 0.0
297 }
298}
299
300#[derive(Debug, Clone)]
302pub struct CacheConfig {
303 pub max_entries: usize,
305 pub cache_detailed: bool,
307}
308
309impl Default for CacheConfig {
310 fn default() -> Self {
311 Self {
312 max_entries: 100_000,
313 cache_detailed: false,
314 }
315 }
316}
317
318impl CacheConfig {
319 #[must_use]
321 pub const fn small() -> Self {
322 Self {
323 max_entries: 10_000,
324 cache_detailed: true,
325 }
326 }
327
328 #[must_use]
330 pub const fn large() -> Self {
331 Self {
332 max_entries: 500_000,
333 cache_detailed: false,
334 }
335 }
336}
337
338#[derive(Hash, Eq, PartialEq, Clone)]
340struct CacheKey {
341 hash: u64,
342}
343
344impl CacheKey {
345 fn new(a_id: &str, b_id: &str) -> Self {
346 use xxhash_rust::xxh3::xxh3_64;
347
348 let (first, second) = if a_id < b_id {
350 (a_id, b_id)
351 } else {
352 (b_id, a_id)
353 };
354
355 let combined = format!("{first}|{second}");
356 Self {
357 hash: xxh3_64(combined.as_bytes()),
358 }
359 }
360}
361
362#[derive(Clone)]
364struct CacheEntry {
365 score: f64,
366 detailed: Option<MatchResult>,
367}
368
369pub struct CachedMatcher<M: ComponentMatcher> {
386 inner: M,
387 config: CacheConfig,
388 cache: std::sync::RwLock<std::collections::HashMap<CacheKey, CacheEntry>>,
389 stats: std::sync::atomic::AtomicUsize,
390 hits: std::sync::atomic::AtomicUsize,
391}
392
393impl<M: ComponentMatcher> CachedMatcher<M> {
394 pub fn new(inner: M) -> Self {
396 Self::with_config(inner, CacheConfig::default())
397 }
398
399 pub fn with_config(inner: M, config: CacheConfig) -> Self {
401 Self {
402 inner,
403 config,
404 cache: std::sync::RwLock::new(std::collections::HashMap::new()),
405 stats: std::sync::atomic::AtomicUsize::new(0),
406 hits: std::sync::atomic::AtomicUsize::new(0),
407 }
408 }
409
410 pub const fn inner(&self) -> &M {
412 &self.inner
413 }
414
415 pub fn cache_stats(&self) -> CacheStats {
417 let total = self.stats.load(std::sync::atomic::Ordering::Relaxed);
418 let hits = self.hits.load(std::sync::atomic::Ordering::Relaxed);
419 let size = self.cache.read().map(|c| c.len()).unwrap_or(0);
420 CacheStats {
421 total_lookups: total,
422 cache_hits: hits,
423 cache_misses: total.saturating_sub(hits),
424 hit_rate: if total > 0 {
425 hits as f64 / total as f64
426 } else {
427 0.0
428 },
429 cache_size: size,
430 }
431 }
432
433 pub fn clear_cache(&self) {
435 if let Ok(mut cache) = self.cache.write() {
436 cache.clear();
437 }
438 self.stats.store(0, std::sync::atomic::Ordering::Relaxed);
439 self.hits.store(0, std::sync::atomic::Ordering::Relaxed);
440 }
441
442 fn get_cached(&self, key: &CacheKey) -> Option<CacheEntry> {
444 self.stats
445 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
446 if let Ok(cache) = self.cache.read()
447 && let Some(entry) = cache.get(key) {
448 self.hits.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
449 return Some(entry.clone());
450 }
451 None
452 }
453
454 fn store_cached(&self, key: CacheKey, entry: CacheEntry) {
456 if let Ok(mut cache) = self.cache.write() {
457 if cache.len() >= self.config.max_entries {
459 let to_remove: Vec<CacheKey> = cache
460 .keys()
461 .take(self.config.max_entries / 2)
462 .cloned()
463 .collect();
464 for k in to_remove {
465 cache.remove(&k);
466 }
467 }
468 cache.insert(key, entry);
469 }
470 }
471}
472
473#[derive(Debug, Clone)]
475pub struct CacheStats {
476 pub total_lookups: usize,
478 pub cache_hits: usize,
480 pub cache_misses: usize,
482 pub hit_rate: f64,
484 pub cache_size: usize,
486}
487
488impl<M: ComponentMatcher> ComponentMatcher for CachedMatcher<M> {
489 fn match_score(&self, a: &Component, b: &Component) -> f64 {
490 let key = CacheKey::new(a.canonical_id.value(), b.canonical_id.value());
491
492 if let Some(entry) = self.get_cached(&key) {
494 return entry.score;
495 }
496
497 let score = self.inner.match_score(a, b);
499 self.store_cached(
500 key,
501 CacheEntry {
502 score,
503 detailed: None,
504 },
505 );
506 score
507 }
508
509 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
510 if !self.config.cache_detailed {
511 return self.inner.match_detailed(a, b);
512 }
513
514 let key = CacheKey::new(a.canonical_id.value(), b.canonical_id.value());
515
516 if let Some(entry) = self.get_cached(&key)
518 && let Some(detailed) = entry.detailed {
519 return detailed;
520 }
521
522 let result = self.inner.match_detailed(a, b);
524 self.store_cached(
525 key,
526 CacheEntry {
527 score: result.score,
528 detailed: Some(result.clone()),
529 },
530 );
531 result
532 }
533
534 fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
535 self.inner.explain_match(a, b)
537 }
538
539 fn name(&self) -> &'static str {
540 "CachedMatcher"
541 }
542
543 fn threshold(&self) -> f64 {
544 self.inner.threshold()
545 }
546}
547
548#[must_use]
550pub struct CompositeMatcherBuilder {
551 matchers: Vec<Box<dyn ComponentMatcher>>,
552}
553
554impl CompositeMatcherBuilder {
555 pub fn new() -> Self {
557 Self {
558 matchers: Vec::new(),
559 }
560 }
561
562 pub fn with_matcher(mut self, matcher: Box<dyn ComponentMatcher>) -> Self {
564 self.matchers.push(matcher);
565 self
566 }
567
568 #[must_use]
570 pub fn build(self) -> CompositeMatcher {
571 CompositeMatcher {
572 matchers: self.matchers,
573 }
574 }
575}
576
577impl Default for CompositeMatcherBuilder {
578 fn default() -> Self {
579 Self::new()
580 }
581}
582
583pub struct CompositeMatcher {
585 matchers: Vec<Box<dyn ComponentMatcher>>,
586}
587
588impl ComponentMatcher for CompositeMatcher {
589 fn match_score(&self, a: &Component, b: &Component) -> f64 {
590 self.matchers
592 .iter()
593 .map(|m| m.match_score(a, b))
594 .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
595 .unwrap_or(0.0)
596 }
597
598 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
599 self.matchers
601 .iter()
602 .map(|m| m.match_detailed(a, b))
603 .max_by(|a, b| {
604 a.score
605 .partial_cmp(&b.score)
606 .unwrap_or(std::cmp::Ordering::Equal)
607 })
608 .unwrap_or_else(MatchResult::no_match)
609 }
610
611 fn name(&self) -> &'static str {
612 "CompositeMatcher"
613 }
614}
615
616#[cfg(test)]
617mod tests {
618 use super::*;
619
620 struct FixedScoreMatcher(f64);
622
623 impl ComponentMatcher for FixedScoreMatcher {
624 fn match_score(&self, _a: &Component, _b: &Component) -> f64 {
625 self.0
626 }
627
628 fn name(&self) -> &'static str {
629 "FixedScoreMatcher"
630 }
631 }
632
633 #[test]
634 fn test_match_result_creation() {
635 let result = MatchResult::new(0.95, MatchTier::Alias);
636 assert_eq!(result.score, 0.95);
637 assert_eq!(result.tier, MatchTier::Alias);
638 assert!(result.is_match());
639 }
640
641 #[test]
642 fn test_no_match_result() {
643 let result = MatchResult::no_match();
644 assert_eq!(result.score, 0.0);
645 assert_eq!(result.tier, MatchTier::None);
646 assert!(!result.is_match());
647 }
648
649 #[test]
650 fn test_match_tier_default_scores() {
651 assert_eq!(MatchTier::ExactIdentifier.default_score(), 1.0);
652 assert_eq!(MatchTier::Alias.default_score(), 0.95);
653 assert_eq!(MatchTier::EcosystemRule.default_score(), 0.90);
654 assert_eq!(MatchTier::None.default_score(), 0.0);
655 }
656
657 #[test]
658 fn test_composite_matcher() {
659 let matcher = CompositeMatcherBuilder::new()
660 .with_matcher(Box::new(FixedScoreMatcher(0.5)))
661 .with_matcher(Box::new(FixedScoreMatcher(0.8)))
662 .with_matcher(Box::new(FixedScoreMatcher(0.3)))
663 .build();
664
665 let comp_a = Component::new("test".to_string(), "id-1".to_string());
666 let comp_b = Component::new("test".to_string(), "id-2".to_string());
667
668 assert_eq!(matcher.match_score(&comp_a, &comp_b), 0.8);
670 }
671
672 #[test]
673 fn test_find_best_match() {
674 let matcher = FixedScoreMatcher(0.85);
675 let target = Component::new("target".to_string(), "id-0".to_string());
676 let candidates: Vec<Component> = vec![
677 Component::new("candidate1".to_string(), "id-1".to_string()),
678 Component::new("candidate2".to_string(), "id-2".to_string()),
679 ];
680 let candidate_refs: Vec<&Component> = candidates.iter().collect();
681
682 let result = matcher.find_best_match(&target, &candidate_refs, 0.8);
684 assert!(result.is_some());
685
686 let result = matcher.find_best_match(&target, &candidate_refs, 0.9);
688 assert!(result.is_none());
689 }
690
691 #[test]
692 fn test_match_explanation_matched() {
693 let explanation =
694 MatchExplanation::matched(MatchTier::ExactIdentifier, 1.0, "Test match reason");
695
696 assert!(explanation.is_match);
697 assert_eq!(explanation.score, 1.0);
698 assert_eq!(explanation.tier, MatchTier::ExactIdentifier);
699 assert!(explanation.summary().contains("MATCH"));
700 assert!(explanation.summary().contains("100%"));
701 }
702
703 #[test]
704 fn test_match_explanation_no_match() {
705 let explanation = MatchExplanation::no_match("Components are too different");
706
707 assert!(!explanation.is_match);
708 assert_eq!(explanation.score, 0.0);
709 assert_eq!(explanation.tier, MatchTier::None);
710 assert!(explanation.summary().contains("NO MATCH"));
711 }
712
713 #[test]
714 fn test_match_explanation_with_breakdown() {
715 let explanation = MatchExplanation::matched(MatchTier::Fuzzy, 0.85, "Fuzzy match")
716 .with_score_component(ScoreComponent {
717 name: "Jaro-Winkler".to_string(),
718 weight: 0.7,
719 raw_score: 0.9,
720 weighted_score: 0.63,
721 description: "name similarity".to_string(),
722 })
723 .with_score_component(ScoreComponent {
724 name: "Levenshtein".to_string(),
725 weight: 0.3,
726 raw_score: 0.73,
727 weighted_score: 0.22,
728 description: "edit distance".to_string(),
729 })
730 .with_normalization("lowercase");
731
732 assert_eq!(explanation.score_breakdown.len(), 2);
733 assert_eq!(explanation.normalizations_applied.len(), 1);
734
735 let detailed = explanation.detailed();
736 assert!(detailed.contains("Score breakdown:"));
737 assert!(detailed.contains("Jaro-Winkler"));
738 assert!(detailed.contains("Normalizations: lowercase"));
739 }
740
741 #[test]
742 fn test_match_explanation_display() {
743 let explanation = MatchExplanation::matched(MatchTier::Alias, 0.95, "Known alias");
744 let display = format!("{}", explanation);
745 assert!(display.contains("MATCH"));
746 assert!(display.contains("95%"));
747 }
748}