1use crate::model::Component;
7
8#[derive(Debug, Clone)]
10pub struct MatchResult {
11 pub score: f64,
13 pub tier: MatchTier,
15 pub metadata: MatchMetadata,
17}
18
19impl MatchResult {
20 pub fn new(score: f64, tier: MatchTier) -> Self {
22 Self {
23 score,
24 tier,
25 metadata: MatchMetadata::default(),
26 }
27 }
28
29 pub fn with_metadata(score: f64, tier: MatchTier, metadata: MatchMetadata) -> Self {
31 Self {
32 score,
33 tier,
34 metadata,
35 }
36 }
37
38 pub fn no_match() -> Self {
40 Self {
41 score: 0.0,
42 tier: MatchTier::None,
43 metadata: MatchMetadata::default(),
44 }
45 }
46
47 pub fn is_match(&self) -> bool {
49 self.score > 0.0 && self.tier != MatchTier::None
50 }
51}
52
53#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
55pub enum MatchTier {
56 None,
58 ExactIdentifier,
60 Alias,
62 EcosystemRule,
64 Fuzzy,
66 CustomRule,
68}
69
70impl MatchTier {
71 pub fn default_score(&self) -> f64 {
73 match self {
74 MatchTier::None => 0.0,
75 MatchTier::ExactIdentifier => 1.0,
76 MatchTier::Alias => 0.95,
77 MatchTier::EcosystemRule => 0.90,
78 MatchTier::CustomRule => 0.92,
79 MatchTier::Fuzzy => 0.80,
80 }
81 }
82}
83
84#[derive(Debug, Clone, Default)]
86pub struct MatchMetadata {
87 pub matched_fields: Vec<String>,
89 pub normalization: Option<String>,
91 pub rule_id: Option<String>,
93}
94
95#[derive(Debug, Clone)]
99pub struct MatchExplanation {
100 pub tier: MatchTier,
102 pub score: f64,
104 pub reason: String,
106 pub score_breakdown: Vec<ScoreComponent>,
108 pub normalizations_applied: Vec<String>,
110 pub is_match: bool,
112}
113
114#[derive(Debug, Clone)]
116pub struct ScoreComponent {
117 pub name: String,
119 pub weight: f64,
121 pub raw_score: f64,
123 pub weighted_score: f64,
125 pub description: String,
127}
128
129impl MatchExplanation {
130 pub fn matched(tier: MatchTier, score: f64, reason: impl Into<String>) -> Self {
132 Self {
133 tier,
134 score,
135 reason: reason.into(),
136 score_breakdown: Vec::new(),
137 normalizations_applied: Vec::new(),
138 is_match: true,
139 }
140 }
141
142 pub fn no_match(reason: impl Into<String>) -> Self {
144 Self {
145 tier: MatchTier::None,
146 score: 0.0,
147 reason: reason.into(),
148 score_breakdown: Vec::new(),
149 normalizations_applied: Vec::new(),
150 is_match: false,
151 }
152 }
153
154 pub fn with_score_component(mut self, component: ScoreComponent) -> Self {
156 self.score_breakdown.push(component);
157 self
158 }
159
160 pub fn with_normalization(mut self, normalization: impl Into<String>) -> Self {
162 self.normalizations_applied.push(normalization.into());
163 self
164 }
165
166 pub fn summary(&self) -> String {
168 if self.is_match {
169 format!(
170 "MATCH ({:.0}% confidence via {:?}): {}",
171 self.score * 100.0,
172 self.tier,
173 self.reason
174 )
175 } else {
176 format!("NO MATCH: {}", self.reason)
177 }
178 }
179
180 pub fn detailed(&self) -> String {
182 let mut lines = vec![self.summary()];
183
184 if !self.score_breakdown.is_empty() {
185 lines.push("Score breakdown:".to_string());
186 for component in &self.score_breakdown {
187 lines.push(format!(
188 " - {}: {:.2} × {:.2} = {:.2} ({})",
189 component.name,
190 component.raw_score,
191 component.weight,
192 component.weighted_score,
193 component.description
194 ));
195 }
196 }
197
198 if !self.normalizations_applied.is_empty() {
199 lines.push(format!(
200 "Normalizations: {}",
201 self.normalizations_applied.join(", ")
202 ));
203 }
204
205 lines.join("\n")
206 }
207}
208
209impl std::fmt::Display for MatchExplanation {
210 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
211 write!(f, "{}", self.summary())
212 }
213}
214
215pub trait ComponentMatcher: Send + Sync {
229 fn match_score(&self, a: &Component, b: &Component) -> f64;
233
234 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
238 let score = self.match_score(a, b);
239 if score > 0.0 {
240 MatchResult::new(score, MatchTier::Fuzzy)
241 } else {
242 MatchResult::no_match()
243 }
244 }
245
246 fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
250 let result = self.match_detailed(a, b);
251 if result.is_match() {
252 MatchExplanation::matched(
253 result.tier,
254 result.score,
255 format!("'{}' matches '{}' via {:?}", a.name, b.name, result.tier),
256 )
257 } else {
258 MatchExplanation::no_match(format!(
259 "'{}' does not match '{}' (score {:.2} below threshold)",
260 a.name, b.name, result.score
261 ))
262 }
263 }
264
265 fn find_best_match<'a>(
269 &self,
270 target: &Component,
271 candidates: &'a [&Component],
272 threshold: f64,
273 ) -> Option<(&'a Component, f64)> {
274 candidates
275 .iter()
276 .map(|c| (*c, self.match_score(target, c)))
277 .filter(|(_, score)| *score >= threshold)
278 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
279 }
280
281 fn name(&self) -> &str {
283 "ComponentMatcher"
284 }
285
286 fn threshold(&self) -> f64 {
288 0.0
289 }
290}
291
292#[derive(Debug, Clone)]
294pub struct CacheConfig {
295 pub max_entries: usize,
297 pub cache_detailed: bool,
299}
300
301impl Default for CacheConfig {
302 fn default() -> Self {
303 Self {
304 max_entries: 100_000,
305 cache_detailed: false,
306 }
307 }
308}
309
310impl CacheConfig {
311 pub fn small() -> Self {
313 Self {
314 max_entries: 10_000,
315 cache_detailed: true,
316 }
317 }
318
319 pub fn large() -> Self {
321 Self {
322 max_entries: 500_000,
323 cache_detailed: false,
324 }
325 }
326}
327
328#[derive(Hash, Eq, PartialEq, Clone)]
330struct CacheKey {
331 hash: u64,
332}
333
334impl CacheKey {
335 fn new(a_id: &str, b_id: &str) -> Self {
336 use xxhash_rust::xxh3::xxh3_64;
337
338 let (first, second) = if a_id < b_id {
340 (a_id, b_id)
341 } else {
342 (b_id, a_id)
343 };
344
345 let combined = format!("{}|{}", first, second);
346 Self {
347 hash: xxh3_64(combined.as_bytes()),
348 }
349 }
350}
351
352#[derive(Clone)]
354struct CacheEntry {
355 score: f64,
356 detailed: Option<MatchResult>,
357}
358
359pub struct CachedMatcher<M: ComponentMatcher> {
376 inner: M,
377 config: CacheConfig,
378 cache: std::sync::RwLock<std::collections::HashMap<CacheKey, CacheEntry>>,
379 stats: std::sync::atomic::AtomicUsize,
380 hits: std::sync::atomic::AtomicUsize,
381}
382
383impl<M: ComponentMatcher> CachedMatcher<M> {
384 pub fn new(inner: M) -> Self {
386 Self::with_config(inner, CacheConfig::default())
387 }
388
389 pub fn with_config(inner: M, config: CacheConfig) -> Self {
391 Self {
392 inner,
393 config,
394 cache: std::sync::RwLock::new(std::collections::HashMap::new()),
395 stats: std::sync::atomic::AtomicUsize::new(0),
396 hits: std::sync::atomic::AtomicUsize::new(0),
397 }
398 }
399
400 pub fn inner(&self) -> &M {
402 &self.inner
403 }
404
405 pub fn cache_stats(&self) -> CacheStats {
407 let total = self.stats.load(std::sync::atomic::Ordering::Relaxed);
408 let hits = self.hits.load(std::sync::atomic::Ordering::Relaxed);
409 let size = self.cache.read().map(|c| c.len()).unwrap_or(0);
410 CacheStats {
411 total_lookups: total,
412 cache_hits: hits,
413 cache_misses: total.saturating_sub(hits),
414 hit_rate: if total > 0 {
415 hits as f64 / total as f64
416 } else {
417 0.0
418 },
419 cache_size: size,
420 }
421 }
422
423 pub fn clear_cache(&self) {
425 if let Ok(mut cache) = self.cache.write() {
426 cache.clear();
427 }
428 self.stats.store(0, std::sync::atomic::Ordering::Relaxed);
429 self.hits.store(0, std::sync::atomic::Ordering::Relaxed);
430 }
431
432 fn get_cached(&self, key: &CacheKey) -> Option<CacheEntry> {
434 self.stats
435 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
436 if let Ok(cache) = self.cache.read() {
437 if let Some(entry) = cache.get(key) {
438 self.hits.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
439 return Some(entry.clone());
440 }
441 }
442 None
443 }
444
445 fn store_cached(&self, key: CacheKey, entry: CacheEntry) {
447 if let Ok(mut cache) = self.cache.write() {
448 if cache.len() >= self.config.max_entries {
450 let to_remove: Vec<CacheKey> = cache
451 .keys()
452 .take(self.config.max_entries / 2)
453 .cloned()
454 .collect();
455 for k in to_remove {
456 cache.remove(&k);
457 }
458 }
459 cache.insert(key, entry);
460 }
461 }
462}
463
464#[derive(Debug, Clone)]
466pub struct CacheStats {
467 pub total_lookups: usize,
469 pub cache_hits: usize,
471 pub cache_misses: usize,
473 pub hit_rate: f64,
475 pub cache_size: usize,
477}
478
479impl<M: ComponentMatcher> ComponentMatcher for CachedMatcher<M> {
480 fn match_score(&self, a: &Component, b: &Component) -> f64 {
481 let key = CacheKey::new(a.canonical_id.value(), b.canonical_id.value());
482
483 if let Some(entry) = self.get_cached(&key) {
485 return entry.score;
486 }
487
488 let score = self.inner.match_score(a, b);
490 self.store_cached(
491 key,
492 CacheEntry {
493 score,
494 detailed: None,
495 },
496 );
497 score
498 }
499
500 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
501 if !self.config.cache_detailed {
502 return self.inner.match_detailed(a, b);
503 }
504
505 let key = CacheKey::new(a.canonical_id.value(), b.canonical_id.value());
506
507 if let Some(entry) = self.get_cached(&key) {
509 if let Some(detailed) = entry.detailed {
510 return detailed;
511 }
512 }
513
514 let result = self.inner.match_detailed(a, b);
516 self.store_cached(
517 key,
518 CacheEntry {
519 score: result.score,
520 detailed: Some(result.clone()),
521 },
522 );
523 result
524 }
525
526 fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
527 self.inner.explain_match(a, b)
529 }
530
531 fn name(&self) -> &str {
532 "CachedMatcher"
533 }
534
535 fn threshold(&self) -> f64 {
536 self.inner.threshold()
537 }
538}
539
540pub struct CompositeMatcherBuilder {
542 matchers: Vec<Box<dyn ComponentMatcher>>,
543}
544
545impl CompositeMatcherBuilder {
546 pub fn new() -> Self {
548 Self {
549 matchers: Vec::new(),
550 }
551 }
552
553 pub fn with_matcher(mut self, matcher: Box<dyn ComponentMatcher>) -> Self {
555 self.matchers.push(matcher);
556 self
557 }
558
559 pub fn build(self) -> CompositeMatcher {
561 CompositeMatcher {
562 matchers: self.matchers,
563 }
564 }
565}
566
567impl Default for CompositeMatcherBuilder {
568 fn default() -> Self {
569 Self::new()
570 }
571}
572
573pub struct CompositeMatcher {
575 matchers: Vec<Box<dyn ComponentMatcher>>,
576}
577
578impl ComponentMatcher for CompositeMatcher {
579 fn match_score(&self, a: &Component, b: &Component) -> f64 {
580 self.matchers
582 .iter()
583 .map(|m| m.match_score(a, b))
584 .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
585 .unwrap_or(0.0)
586 }
587
588 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
589 self.matchers
591 .iter()
592 .map(|m| m.match_detailed(a, b))
593 .max_by(|a, b| {
594 a.score
595 .partial_cmp(&b.score)
596 .unwrap_or(std::cmp::Ordering::Equal)
597 })
598 .unwrap_or_else(MatchResult::no_match)
599 }
600
601 fn name(&self) -> &str {
602 "CompositeMatcher"
603 }
604}
605
606#[cfg(test)]
607mod tests {
608 use super::*;
609
610 struct FixedScoreMatcher(f64);
612
613 impl ComponentMatcher for FixedScoreMatcher {
614 fn match_score(&self, _a: &Component, _b: &Component) -> f64 {
615 self.0
616 }
617
618 fn name(&self) -> &str {
619 "FixedScoreMatcher"
620 }
621 }
622
623 #[test]
624 fn test_match_result_creation() {
625 let result = MatchResult::new(0.95, MatchTier::Alias);
626 assert_eq!(result.score, 0.95);
627 assert_eq!(result.tier, MatchTier::Alias);
628 assert!(result.is_match());
629 }
630
631 #[test]
632 fn test_no_match_result() {
633 let result = MatchResult::no_match();
634 assert_eq!(result.score, 0.0);
635 assert_eq!(result.tier, MatchTier::None);
636 assert!(!result.is_match());
637 }
638
639 #[test]
640 fn test_match_tier_default_scores() {
641 assert_eq!(MatchTier::ExactIdentifier.default_score(), 1.0);
642 assert_eq!(MatchTier::Alias.default_score(), 0.95);
643 assert_eq!(MatchTier::EcosystemRule.default_score(), 0.90);
644 assert_eq!(MatchTier::None.default_score(), 0.0);
645 }
646
647 #[test]
648 fn test_composite_matcher() {
649 let matcher = CompositeMatcherBuilder::new()
650 .with_matcher(Box::new(FixedScoreMatcher(0.5)))
651 .with_matcher(Box::new(FixedScoreMatcher(0.8)))
652 .with_matcher(Box::new(FixedScoreMatcher(0.3)))
653 .build();
654
655 let comp_a = Component::new("test".to_string(), "id-1".to_string());
656 let comp_b = Component::new("test".to_string(), "id-2".to_string());
657
658 assert_eq!(matcher.match_score(&comp_a, &comp_b), 0.8);
660 }
661
662 #[test]
663 fn test_find_best_match() {
664 let matcher = FixedScoreMatcher(0.85);
665 let target = Component::new("target".to_string(), "id-0".to_string());
666 let candidates: Vec<Component> = vec![
667 Component::new("candidate1".to_string(), "id-1".to_string()),
668 Component::new("candidate2".to_string(), "id-2".to_string()),
669 ];
670 let candidate_refs: Vec<&Component> = candidates.iter().collect();
671
672 let result = matcher.find_best_match(&target, &candidate_refs, 0.8);
674 assert!(result.is_some());
675
676 let result = matcher.find_best_match(&target, &candidate_refs, 0.9);
678 assert!(result.is_none());
679 }
680
681 #[test]
682 fn test_match_explanation_matched() {
683 let explanation =
684 MatchExplanation::matched(MatchTier::ExactIdentifier, 1.0, "Test match reason");
685
686 assert!(explanation.is_match);
687 assert_eq!(explanation.score, 1.0);
688 assert_eq!(explanation.tier, MatchTier::ExactIdentifier);
689 assert!(explanation.summary().contains("MATCH"));
690 assert!(explanation.summary().contains("100%"));
691 }
692
693 #[test]
694 fn test_match_explanation_no_match() {
695 let explanation = MatchExplanation::no_match("Components are too different");
696
697 assert!(!explanation.is_match);
698 assert_eq!(explanation.score, 0.0);
699 assert_eq!(explanation.tier, MatchTier::None);
700 assert!(explanation.summary().contains("NO MATCH"));
701 }
702
703 #[test]
704 fn test_match_explanation_with_breakdown() {
705 let explanation = MatchExplanation::matched(MatchTier::Fuzzy, 0.85, "Fuzzy match")
706 .with_score_component(ScoreComponent {
707 name: "Jaro-Winkler".to_string(),
708 weight: 0.7,
709 raw_score: 0.9,
710 weighted_score: 0.63,
711 description: "name similarity".to_string(),
712 })
713 .with_score_component(ScoreComponent {
714 name: "Levenshtein".to_string(),
715 weight: 0.3,
716 raw_score: 0.73,
717 weighted_score: 0.22,
718 description: "edit distance".to_string(),
719 })
720 .with_normalization("lowercase");
721
722 assert_eq!(explanation.score_breakdown.len(), 2);
723 assert_eq!(explanation.normalizations_applied.len(), 1);
724
725 let detailed = explanation.detailed();
726 assert!(detailed.contains("Score breakdown:"));
727 assert!(detailed.contains("Jaro-Winkler"));
728 assert!(detailed.contains("Normalizations: lowercase"));
729 }
730
731 #[test]
732 fn test_match_explanation_display() {
733 let explanation = MatchExplanation::matched(MatchTier::Alias, 0.95, "Known alias");
734 let display = format!("{}", explanation);
735 assert!(display.contains("MATCH"));
736 assert!(display.contains("95%"));
737 }
738}