1use crate::model::Component;
7
8#[derive(Debug, Clone)]
10pub struct MatchResult {
11 pub score: f64,
13 pub tier: MatchTier,
15 pub metadata: MatchMetadata,
17}
18
19impl MatchResult {
20 pub fn new(score: f64, tier: MatchTier) -> Self {
22 Self {
23 score,
24 tier,
25 metadata: MatchMetadata::default(),
26 }
27 }
28
29 pub fn with_metadata(score: f64, tier: MatchTier, metadata: MatchMetadata) -> Self {
31 Self {
32 score,
33 tier,
34 metadata,
35 }
36 }
37
38 pub fn no_match() -> Self {
40 Self {
41 score: 0.0,
42 tier: MatchTier::None,
43 metadata: MatchMetadata::default(),
44 }
45 }
46
47 pub fn is_match(&self) -> bool {
49 self.score > 0.0 && self.tier != MatchTier::None
50 }
51}
52
53#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
55pub enum MatchTier {
56 None,
58 ExactIdentifier,
60 Alias,
62 EcosystemRule,
64 Fuzzy,
66 CustomRule,
68 #[cfg(feature = "ml-matching")]
70 Embedding,
71}
72
73impl MatchTier {
74 pub fn default_score(&self) -> f64 {
76 match self {
77 MatchTier::None => 0.0,
78 MatchTier::ExactIdentifier => 1.0,
79 MatchTier::Alias => 0.95,
80 MatchTier::EcosystemRule => 0.90,
81 MatchTier::CustomRule => 0.92,
82 MatchTier::Fuzzy => 0.80,
83 #[cfg(feature = "ml-matching")]
84 MatchTier::Embedding => 0.85,
85 }
86 }
87}
88
89#[derive(Debug, Clone, Default)]
91pub struct MatchMetadata {
92 pub matched_fields: Vec<String>,
94 pub normalization: Option<String>,
96 pub rule_id: Option<String>,
98}
99
100#[derive(Debug, Clone)]
104pub struct MatchExplanation {
105 pub tier: MatchTier,
107 pub score: f64,
109 pub reason: String,
111 pub score_breakdown: Vec<ScoreComponent>,
113 pub normalizations_applied: Vec<String>,
115 pub is_match: bool,
117}
118
119#[derive(Debug, Clone)]
121pub struct ScoreComponent {
122 pub name: String,
124 pub weight: f64,
126 pub raw_score: f64,
128 pub weighted_score: f64,
130 pub description: String,
132}
133
134impl MatchExplanation {
135 pub fn matched(tier: MatchTier, score: f64, reason: impl Into<String>) -> Self {
137 Self {
138 tier,
139 score,
140 reason: reason.into(),
141 score_breakdown: Vec::new(),
142 normalizations_applied: Vec::new(),
143 is_match: true,
144 }
145 }
146
147 pub fn no_match(reason: impl Into<String>) -> Self {
149 Self {
150 tier: MatchTier::None,
151 score: 0.0,
152 reason: reason.into(),
153 score_breakdown: Vec::new(),
154 normalizations_applied: Vec::new(),
155 is_match: false,
156 }
157 }
158
159 pub fn with_score_component(mut self, component: ScoreComponent) -> Self {
161 self.score_breakdown.push(component);
162 self
163 }
164
165 pub fn with_normalization(mut self, normalization: impl Into<String>) -> Self {
167 self.normalizations_applied.push(normalization.into());
168 self
169 }
170
171 pub fn summary(&self) -> String {
173 if self.is_match {
174 format!(
175 "MATCH ({:.0}% confidence via {:?}): {}",
176 self.score * 100.0,
177 self.tier,
178 self.reason
179 )
180 } else {
181 format!("NO MATCH: {}", self.reason)
182 }
183 }
184
185 pub fn detailed(&self) -> String {
187 let mut lines = vec![self.summary()];
188
189 if !self.score_breakdown.is_empty() {
190 lines.push("Score breakdown:".to_string());
191 for component in &self.score_breakdown {
192 lines.push(format!(
193 " - {}: {:.2} × {:.2} = {:.2} ({})",
194 component.name,
195 component.raw_score,
196 component.weight,
197 component.weighted_score,
198 component.description
199 ));
200 }
201 }
202
203 if !self.normalizations_applied.is_empty() {
204 lines.push(format!(
205 "Normalizations: {}",
206 self.normalizations_applied.join(", ")
207 ));
208 }
209
210 lines.join("\n")
211 }
212}
213
214impl std::fmt::Display for MatchExplanation {
215 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216 write!(f, "{}", self.summary())
217 }
218}
219
220pub trait ComponentMatcher: Send + Sync {
234 fn match_score(&self, a: &Component, b: &Component) -> f64;
238
239 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
243 let score = self.match_score(a, b);
244 if score > 0.0 {
245 MatchResult::new(score, MatchTier::Fuzzy)
246 } else {
247 MatchResult::no_match()
248 }
249 }
250
251 fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
255 let result = self.match_detailed(a, b);
256 if result.is_match() {
257 MatchExplanation::matched(
258 result.tier,
259 result.score,
260 format!("'{}' matches '{}' via {:?}", a.name, b.name, result.tier),
261 )
262 } else {
263 MatchExplanation::no_match(format!(
264 "'{}' does not match '{}' (score {:.2} below threshold)",
265 a.name, b.name, result.score
266 ))
267 }
268 }
269
270 fn find_best_match<'a>(
274 &self,
275 target: &Component,
276 candidates: &'a [&Component],
277 threshold: f64,
278 ) -> Option<(&'a Component, f64)> {
279 candidates
280 .iter()
281 .map(|c| (*c, self.match_score(target, c)))
282 .filter(|(_, score)| *score >= threshold)
283 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
284 }
285
286 fn name(&self) -> &str {
288 "ComponentMatcher"
289 }
290
291 fn threshold(&self) -> f64 {
293 0.0
294 }
295}
296
297#[derive(Debug, Clone)]
299pub struct CacheConfig {
300 pub max_entries: usize,
302 pub cache_detailed: bool,
304}
305
306impl Default for CacheConfig {
307 fn default() -> Self {
308 Self {
309 max_entries: 100_000,
310 cache_detailed: false,
311 }
312 }
313}
314
315impl CacheConfig {
316 pub fn small() -> Self {
318 Self {
319 max_entries: 10_000,
320 cache_detailed: true,
321 }
322 }
323
324 pub fn large() -> Self {
326 Self {
327 max_entries: 500_000,
328 cache_detailed: false,
329 }
330 }
331}
332
333#[derive(Hash, Eq, PartialEq, Clone)]
335struct CacheKey {
336 hash: u64,
337}
338
339impl CacheKey {
340 fn new(a_id: &str, b_id: &str) -> Self {
341 use xxhash_rust::xxh3::xxh3_64;
342
343 let (first, second) = if a_id < b_id {
345 (a_id, b_id)
346 } else {
347 (b_id, a_id)
348 };
349
350 let combined = format!("{}|{}", first, second);
351 Self {
352 hash: xxh3_64(combined.as_bytes()),
353 }
354 }
355}
356
357#[derive(Clone)]
359struct CacheEntry {
360 score: f64,
361 detailed: Option<MatchResult>,
362}
363
364pub struct CachedMatcher<M: ComponentMatcher> {
381 inner: M,
382 config: CacheConfig,
383 cache: std::sync::RwLock<std::collections::HashMap<CacheKey, CacheEntry>>,
384 stats: std::sync::atomic::AtomicUsize,
385 hits: std::sync::atomic::AtomicUsize,
386}
387
388impl<M: ComponentMatcher> CachedMatcher<M> {
389 pub fn new(inner: M) -> Self {
391 Self::with_config(inner, CacheConfig::default())
392 }
393
394 pub fn with_config(inner: M, config: CacheConfig) -> Self {
396 Self {
397 inner,
398 config,
399 cache: std::sync::RwLock::new(std::collections::HashMap::new()),
400 stats: std::sync::atomic::AtomicUsize::new(0),
401 hits: std::sync::atomic::AtomicUsize::new(0),
402 }
403 }
404
405 pub fn inner(&self) -> &M {
407 &self.inner
408 }
409
410 pub fn cache_stats(&self) -> CacheStats {
412 let total = self.stats.load(std::sync::atomic::Ordering::Relaxed);
413 let hits = self.hits.load(std::sync::atomic::Ordering::Relaxed);
414 let size = self.cache.read().map(|c| c.len()).unwrap_or(0);
415 CacheStats {
416 total_lookups: total,
417 cache_hits: hits,
418 cache_misses: total.saturating_sub(hits),
419 hit_rate: if total > 0 {
420 hits as f64 / total as f64
421 } else {
422 0.0
423 },
424 cache_size: size,
425 }
426 }
427
428 pub fn clear_cache(&self) {
430 if let Ok(mut cache) = self.cache.write() {
431 cache.clear();
432 }
433 self.stats.store(0, std::sync::atomic::Ordering::Relaxed);
434 self.hits.store(0, std::sync::atomic::Ordering::Relaxed);
435 }
436
437 fn get_cached(&self, key: &CacheKey) -> Option<CacheEntry> {
439 self.stats
440 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
441 if let Ok(cache) = self.cache.read() {
442 if let Some(entry) = cache.get(key) {
443 self.hits.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
444 return Some(entry.clone());
445 }
446 }
447 None
448 }
449
450 fn store_cached(&self, key: CacheKey, entry: CacheEntry) {
452 if let Ok(mut cache) = self.cache.write() {
453 if cache.len() >= self.config.max_entries {
455 let to_remove: Vec<CacheKey> = cache
456 .keys()
457 .take(self.config.max_entries / 2)
458 .cloned()
459 .collect();
460 for k in to_remove {
461 cache.remove(&k);
462 }
463 }
464 cache.insert(key, entry);
465 }
466 }
467}
468
469#[derive(Debug, Clone)]
471pub struct CacheStats {
472 pub total_lookups: usize,
474 pub cache_hits: usize,
476 pub cache_misses: usize,
478 pub hit_rate: f64,
480 pub cache_size: usize,
482}
483
484impl<M: ComponentMatcher> ComponentMatcher for CachedMatcher<M> {
485 fn match_score(&self, a: &Component, b: &Component) -> f64 {
486 let key = CacheKey::new(a.canonical_id.value(), b.canonical_id.value());
487
488 if let Some(entry) = self.get_cached(&key) {
490 return entry.score;
491 }
492
493 let score = self.inner.match_score(a, b);
495 self.store_cached(
496 key,
497 CacheEntry {
498 score,
499 detailed: None,
500 },
501 );
502 score
503 }
504
505 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
506 if !self.config.cache_detailed {
507 return self.inner.match_detailed(a, b);
508 }
509
510 let key = CacheKey::new(a.canonical_id.value(), b.canonical_id.value());
511
512 if let Some(entry) = self.get_cached(&key) {
514 if let Some(detailed) = entry.detailed {
515 return detailed;
516 }
517 }
518
519 let result = self.inner.match_detailed(a, b);
521 self.store_cached(
522 key,
523 CacheEntry {
524 score: result.score,
525 detailed: Some(result.clone()),
526 },
527 );
528 result
529 }
530
531 fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
532 self.inner.explain_match(a, b)
534 }
535
536 fn name(&self) -> &str {
537 "CachedMatcher"
538 }
539
540 fn threshold(&self) -> f64 {
541 self.inner.threshold()
542 }
543}
544
545pub struct CompositeMatcherBuilder {
547 matchers: Vec<Box<dyn ComponentMatcher>>,
548}
549
550impl CompositeMatcherBuilder {
551 pub fn new() -> Self {
553 Self {
554 matchers: Vec::new(),
555 }
556 }
557
558 pub fn with_matcher(mut self, matcher: Box<dyn ComponentMatcher>) -> Self {
560 self.matchers.push(matcher);
561 self
562 }
563
564 pub fn build(self) -> CompositeMatcher {
566 CompositeMatcher {
567 matchers: self.matchers,
568 }
569 }
570}
571
572impl Default for CompositeMatcherBuilder {
573 fn default() -> Self {
574 Self::new()
575 }
576}
577
578pub struct CompositeMatcher {
580 matchers: Vec<Box<dyn ComponentMatcher>>,
581}
582
583impl ComponentMatcher for CompositeMatcher {
584 fn match_score(&self, a: &Component, b: &Component) -> f64 {
585 self.matchers
587 .iter()
588 .map(|m| m.match_score(a, b))
589 .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
590 .unwrap_or(0.0)
591 }
592
593 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
594 self.matchers
596 .iter()
597 .map(|m| m.match_detailed(a, b))
598 .max_by(|a, b| {
599 a.score
600 .partial_cmp(&b.score)
601 .unwrap_or(std::cmp::Ordering::Equal)
602 })
603 .unwrap_or_else(MatchResult::no_match)
604 }
605
606 fn name(&self) -> &str {
607 "CompositeMatcher"
608 }
609}
610
611#[cfg(test)]
612mod tests {
613 use super::*;
614
615 struct FixedScoreMatcher(f64);
617
618 impl ComponentMatcher for FixedScoreMatcher {
619 fn match_score(&self, _a: &Component, _b: &Component) -> f64 {
620 self.0
621 }
622
623 fn name(&self) -> &str {
624 "FixedScoreMatcher"
625 }
626 }
627
628 #[test]
629 fn test_match_result_creation() {
630 let result = MatchResult::new(0.95, MatchTier::Alias);
631 assert_eq!(result.score, 0.95);
632 assert_eq!(result.tier, MatchTier::Alias);
633 assert!(result.is_match());
634 }
635
636 #[test]
637 fn test_no_match_result() {
638 let result = MatchResult::no_match();
639 assert_eq!(result.score, 0.0);
640 assert_eq!(result.tier, MatchTier::None);
641 assert!(!result.is_match());
642 }
643
644 #[test]
645 fn test_match_tier_default_scores() {
646 assert_eq!(MatchTier::ExactIdentifier.default_score(), 1.0);
647 assert_eq!(MatchTier::Alias.default_score(), 0.95);
648 assert_eq!(MatchTier::EcosystemRule.default_score(), 0.90);
649 assert_eq!(MatchTier::None.default_score(), 0.0);
650 }
651
652 #[test]
653 fn test_composite_matcher() {
654 let matcher = CompositeMatcherBuilder::new()
655 .with_matcher(Box::new(FixedScoreMatcher(0.5)))
656 .with_matcher(Box::new(FixedScoreMatcher(0.8)))
657 .with_matcher(Box::new(FixedScoreMatcher(0.3)))
658 .build();
659
660 let comp_a = Component::new("test".to_string(), "id-1".to_string());
661 let comp_b = Component::new("test".to_string(), "id-2".to_string());
662
663 assert_eq!(matcher.match_score(&comp_a, &comp_b), 0.8);
665 }
666
667 #[test]
668 fn test_find_best_match() {
669 let matcher = FixedScoreMatcher(0.85);
670 let target = Component::new("target".to_string(), "id-0".to_string());
671 let candidates: Vec<Component> = vec![
672 Component::new("candidate1".to_string(), "id-1".to_string()),
673 Component::new("candidate2".to_string(), "id-2".to_string()),
674 ];
675 let candidate_refs: Vec<&Component> = candidates.iter().collect();
676
677 let result = matcher.find_best_match(&target, &candidate_refs, 0.8);
679 assert!(result.is_some());
680
681 let result = matcher.find_best_match(&target, &candidate_refs, 0.9);
683 assert!(result.is_none());
684 }
685
686 #[test]
687 fn test_match_explanation_matched() {
688 let explanation =
689 MatchExplanation::matched(MatchTier::ExactIdentifier, 1.0, "Test match reason");
690
691 assert!(explanation.is_match);
692 assert_eq!(explanation.score, 1.0);
693 assert_eq!(explanation.tier, MatchTier::ExactIdentifier);
694 assert!(explanation.summary().contains("MATCH"));
695 assert!(explanation.summary().contains("100%"));
696 }
697
698 #[test]
699 fn test_match_explanation_no_match() {
700 let explanation = MatchExplanation::no_match("Components are too different");
701
702 assert!(!explanation.is_match);
703 assert_eq!(explanation.score, 0.0);
704 assert_eq!(explanation.tier, MatchTier::None);
705 assert!(explanation.summary().contains("NO MATCH"));
706 }
707
708 #[test]
709 fn test_match_explanation_with_breakdown() {
710 let explanation = MatchExplanation::matched(MatchTier::Fuzzy, 0.85, "Fuzzy match")
711 .with_score_component(ScoreComponent {
712 name: "Jaro-Winkler".to_string(),
713 weight: 0.7,
714 raw_score: 0.9,
715 weighted_score: 0.63,
716 description: "name similarity".to_string(),
717 })
718 .with_score_component(ScoreComponent {
719 name: "Levenshtein".to_string(),
720 weight: 0.3,
721 raw_score: 0.73,
722 weighted_score: 0.22,
723 description: "edit distance".to_string(),
724 })
725 .with_normalization("lowercase");
726
727 assert_eq!(explanation.score_breakdown.len(), 2);
728 assert_eq!(explanation.normalizations_applied.len(), 1);
729
730 let detailed = explanation.detailed();
731 assert!(detailed.contains("Score breakdown:"));
732 assert!(detailed.contains("Jaro-Winkler"));
733 assert!(detailed.contains("Normalizations: lowercase"));
734 }
735
736 #[test]
737 fn test_match_explanation_display() {
738 let explanation = MatchExplanation::matched(MatchTier::Alias, 0.95, "Known alias");
739 let display = format!("{}", explanation);
740 assert!(display.contains("MATCH"));
741 assert!(display.contains("95%"));
742 }
743}