use crate::model::Component;
/// Outcome of comparing two components: a score, the tier credited with it, and
/// supporting metadata.
#[derive(Debug, Clone)]
#[must_use]
pub struct MatchResult {
/// Numeric match score; `no_match()` uses `0.0` and `is_match()` requires a value above `0.0`.
pub score: f64,
/// Tier credited with the result; `MatchTier::None` marks a non-match.
pub tier: MatchTier,
/// Supporting details about the match; `new()` and `no_match()` fill it with `MatchMetadata::default()`.
pub metadata: MatchMetadata,
}
impl MatchResult {
    /// Builds a result from a score and tier, attaching default (empty) metadata.
    pub fn new(score: f64, tier: MatchTier) -> Self {
        Self::with_metadata(score, tier, MatchMetadata::default())
    }

    /// Builds a result from a score, tier and caller-supplied metadata.
    pub const fn with_metadata(score: f64, tier: MatchTier, metadata: MatchMetadata) -> Self {
        Self {
            metadata,
            tier,
            score,
        }
    }

    /// Builds the canonical "no match" result: score `0.0`, tier `MatchTier::None`,
    /// default metadata.
    pub fn no_match() -> Self {
        Self::new(0.0, MatchTier::None)
    }

    /// Returns `true` only when the tier is not `MatchTier::None` and the score is
    /// strictly greater than `0.0` (a NaN score is therefore not a match).
    #[must_use]
    pub fn is_match(&self) -> bool {
        if self.tier == MatchTier::None {
            return false;
        }
        self.score > 0.0
    }
}
/// Strategy tier credited with a match decision.
///
/// `None` marks the absence of a match; the remaining variants name the kind of
/// rule or comparison that produced the match.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum MatchTier {
    /// No match was found.
    None,
    /// Matched by exact identifier.
    ExactIdentifier,
    /// Matched through an alias.
    Alias,
    /// Matched by an ecosystem rule.
    EcosystemRule,
    /// Matched by fuzzy comparison.
    Fuzzy,
    /// Matched by a custom rule.
    CustomRule,
}

impl MatchTier {
    /// Returns the default score associated with this tier.
    ///
    /// Arms are listed from highest to lowest score: `ExactIdentifier` (1.0),
    /// `Alias` (0.95), `CustomRule` (0.92), `EcosystemRule` (0.90), `Fuzzy` (0.80),
    /// `None` (0.0).
    #[must_use]
    pub const fn default_score(&self) -> f64 {
        match *self {
            Self::ExactIdentifier => 1.0,
            Self::Alias => 0.95,
            Self::CustomRule => 0.92,
            Self::EcosystemRule => 0.90,
            Self::Fuzzy => 0.80,
            Self::None => 0.0,
        }
    }
}
/// Optional details attached to a `MatchResult`; the derived `Default` leaves the
/// list empty and both options `None`.
#[derive(Debug, Clone, Default)]
pub struct MatchMetadata {
/// Fields that took part in the match (presumably field names — confirm with the producers).
pub matched_fields: Vec<String>,
/// Normalization applied before matching, if any (format is not visible in this file).
pub normalization: Option<String>,
/// Identifier of the rule behind the match, if any (presumably set by rule-based tiers — confirm).
pub rule_id: Option<String>,
}
/// Human-readable account of a match decision, rendered by `summary()` and `detailed()`.
#[derive(Debug, Clone)]
pub struct MatchExplanation {
/// Tier credited with the decision; `MatchTier::None` when built via `no_match`.
pub tier: MatchTier,
/// Score of the decision; `summary()` prints it multiplied by 100 as a percentage.
pub score: f64,
/// Free-form reason text appended to the summary line.
pub reason: String,
/// Per-component score details listed by `detailed()`.
pub score_breakdown: Vec<ScoreComponent>,
/// Normalizations applied, printed comma-separated by `detailed()`.
pub normalizations_applied: Vec<String>,
/// Whether this explanation describes a match; selects the `MATCH` / `NO MATCH` summary form.
pub is_match: bool,
}
/// One line of the score breakdown printed by `MatchExplanation::detailed()`.
#[derive(Debug, Clone)]
pub struct ScoreComponent {
/// Label printed at the start of the breakdown line.
pub name: String,
/// Weight of this component; printed as the second factor.
pub weight: f64,
/// Unweighted score; printed as the first factor.
pub raw_score: f64,
/// Value printed after `=`; stored as supplied, not computed in this file
/// (presumably `raw_score * weight` — confirm with the producers).
pub weighted_score: f64,
/// Short description printed in parentheses at the end of the line.
pub description: String,
}
impl MatchExplanation {
    /// Creates an explanation flagged as a match, with an empty score breakdown and
    /// no normalizations recorded.
    pub fn matched(tier: MatchTier, score: f64, reason: impl Into<String>) -> Self {
        let reason = reason.into();
        Self {
            is_match: true,
            tier,
            score,
            reason,
            score_breakdown: Vec::new(),
            normalizations_applied: Vec::new(),
        }
    }

    /// Creates an explanation flagged as a non-match: tier `MatchTier::None`,
    /// score `0.0`, empty breakdown and normalizations.
    pub fn no_match(reason: impl Into<String>) -> Self {
        Self {
            is_match: false,
            ..Self::matched(MatchTier::None, 0.0, reason)
        }
    }

    /// Builder step: appends one entry to the score breakdown.
    #[must_use]
    pub fn with_score_component(self, component: ScoreComponent) -> Self {
        let mut updated = self;
        updated.score_breakdown.push(component);
        updated
    }

    /// Builder step: records one applied normalization.
    #[must_use]
    pub fn with_normalization(self, normalization: impl Into<String>) -> Self {
        let mut updated = self;
        updated.normalizations_applied.push(normalization.into());
        updated
    }

    /// Renders a one-line summary.
    ///
    /// Matches show the score as a whole-number percentage together with the tier
    /// and reason; non-matches show only the reason.
    #[must_use]
    pub fn summary(&self) -> String {
        if !self.is_match {
            return format!("NO MATCH: {}", self.reason);
        }
        let percent = self.score * 100.0;
        format!(
            "MATCH ({:.0}% confidence via {:?}): {}",
            percent, self.tier, self.reason
        )
    }

    /// Renders the summary followed by the score breakdown and the normalizations
    /// list, each section only when non-empty, joined with newlines.
    #[must_use]
    pub fn detailed(&self) -> String {
        // Summary + optional breakdown header + one line per component + optional
        // normalizations line.
        let mut sections = Vec::with_capacity(self.score_breakdown.len() + 3);
        sections.push(self.summary());

        if !self.score_breakdown.is_empty() {
            sections.push(String::from("Score breakdown:"));
            sections.extend(self.score_breakdown.iter().map(|part| {
                format!(
                    " - {}: {:.2} × {:.2} = {:.2} ({})",
                    part.name, part.raw_score, part.weight, part.weighted_score, part.description
                )
            }));
        }

        if !self.normalizations_applied.is_empty() {
            let joined = self.normalizations_applied.join(", ");
            sections.push(format!("Normalizations: {}", joined));
        }

        sections.join("\n")
    }
}
/// Displays an explanation as its one-line `summary()`.
impl std::fmt::Display for MatchExplanation {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let line = self.summary();
        f.write_str(&line)
    }
}
/// Strategy for deciding how closely two components match.
///
/// Implementors supply `match_score`; every other method has a default built on it.
pub trait ComponentMatcher: Send + Sync {
    /// Scores how well `a` matches `b`.
    fn match_score(&self, a: &Component, b: &Component) -> f64;

    /// Wraps `match_score` in a `MatchResult`: any score above `0.0` is reported
    /// under the `Fuzzy` tier, anything else as `MatchResult::no_match()`.
    fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
        match self.match_score(a, b) {
            score if score > 0.0 => MatchResult::new(score, MatchTier::Fuzzy),
            _ => MatchResult::no_match(),
        }
    }

    /// Produces an explanation for the outcome of `match_detailed`, naming both
    /// components in the reason text.
    fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
        let outcome = self.match_detailed(a, b);
        if !outcome.is_match() {
            let reason = format!(
                "'{}' does not match '{}' (score {:.2} below threshold)",
                a.name, b.name, outcome.score
            );
            return MatchExplanation::no_match(reason);
        }
        let reason = format!("'{}' matches '{}' via {:?}", a.name, b.name, outcome.tier);
        MatchExplanation::matched(outcome.tier, outcome.score, reason)
    }

    /// Returns the candidate with the highest score that is at least `threshold`,
    /// together with that score, or `None` when no candidate qualifies.
    ///
    /// Candidates are scored in slice order; on equal scores the later candidate wins.
    fn find_best_match<'a>(
        &self,
        target: &Component,
        candidates: &'a [&Component],
        threshold: f64,
    ) -> Option<(&'a Component, f64)> {
        let mut best: Option<(&'a Component, f64)> = None;
        for candidate in candidates {
            let score = self.match_score(target, candidate);
            if score >= threshold {
                // Keep the current best only when it is strictly greater.
                let keep_current = match best {
                    Some((_, top)) => {
                        top.partial_cmp(&score)
                            .unwrap_or(std::cmp::Ordering::Equal)
                            == std::cmp::Ordering::Greater
                    }
                    None => false,
                };
                if !keep_current {
                    best = Some((*candidate, score));
                }
            }
        }
        best
    }

    /// Name of this matcher.
    fn name(&self) -> &'static str {
        "ComponentMatcher"
    }

    /// Threshold reported by this matcher; `0.0` unless overridden. The default
    /// methods of this trait do not consult it.
    fn threshold(&self) -> f64 {
        0.0
    }
}
/// Settings for `CachedMatcher`.
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// Entry count at which the cache evicts entries before storing a new one.
    pub max_entries: usize,
    /// Whether `match_detailed` results are cached; when `false` they always go
    /// straight to the wrapped matcher.
    pub cache_detailed: bool,
}

impl Default for CacheConfig {
    /// Default preset: 100,000 entries, detailed results not cached.
    fn default() -> Self {
        Self::preset(100_000, false)
    }
}

impl CacheConfig {
    /// Shared constructor behind the named presets.
    const fn preset(max_entries: usize, cache_detailed: bool) -> Self {
        Self {
            max_entries,
            cache_detailed,
        }
    }

    /// Small preset: 10,000 entries, detailed results cached.
    #[must_use]
    pub const fn small() -> Self {
        Self::preset(10_000, true)
    }

    /// Large preset: 500,000 entries, detailed results not cached.
    #[must_use]
    pub const fn large() -> Self {
        Self::preset(500_000, false)
    }
}
/// Cache lookup key for a pair of component ids, reduced to a single 64-bit hash.
#[derive(Hash, Eq, PartialEq, Clone)]
struct CacheKey {
// Hash of the ordered id pair computed in `CacheKey::new`; the ids themselves are not stored.
hash: u64,
}
impl CacheKey {
    /// Builds an order-independent key for a pair of component ids.
    ///
    /// The ids are sorted so that `(a, b)` and `(b, a)` produce the same key, then
    /// hashed together with xxh3.
    ///
    /// The byte length of the first id is included in the hashed text so the
    /// boundary between the two ids is unambiguous even when an id contains `|`.
    /// Without it, `("a", "b|c")` and `("a|b", "c")` would both hash `"a|b|c"` and
    /// share a cache slot.
    ///
    /// Only the 64-bit hash is kept, so two different pairs can still collide if
    /// their hashes happen to be equal.
    fn new(a_id: &str, b_id: &str) -> Self {
        use xxhash_rust::xxh3::xxh3_64;

        let (first, second) = if a_id <= b_id {
            (a_id, b_id)
        } else {
            (b_id, a_id)
        };
        // "<len(first)>:<first>|<second>" can be split back into the two ids in
        // exactly one way.
        let combined = format!("{}:{first}|{second}", first.len());
        Self {
            hash: xxh3_64(combined.as_bytes()),
        }
    }
}
/// Value stored in the match cache for one component pair.
#[derive(Clone)]
struct CacheEntry {
// Cached score returned by `match_score` on a hit.
score: f64,
// Cached detailed result; `None` when the entry was stored by `match_score`.
detailed: Option<MatchResult>,
}
/// Wraps another matcher and caches its results per component pair, keyed by the
/// pair's canonical ids.
pub struct CachedMatcher<M: ComponentMatcher> {
// Matcher whose results are cached.
inner: M,
// Size limit and detailed-caching switch.
config: CacheConfig,
// Cached entries behind a read/write lock.
cache: std::sync::RwLock<std::collections::HashMap<CacheKey, CacheEntry>>,
// Total number of cache lookups (incremented on every `get_cached` call).
stats: std::sync::atomic::AtomicUsize,
// Number of lookups that found an entry.
hits: std::sync::atomic::AtomicUsize,
}
impl<M: ComponentMatcher> CachedMatcher<M> {
    /// Wraps `inner` using `CacheConfig::default()`.
    pub fn new(inner: M) -> Self {
        Self::with_config(inner, CacheConfig::default())
    }

    /// Wraps `inner` with an explicit cache configuration. The cache starts empty
    /// and both counters start at zero.
    pub fn with_config(inner: M, config: CacheConfig) -> Self {
        Self {
            inner,
            config,
            cache: std::sync::RwLock::new(std::collections::HashMap::new()),
            stats: std::sync::atomic::AtomicUsize::new(0),
            hits: std::sync::atomic::AtomicUsize::new(0),
        }
    }

    /// Returns the wrapped matcher.
    pub const fn inner(&self) -> &M {
        &self.inner
    }

    /// Returns a snapshot of the lookup counters and the current cache size.
    ///
    /// The hit rate is `0.0` before any lookup; the size is reported as `0` when
    /// the lock is poisoned.
    pub fn cache_stats(&self) -> CacheStats {
        use std::sync::atomic::Ordering::Relaxed;

        let total = self.stats.load(Relaxed);
        let hits = self.hits.load(Relaxed);
        let size = self.cache.read().map(|c| c.len()).unwrap_or(0);
        let hit_rate = if total > 0 {
            hits as f64 / total as f64
        } else {
            0.0
        };
        CacheStats {
            total_lookups: total,
            cache_hits: hits,
            // The two counters are read separately, so saturate rather than underflow.
            cache_misses: total.saturating_sub(hits),
            hit_rate,
            cache_size: size,
        }
    }

    /// Empties the cache (skipped if the lock is poisoned) and resets both counters.
    pub fn clear_cache(&self) {
        use std::sync::atomic::Ordering::Relaxed;

        if let Ok(mut cache) = self.cache.write() {
            cache.clear();
        }
        self.stats.store(0, Relaxed);
        self.hits.store(0, Relaxed);
    }

    /// Looks up `key`, counting the lookup and, when an entry is found, the hit.
    /// A poisoned lock is treated as a miss.
    fn get_cached(&self, key: &CacheKey) -> Option<CacheEntry> {
        use std::sync::atomic::Ordering::Relaxed;

        self.stats.fetch_add(1, Relaxed);
        let found = self
            .cache
            .read()
            .ok()
            .and_then(|cache| cache.get(key).cloned());
        if found.is_some() {
            self.hits.fetch_add(1, Relaxed);
        }
        found
    }

    /// Stores `entry` under `key`, keeping the cache within `max_entries`.
    ///
    /// * A limit of `0` stores nothing.
    /// * When the cache is full and `key` is new, half of the limit — but always at
    ///   least one entry — is evicted first. The entries removed are whichever the
    ///   map iterates first (arbitrary order).
    /// * Overwriting an existing key never evicts, because the map does not grow.
    /// * A poisoned lock makes this a no-op (caching is best-effort).
    fn store_cached(&self, key: CacheKey, entry: CacheEntry) {
        let limit = self.config.max_entries;
        if limit == 0 {
            return;
        }
        let Ok(mut cache) = self.cache.write() else {
            return;
        };
        if cache.len() >= limit && !cache.contains_key(&key) {
            // `limit / 2` is 0 when the limit is 1; evict at least one entry so the
            // cache cannot grow past the limit.
            let mut to_evict = (limit / 2).max(1);
            cache.retain(|_, _| {
                if to_evict == 0 {
                    return true;
                }
                to_evict -= 1;
                false
            });
        }
        cache.insert(key, entry);
    }
}
/// Snapshot of cache counters as returned by `CachedMatcher::cache_stats`.
#[derive(Debug, Clone)]
pub struct CacheStats {
/// Number of cache lookups performed.
pub total_lookups: usize,
/// Number of lookups that found an entry.
pub cache_hits: usize,
/// `total_lookups - cache_hits`, saturating at zero.
pub cache_misses: usize,
/// `cache_hits / total_lookups`, or `0.0` when there have been no lookups.
pub hit_rate: f64,
/// Number of entries in the cache when the snapshot was taken.
pub cache_size: usize,
}
/// Caching front for the wrapped matcher: scores (and optionally detailed results)
/// are stored per pair of canonical ids.
impl<M: ComponentMatcher> ComponentMatcher for CachedMatcher<M> {
    /// Returns the cached score for the pair, or computes it with the wrapped
    /// matcher and stores it as a score-only entry.
    fn match_score(&self, a: &Component, b: &Component) -> f64 {
        let key = CacheKey::new(a.canonical_id.value(), b.canonical_id.value());
        match self.get_cached(&key) {
            Some(entry) => entry.score,
            None => {
                let score = self.inner.match_score(a, b);
                let fresh = CacheEntry {
                    score,
                    detailed: None,
                };
                self.store_cached(key, fresh);
                score
            }
        }
    }

    /// Returns the detailed result for the pair.
    ///
    /// With `cache_detailed` off the cache is bypassed entirely. Otherwise a cached
    /// detailed result is returned when present; if there is none, the result is
    /// computed and stored.
    ///
    /// A score-only entry left by `match_score` is counted as a hit by `get_cached`
    /// even though the result is then recomputed.
    fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
        if !self.config.cache_detailed {
            return self.inner.match_detailed(a, b);
        }

        let key = CacheKey::new(a.canonical_id.value(), b.canonical_id.value());
        if let Some(hit) = self.get_cached(&key).and_then(|entry| entry.detailed) {
            return hit;
        }

        let result = self.inner.match_detailed(a, b);
        let fresh = CacheEntry {
            score: result.score,
            detailed: Some(result.clone()),
        };
        self.store_cached(key, fresh);
        result
    }

    /// Explanations are never cached; they are delegated to the wrapped matcher.
    fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
        self.inner.explain_match(a, b)
    }

    /// Name of this wrapper.
    fn name(&self) -> &'static str {
        "CachedMatcher"
    }

    /// Threshold of the wrapped matcher.
    fn threshold(&self) -> f64 {
        self.inner.threshold()
    }
}
/// Builder that collects boxed matchers and turns them into a `CompositeMatcher`.
#[must_use]
pub struct CompositeMatcherBuilder {
// Matchers in the order they were added.
matchers: Vec<Box<dyn ComponentMatcher>>,
}
impl CompositeMatcherBuilder {
    /// Starts a builder with no matchers.
    pub fn new() -> Self {
        let matchers = Vec::new();
        Self { matchers }
    }

    /// Appends a matcher and returns the builder for chaining.
    pub fn with_matcher(self, matcher: Box<dyn ComponentMatcher>) -> Self {
        let Self { mut matchers } = self;
        matchers.push(matcher);
        Self { matchers }
    }

    /// Consumes the builder and produces the composite, keeping the matchers in
    /// insertion order.
    #[must_use]
    pub fn build(self) -> CompositeMatcher {
        let Self { matchers } = self;
        CompositeMatcher { matchers }
    }
}
/// The default builder is the empty builder.
impl Default for CompositeMatcherBuilder {
    fn default() -> Self {
        Self {
            matchers: Vec::new(),
        }
    }
}
/// Matcher that consults several matchers and reports the best result among them.
pub struct CompositeMatcher {
    /// Matchers to consult, in the order they were added.
    matchers: Vec<Box<dyn ComponentMatcher>>,
}

impl ComponentMatcher for CompositeMatcher {
    /// Returns the highest score reported by any inner matcher, or `0.0` when there
    /// are no matchers or none reports a usable score.
    ///
    /// NaN scores are skipped. A NaN compares as "equal" under a `partial_cmp`
    /// fallback, so it would otherwise displace a genuinely higher score depending
    /// on matcher order.
    fn match_score(&self, a: &Component, b: &Component) -> f64 {
        self.matchers
            .iter()
            .map(|m| m.match_score(a, b))
            .filter(|score| !score.is_nan())
            .max_by(f64::total_cmp)
            .unwrap_or(0.0)
    }

    /// Returns the detailed result with the highest score among the inner matchers,
    /// or `MatchResult::no_match()` when there are no matchers or every score is NaN.
    ///
    /// Results with a NaN score are skipped, for the same reason as in `match_score`.
    /// When several results share the top score, the one from the later matcher wins.
    fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
        self.matchers
            .iter()
            .map(|m| m.match_detailed(a, b))
            .filter(|result| !result.score.is_nan())
            .max_by(|left, right| left.score.total_cmp(&right.score))
            .unwrap_or_else(MatchResult::no_match)
    }

    /// Name of this matcher.
    fn name(&self) -> &'static str {
        "CompositeMatcher"
    }
}
// Unit tests for the result/explanation types, the default trait methods and the
// composite matcher.
#[cfg(test)]
mod tests {
use super::*;
// Test matcher that returns the wrapped score for every pair and otherwise relies
// on the trait's default methods.
struct FixedScoreMatcher(f64);
impl ComponentMatcher for FixedScoreMatcher {
fn match_score(&self, _a: &Component, _b: &Component) -> f64 {
self.0
}
fn name(&self) -> &'static str {
"FixedScoreMatcher"
}
}
// `new` keeps the given score and tier, and a positive score with a real tier is a match.
#[test]
fn test_match_result_creation() {
let result = MatchResult::new(0.95, MatchTier::Alias);
assert_eq!(result.score, 0.95);
assert_eq!(result.tier, MatchTier::Alias);
assert!(result.is_match());
}
// `no_match` yields score 0.0, tier `None`, and is not a match.
#[test]
fn test_no_match_result() {
let result = MatchResult::no_match();
assert_eq!(result.score, 0.0);
assert_eq!(result.tier, MatchTier::None);
assert!(!result.is_match());
}
// Pins the default score of four of the tiers.
#[test]
fn test_match_tier_default_scores() {
assert_eq!(MatchTier::ExactIdentifier.default_score(), 1.0);
assert_eq!(MatchTier::Alias.default_score(), 0.95);
assert_eq!(MatchTier::EcosystemRule.default_score(), 0.90);
assert_eq!(MatchTier::None.default_score(), 0.0);
}
// The composite reports the highest score among its inner matchers.
#[test]
fn test_composite_matcher() {
let matcher = CompositeMatcherBuilder::new()
.with_matcher(Box::new(FixedScoreMatcher(0.5)))
.with_matcher(Box::new(FixedScoreMatcher(0.8)))
.with_matcher(Box::new(FixedScoreMatcher(0.3)))
.build();
let comp_a = Component::new("test".to_string(), "id-1".to_string());
let comp_b = Component::new("test".to_string(), "id-2".to_string());
assert_eq!(matcher.match_score(&comp_a, &comp_b), 0.8);
}
// Every candidate scores 0.85, so a 0.8 threshold finds a match and a 0.9 threshold finds none.
#[test]
fn test_find_best_match() {
let matcher = FixedScoreMatcher(0.85);
let target = Component::new("target".to_string(), "id-0".to_string());
let candidates: Vec<Component> = vec![
Component::new("candidate1".to_string(), "id-1".to_string()),
Component::new("candidate2".to_string(), "id-2".to_string()),
];
let candidate_refs: Vec<&Component> = candidates.iter().collect();
let result = matcher.find_best_match(&target, &candidate_refs, 0.8);
assert!(result.is_some());
let result = matcher.find_best_match(&target, &candidate_refs, 0.9);
assert!(result.is_none());
}
// A matched explanation keeps its tier and score, and its summary shows the percentage.
#[test]
fn test_match_explanation_matched() {
let explanation =
MatchExplanation::matched(MatchTier::ExactIdentifier, 1.0, "Test match reason");
assert!(explanation.is_match);
assert_eq!(explanation.score, 1.0);
assert_eq!(explanation.tier, MatchTier::ExactIdentifier);
assert!(explanation.summary().contains("MATCH"));
assert!(explanation.summary().contains("100%"));
}
// A no-match explanation has score 0.0, tier `None`, and a "NO MATCH" summary.
#[test]
fn test_match_explanation_no_match() {
let explanation = MatchExplanation::no_match("Components are too different");
assert!(!explanation.is_match);
assert_eq!(explanation.score, 0.0);
assert_eq!(explanation.tier, MatchTier::None);
assert!(explanation.summary().contains("NO MATCH"));
}
// The builder methods accumulate breakdown entries and normalizations, and
// `detailed()` prints both sections.
#[test]
fn test_match_explanation_with_breakdown() {
let explanation = MatchExplanation::matched(MatchTier::Fuzzy, 0.85, "Fuzzy match")
.with_score_component(ScoreComponent {
name: "Jaro-Winkler".to_string(),
weight: 0.7,
raw_score: 0.9,
weighted_score: 0.63,
description: "name similarity".to_string(),
})
.with_score_component(ScoreComponent {
name: "Levenshtein".to_string(),
weight: 0.3,
raw_score: 0.73,
weighted_score: 0.22,
description: "edit distance".to_string(),
})
.with_normalization("lowercase");
assert_eq!(explanation.score_breakdown.len(), 2);
assert_eq!(explanation.normalizations_applied.len(), 1);
let detailed = explanation.detailed();
assert!(detailed.contains("Score breakdown:"));
assert!(detailed.contains("Jaro-Winkler"));
assert!(detailed.contains("Normalizations: lowercase"));
}
// `Display` renders the same text as `summary()`.
#[test]
fn test_match_explanation_display() {
let explanation = MatchExplanation::matched(MatchTier::Alias, 0.95, "Known alias");
let display = format!("{}", explanation);
assert!(display.contains("MATCH"));
assert!(display.contains("95%"));
}
}