1pub mod adaptive;
30mod aliases;
31mod config;
32pub mod cross_ecosystem;
33pub mod custom_rules;
34pub mod ecosystem_config;
35pub mod index;
36pub mod lsh;
37mod purl;
38pub mod rule_engine;
39mod rules;
40pub mod scoring;
41pub mod string_similarity;
42mod traits;
43
44pub use adaptive::{
45 AdaptiveMatching, AdaptiveMethod, AdaptiveThreshold, AdaptiveThresholdConfig,
46 AdaptiveThresholdResult, ScoreStats,
47};
48pub use aliases::AliasTable;
49pub use config::{CrossEcosystemConfig, FuzzyMatchConfig, MultiFieldWeights};
50pub use cross_ecosystem::{CrossEcosystemDb, CrossEcosystemMatch, PackageFamily};
51pub use custom_rules::{
52 AliasPattern, EquivalenceGroup, ExclusionRule, MatchingRulesConfig, RulePrecedence,
53 RulesSummary,
54};
55pub use ecosystem_config::{
56 ConfigError, CustomEquivalence, CustomRules, EcosystemConfig, EcosystemRulesConfig,
57 GlobalSettings, GroupMigration, ImportMapping, NormalizationConfig, PackageGroup,
58 ScopeHandling, SecurityConfig, TyposquatEntry, VersionSpec, VersioningConfig,
59};
60pub use index::{
61 BatchCandidateConfig, BatchCandidateGenerator, BatchCandidateResult, BatchCandidateStats,
62 ComponentIndex, IndexStats, LazyComponentIndex, NormalizedEntry,
63};
64pub use lsh::{LshConfig, LshIndex, LshIndexStats, MinHashSignature};
65pub use purl::PurlNormalizer;
66pub use rule_engine::{AppliedRule, AppliedRuleType, RuleApplicationResult, RuleEngine};
67pub use rules::EcosystemRules;
68pub use traits::{
69 CacheConfig, CacheStats, CachedMatcher, ComponentMatcher, CompositeMatcher,
70 CompositeMatcherBuilder, MatchExplanation, MatchMetadata, MatchResult, MatchTier,
71 ScoreComponent,
72};
73pub use scoring::MultiFieldScoreResult;
74
75use crate::model::Component;
76use strsim::{jaro_winkler, levenshtein};
77
/// Tiered matcher that scores how likely two components describe the same package.
///
/// Bundles the matching configuration with the alias table, PURL normalizer
/// and ecosystem rules that the individual matching tiers consult.
#[must_use]
pub struct FuzzyMatcher {
    /// Threshold and weight settings read by the scoring tiers.
    config: FuzzyMatchConfig,
    /// Alias lookup consulted by the alias tier.
    alias_table: AliasTable,
    /// Normalizer applied to both PURLs before they are compared.
    purl_normalizer: PurlNormalizer,
    /// Per-ecosystem name normalization used by the ecosystem-rule tier.
    ecosystem_rules: EcosystemRules,
}
86
87impl FuzzyMatcher {
88 pub fn new(config: FuzzyMatchConfig) -> Self {
90 Self {
91 config,
92 alias_table: AliasTable::default(),
93 purl_normalizer: PurlNormalizer::new(),
94 ecosystem_rules: EcosystemRules::new(),
95 }
96 }
97
    /// Returns the configuration this matcher was built with.
    #[must_use]
    pub const fn config(&self) -> &FuzzyMatchConfig {
        &self.config
    }
103
104 pub fn with_alias_table(mut self, table: AliasTable) -> Self {
106 self.alias_table = table;
107 self
108 }
109
110 #[must_use]
112 pub fn match_components(&self, a: &Component, b: &Component) -> f64 {
113 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
115 let norm_a = self.purl_normalizer.normalize(purl_a);
116 let norm_b = self.purl_normalizer.normalize(purl_b);
117 if norm_a == norm_b {
118 return 1.0;
119 }
120 }
121
122 if self.check_alias_match(a, b) {
124 return 0.95;
125 }
126
127 if let Some(score) = self.check_ecosystem_rules(a, b) {
129 if score >= 0.90 {
130 return score;
131 }
132 }
133
134 if let Some(ref weights) = self.config.field_weights {
136 let result = self.compute_multi_field_score(a, b, weights);
138 if result.total >= self.config.threshold {
139 return result.total;
140 }
141 } else {
142 let fuzzy_score = self.compute_fuzzy_score(a, b);
144 if fuzzy_score >= self.config.threshold {
145 return fuzzy_score;
146 }
147 }
148
149 0.0
150 }
151
152 fn check_alias_match(&self, a: &Component, b: &Component) -> bool {
154 let names_a = self.get_all_names(a);
156 let names_b = self.get_all_names(b);
157
158 for name_a in &names_a {
159 if let Some(canonical) = self.alias_table.get_canonical(name_a) {
160 for name_b in &names_b {
161 if self.alias_table.is_alias(&canonical, name_b) {
162 return true;
163 }
164 }
165 }
166 }
167
168 false
169 }
170
171 fn get_all_names(&self, comp: &Component) -> Vec<String> {
173 let mut names = vec![comp.name.clone()];
174 names.extend(comp.identifiers.aliases.clone());
175
176 if let Some(purl) = &comp.identifiers.purl {
178 if let Some(name) = self.extract_name_from_purl(purl) {
179 names.push(name);
180 }
181 }
182
183 names
184 }
185
186 fn extract_name_from_purl(&self, purl: &str) -> Option<String> {
188 let without_pkg = purl.strip_prefix("pkg:")?;
190 let parts: Vec<&str> = without_pkg.split('/').collect();
191
192 if parts.len() >= 2 {
193 let name_part = parts.last()?;
194 let name = name_part.split('@').next()?;
196 Some(name.to_string())
197 } else {
198 None
199 }
200 }
201
202 fn check_ecosystem_rules(&self, a: &Component, b: &Component) -> Option<f64> {
204 let ecosystem_a = a.ecosystem.as_ref()?;
205 let ecosystem_b = b.ecosystem.as_ref()?;
206
207 if ecosystem_a != ecosystem_b {
209 return None;
210 }
211
212 let norm_a = self.ecosystem_rules.normalize_name(&a.name, ecosystem_a);
213 let norm_b = self.ecosystem_rules.normalize_name(&b.name, ecosystem_b);
214
215 if norm_a == norm_b {
216 return Some(0.90);
217 }
218
219 None
220 }
221
222 fn compute_fuzzy_score(&self, a: &Component, b: &Component) -> f64 {
224 let name_a = a.name.to_lowercase();
225 let name_b = b.name.to_lowercase();
226
227 let jw_score = jaro_winkler(&name_a, &name_b);
229
230 let max_len = name_a.len().max(name_b.len());
232 let lev_distance = levenshtein(&name_a, &name_b);
233 let lev_score = if max_len > 0 {
234 1.0 - (lev_distance as f64 / max_len as f64)
235 } else {
236 1.0
237 };
238
239 let token_score = Self::compute_token_similarity(&name_a, &name_b);
241
242 let phonetic_score = Self::compute_phonetic_similarity(&name_a, &name_b);
244
245 let char_score = jw_score.mul_add(self.config.jaro_winkler_weight, lev_score * self.config.levenshtein_weight);
247
248 let combined = char_score.max(token_score).max(phonetic_score * 0.85);
251
252 let version_boost = Self::compute_version_similarity(a.version.as_ref(), b.version.as_ref());
254
255 (combined + version_boost).min(1.0)
256 }
257
    /// Token-level similarity of two names.
    ///
    /// Thin wrapper that forwards to
    /// [`string_similarity::compute_token_similarity`].
    fn compute_token_similarity(name_a: &str, name_b: &str) -> f64 {
        string_similarity::compute_token_similarity(name_a, name_b)
    }
262
    /// Version-closeness boost for two optional version strings.
    ///
    /// Thin wrapper that forwards to
    /// [`string_similarity::compute_version_similarity`]; the result is added
    /// on top of the name score in `compute_fuzzy_score`.
    fn compute_version_similarity(va: Option<&String>, vb: Option<&String>) -> f64 {
        string_similarity::compute_version_similarity(va, vb)
    }
267
    /// Phonetic similarity of two names.
    ///
    /// Thin wrapper that forwards to
    /// [`string_similarity::compute_phonetic_similarity`].
    #[must_use]
    pub fn compute_phonetic_similarity(name_a: &str, name_b: &str) -> f64 {
        string_similarity::compute_phonetic_similarity(name_a, name_b)
    }
273
274 #[must_use]
278 pub fn compute_multi_field_score(
279 &self,
280 a: &Component,
281 b: &Component,
282 weights: &config::MultiFieldWeights,
283 ) -> scoring::MultiFieldScoreResult {
284 use std::collections::HashSet;
285
286 let mut result = scoring::MultiFieldScoreResult::default();
287
288 let name_score = self.compute_fuzzy_score(a, b);
290 result.name_score = name_score;
291 result.total += name_score * weights.name;
292
293 let version_score = if weights.version_divergence_enabled {
295 scoring::compute_version_divergence_score(&a.version, &b.version, weights)
296 } else {
297 match (&a.version, &b.version) {
299 (Some(va), Some(vb)) if va == vb => 1.0,
300 (None, None) => 0.5, _ => 0.0,
302 }
303 };
304 result.version_score = version_score;
305 result.total += version_score * weights.version;
306
307 let (ecosystem_score, ecosystem_penalty) = match (&a.ecosystem, &b.ecosystem) {
309 (Some(ea), Some(eb)) if ea == eb => (1.0, 0.0),
310 (None, None) => (0.5, 0.0), (Some(_), Some(_)) => (0.0, weights.ecosystem_mismatch_penalty), _ => (0.0, 0.0), };
314 result.ecosystem_score = ecosystem_score;
315 result.total += ecosystem_score.mul_add(weights.ecosystem, ecosystem_penalty);
316
317 let licenses_a: HashSet<_> = a
319 .licenses
320 .declared
321 .iter()
322 .map(|l| l.expression.as_str())
323 .collect();
324 let licenses_b: HashSet<_> = b
325 .licenses
326 .declared
327 .iter()
328 .map(|l| l.expression.as_str())
329 .collect();
330 let license_score = if licenses_a.is_empty() && licenses_b.is_empty() {
331 0.5 } else if licenses_a.is_empty() || licenses_b.is_empty() {
333 0.0 } else {
335 let intersection = licenses_a.intersection(&licenses_b).count();
336 let union = licenses_a.union(&licenses_b).count();
337 if union > 0 {
338 intersection as f64 / union as f64
339 } else {
340 0.0
341 }
342 };
343 result.license_score = license_score;
344 result.total += license_score * weights.licenses;
345
346 let supplier_score = match (&a.supplier, &b.supplier) {
348 (Some(sa), Some(sb)) if sa.name.to_lowercase() == sb.name.to_lowercase() => 1.0,
349 (None, None) => 0.5, _ => 0.0,
351 };
352 result.supplier_score = supplier_score;
353 result.total += supplier_score * weights.supplier;
354
355 let group_score = match (&a.group, &b.group) {
357 (Some(ga), Some(gb)) if ga.to_lowercase() == gb.to_lowercase() => 1.0,
358 (None, None) => 0.5, _ => 0.0,
360 };
361 result.group_score = group_score;
362 result.total += group_score * weights.group;
363
364 result.total = result.total.clamp(0.0, 1.0);
366
367 result
368 }
369}
370
371impl Default for FuzzyMatcher {
372 fn default() -> Self {
373 Self::new(FuzzyMatchConfig::balanced())
374 }
375}
376
377impl ComponentMatcher for FuzzyMatcher {
    /// Returns the match score for `a` and `b` by delegating to
    /// [`FuzzyMatcher::match_components`].
    fn match_score(&self, a: &Component, b: &Component) -> f64 {
        self.match_components(a, b)
    }
381
382 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
383 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
385 let norm_a = self.purl_normalizer.normalize(purl_a);
386 let norm_b = self.purl_normalizer.normalize(purl_b);
387 if norm_a == norm_b {
388 return MatchResult::with_metadata(
389 1.0,
390 MatchTier::ExactIdentifier,
391 MatchMetadata {
392 matched_fields: vec!["purl".to_string()],
393 normalization: Some("purl_normalized".to_string()),
394 rule_id: None,
395 },
396 );
397 }
398 }
399
400 if self.check_alias_match(a, b) {
402 return MatchResult::with_metadata(
403 0.95,
404 MatchTier::Alias,
405 MatchMetadata {
406 matched_fields: vec!["name".to_string()],
407 normalization: Some("alias_table".to_string()),
408 rule_id: None,
409 },
410 );
411 }
412
413 if let Some(score) = self.check_ecosystem_rules(a, b) {
415 if score >= 0.90 {
416 return MatchResult::with_metadata(
417 score,
418 MatchTier::EcosystemRule,
419 MatchMetadata {
420 matched_fields: vec!["name".to_string(), "ecosystem".to_string()],
421 normalization: Some("ecosystem_rules".to_string()),
422 rule_id: None,
423 },
424 );
425 }
426 }
427
428 let fuzzy_score = self.compute_fuzzy_score(a, b);
430 if fuzzy_score >= self.config.threshold {
431 return MatchResult::with_metadata(
432 fuzzy_score,
433 MatchTier::Fuzzy,
434 MatchMetadata {
435 matched_fields: vec!["name".to_string()],
436 normalization: Some("fuzzy_similarity".to_string()),
437 rule_id: None,
438 },
439 );
440 }
441
442 MatchResult::no_match()
443 }
444
    /// Returns the fixed identifier of this matcher implementation.
    fn name(&self) -> &'static str {
        "FuzzyMatcher"
    }
448
    /// Returns the configured minimum score a fuzzy or multi-field
    /// comparison must reach to count as a match.
    fn threshold(&self) -> f64 {
        self.config.threshold
    }
452
453 fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
454 use strsim::{jaro_winkler, levenshtein};
455
456 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
458 let norm_a = self.purl_normalizer.normalize(purl_a);
459 let norm_b = self.purl_normalizer.normalize(purl_b);
460 if norm_a == norm_b {
461 return MatchExplanation::matched(
462 MatchTier::ExactIdentifier,
463 1.0,
464 format!(
465 "Exact PURL match: '{purl_a}' equals '{purl_b}' after normalization"
466 ),
467 )
468 .with_normalization("purl_normalized");
469 }
470 }
471
472 if self.check_alias_match(a, b) {
474 return MatchExplanation::matched(
475 MatchTier::Alias,
476 0.95,
477 format!(
478 "'{}' and '{}' are known aliases of the same package",
479 a.name, b.name
480 ),
481 )
482 .with_normalization("alias_table");
483 }
484
485 if let Some(score) = self.check_ecosystem_rules(a, b) {
487 if score >= 0.90 {
488 let ecosystem = a
489 .ecosystem
490 .as_ref().map_or_else(|| "unknown".to_string(), std::string::ToString::to_string);
491 return MatchExplanation::matched(
492 MatchTier::EcosystemRule,
493 score,
494 format!(
495 "Names match after {} ecosystem normalization: '{}' -> '{}'",
496 ecosystem, a.name, b.name
497 ),
498 )
499 .with_normalization(format!("{ecosystem}_normalization"));
500 }
501 }
502
503 let name_a = a.name.to_lowercase();
505 let name_b = b.name.to_lowercase();
506
507 let jw_score = jaro_winkler(&name_a, &name_b);
508 let max_len = name_a.len().max(name_b.len());
509 let lev_distance = levenshtein(&name_a, &name_b);
510 let lev_score = if max_len > 0 {
511 1.0 - (lev_distance as f64 / max_len as f64)
512 } else {
513 1.0
514 };
515
516 let jw_weighted = jw_score * self.config.jaro_winkler_weight;
517 let lev_weighted = lev_score * self.config.levenshtein_weight;
518
519 let version_boost = if a.version == b.version && a.version.is_some() {
520 0.05
521 } else {
522 0.0
523 };
524
525 let combined = (jw_weighted + lev_weighted + version_boost).min(1.0);
526
527 let mut explanation = if combined >= self.config.threshold {
528 MatchExplanation::matched(
529 MatchTier::Fuzzy,
530 combined,
531 format!(
532 "Fuzzy match: '{}' ~ '{}' with {:.0}% similarity",
533 a.name,
534 b.name,
535 combined * 100.0
536 ),
537 )
538 } else {
539 MatchExplanation::no_match(format!(
540 "Fuzzy similarity {:.2} below threshold {:.2}",
541 combined, self.config.threshold
542 ))
543 };
544
545 explanation = explanation
547 .with_score_component(ScoreComponent {
548 name: "Jaro-Winkler".to_string(),
549 weight: self.config.jaro_winkler_weight,
550 raw_score: jw_score,
551 weighted_score: jw_weighted,
552 description: format!("'{name_a}' vs '{name_b}' = {jw_score:.2}"),
553 })
554 .with_score_component(ScoreComponent {
555 name: "Levenshtein".to_string(),
556 weight: self.config.levenshtein_weight,
557 raw_score: lev_score,
558 weighted_score: lev_weighted,
559 description: format!(
560 "edit distance {lev_distance} / max_len {max_len} = {lev_score:.2}"
561 ),
562 });
563
564 if version_boost > 0.0 {
565 explanation = explanation.with_score_component(ScoreComponent {
566 name: "Version boost".to_string(),
567 weight: 1.0,
568 raw_score: version_boost,
569 weighted_score: version_boost,
570 description: format!("versions match: {:?}", a.version),
571 });
572 }
573
574 explanation.with_normalization("lowercase")
575 }
576}
577
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a bare component carrying only a name and an identifier.
    fn component(name: &str, id: &str) -> Component {
        Component::new(name.to_string(), id.to_string())
    }

    /// Builds a component with a version but no ecosystem.
    fn versioned(name: &str, id: &str, version: &str) -> Component {
        let mut comp = component(name, id);
        comp.version = Some(version.to_string());
        comp
    }

    /// Builds a versioned component in the npm ecosystem.
    fn npm(name: &str, id: &str, version: &str) -> Component {
        let mut comp = versioned(name, id, version);
        comp.ecosystem = Some(crate::model::Ecosystem::Npm);
        comp
    }

    /// Version boost for two optional version strings.
    fn version_boost(left: Option<&str>, right: Option<&str>) -> f64 {
        let left = left.map(str::to_string);
        let right = right.map(str::to_string);
        FuzzyMatcher::compute_version_similarity(left.as_ref(), right.as_ref())
    }

    /// Identical PURLs must produce a perfect score.
    #[test]
    fn test_exact_purl_match() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced());
        let purl = "pkg:npm/lodash@4.17.21";

        let mut first = component("lodash", "comp-1");
        first.identifiers.purl = Some(purl.to_string());

        let mut second = component("lodash", "comp-2");
        second.identifiers.purl = Some(purl.to_string());

        assert_eq!(matcher.match_components(&first, &second), 1.0);
    }

    /// A suffixed name should still match its base name under the permissive preset.
    #[test]
    fn test_fuzzy_name_match() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());

        let suffixed = component("lodash-es", "comp-1");
        let base = component("lodash", "comp-2");

        let score = matcher.match_components(&suffixed, &base);
        assert!(
            score >= 0.70,
            "lodash-es vs lodash should have score >= 0.70, got {}",
            score
        );
    }

    /// Unrelated names must not match under the strict preset.
    #[test]
    fn test_different_names_low_score() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::strict());

        let react = component("react", "comp-1");
        let angular = component("angular", "comp-2");

        let score = matcher.match_components(&react, &angular);
        assert!(
            score < 0.5,
            "react vs angular should have low score, got {}",
            score
        );
    }

    /// Every built-in weight preset must be normalized.
    #[test]
    fn test_multi_field_weights_normalized() {
        let balanced = config::MultiFieldWeights::balanced();
        assert!(
            balanced.is_normalized(),
            "Balanced weights should be normalized"
        );

        let name_focused = config::MultiFieldWeights::name_focused();
        assert!(
            name_focused.is_normalized(),
            "Name-focused weights should be normalized"
        );

        let security_focused = config::MultiFieldWeights::security_focused();
        assert!(
            security_focused.is_normalized(),
            "Security-focused weights should be normalized"
        );
    }

    /// Comparing a component with itself: known fields score 1.0, absent ones 0.5.
    #[test]
    fn test_multi_field_scoring_same_component() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
        let weights = config::MultiFieldWeights::balanced();

        let lodash = npm("lodash", "comp-1", "4.17.21");

        let result = matcher.compute_multi_field_score(&lodash, &lodash, &weights);
        assert!(
            result.total > 0.90,
            "Same component should score > 0.90, got {}",
            result.total
        );
        assert_eq!(result.name_score, 1.0);
        assert_eq!(result.version_score, 1.0);
        assert_eq!(result.ecosystem_score, 1.0);
        assert_eq!(
            result.license_score, 0.5,
            "Empty licenses should be neutral"
        );
        assert_eq!(
            result.supplier_score, 0.5,
            "Empty supplier should be neutral"
        );
        assert_eq!(result.group_score, 0.5, "Empty group should be neutral");
    }

    /// A patch-level difference should barely reduce the multi-field score.
    #[test]
    fn test_multi_field_scoring_different_versions() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
        let weights = config::MultiFieldWeights::balanced();

        let newer = npm("lodash", "comp-1", "4.17.21");
        let older = npm("lodash", "comp-2", "4.17.20");

        let result = matcher.compute_multi_field_score(&newer, &older, &weights);

        assert!(result.name_score > 0.9, "Name score should be > 0.9");

        assert!(
            result.version_score > 0.7,
            "Same major.minor with patch diff should score high, got {}",
            result.version_score
        );

        assert_eq!(
            result.ecosystem_score, 1.0,
            "Same ecosystem should score 1.0"
        );

        assert!(
            result.total > 0.8,
            "Total should be > 0.8, got {}",
            result.total
        );
    }

    /// A major-version difference should give a low version score.
    #[test]
    fn test_multi_field_scoring_different_major_versions() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
        let weights = config::MultiFieldWeights::balanced();

        let v4 = npm("lodash", "comp-1", "4.17.21");
        let v3 = npm("lodash", "comp-2", "3.10.0");

        let result = matcher.compute_multi_field_score(&v4, &v3, &weights);

        assert!(
            result.version_score < 0.3,
            "Different major versions should score low, got {}",
            result.version_score
        );
    }

    /// Legacy weights compare versions by exact equality only.
    #[test]
    fn test_multi_field_scoring_legacy_weights() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
        let weights = config::MultiFieldWeights::legacy();

        let newer = npm("lodash", "comp-1", "4.17.21");
        let older = npm("lodash", "comp-2", "4.17.20");

        let result = matcher.compute_multi_field_score(&newer, &older, &weights);

        assert_eq!(
            result.version_score, 0.0,
            "Legacy mode: different versions should score 0"
        );
    }

    /// Multi-field presets must come with field weights attached.
    #[test]
    fn test_multi_field_config_preset() {
        let balanced = FuzzyMatchConfig::from_preset("balanced-multi").unwrap();
        assert!(balanced.field_weights.is_some());

        let strict = FuzzyMatchConfig::from_preset("strict_multi").unwrap();
        assert!(strict.field_weights.is_some());
    }

    /// The summary string should mention the total and the name score.
    #[test]
    fn test_multi_field_score_result_summary() {
        let result = MultiFieldScoreResult {
            total: 0.85,
            name_score: 1.0,
            version_score: 0.0,
            ecosystem_score: 1.0,
            license_score: 0.5,
            supplier_score: 0.5,
            group_score: 0.5,
        };

        let summary = result.summary();
        assert!(summary.contains("0.85"));
        assert!(summary.contains("name: 1.00"));
    }

    #[test]
    fn test_token_similarity_exact() {
        let similarity = string_similarity::compute_token_similarity("react-dom", "react-dom");
        assert_eq!(similarity, 1.0);
    }

    #[test]
    fn test_token_similarity_reordered() {
        let similarity = string_similarity::compute_token_similarity("react-dom", "dom-react");
        assert_eq!(similarity, 1.0, "Reordered tokens should match perfectly");
    }

    #[test]
    fn test_token_similarity_partial() {
        let score = string_similarity::compute_token_similarity("react-dom-utils", "react-dom");
        // Two shared tokens out of three distinct ones.
        assert!(
            (score - 0.667).abs() < 0.01,
            "Partial overlap should be ~0.67, got {}",
            score
        );
    }

    #[test]
    fn test_token_similarity_different_delimiters() {
        let similarity =
            string_similarity::compute_token_similarity("my_package_name", "my-package-name");
        assert_eq!(similarity, 1.0, "Different delimiters should match");
    }

    #[test]
    fn test_token_similarity_no_overlap() {
        let similarity = string_similarity::compute_token_similarity("react", "angular");
        assert_eq!(similarity, 0.0, "No common tokens should score 0");
    }

    #[test]
    fn test_version_similarity_exact() {
        let boost = version_boost(Some("1.2.3"), Some("1.2.3"));
        assert_eq!(boost, 0.10, "Exact version match should give max boost");
    }

    #[test]
    fn test_version_similarity_same_major_minor() {
        let boost = version_boost(Some("1.2.3"), Some("1.2.4"));
        assert_eq!(boost, 0.07, "Same major.minor should give 0.07 boost");
    }

    #[test]
    fn test_version_similarity_same_major() {
        let boost = version_boost(Some("1.2.3"), Some("1.5.0"));
        assert_eq!(boost, 0.04, "Same major should give 0.04 boost");
    }

    #[test]
    fn test_version_similarity_different_major() {
        let boost = version_boost(Some("1.2.3"), Some("2.0.0"));
        assert_eq!(boost, 0.0, "Different major versions should give no boost");
    }

    #[test]
    fn test_version_similarity_prerelease() {
        let boost = version_boost(Some("1.2.3-beta"), Some("1.2.4"));
        assert_eq!(boost, 0.07, "Prerelease should still match major.minor");
    }

    #[test]
    fn test_version_similarity_missing() {
        let one_sided = version_boost(None, Some("1.0.0"));
        assert_eq!(one_sided, 0.0, "Missing version should give no boost");

        let neither = version_boost(None, None);
        assert_eq!(neither, 0.0, "Both missing should give no boost");
    }

    /// Token reordering alone should not prevent a match.
    #[test]
    fn test_fuzzy_match_with_reordered_tokens() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());

        let forward = component("react-dom", "comp-1");
        let reversed = component("dom-react", "comp-2");

        let score = matcher.match_components(&forward, &reversed);
        assert!(
            score > 0.5,
            "Reordered names should still match, got {}",
            score
        );
    }

    /// A closer version should rank above a more distant one for the same name pair.
    #[test]
    fn test_fuzzy_match_version_boost() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());

        let reference = versioned("lodash-utils", "comp-1", "4.17.21");
        let same_minor = versioned("lodash-util", "comp-2", "4.17.20");
        let other_major = versioned("lodash-util", "comp-3", "5.0.0");

        let score_same_minor = matcher.match_components(&reference, &same_minor);
        let score_diff_major = matcher.match_components(&reference, &other_major);

        assert!(score_same_minor > 0.0, "Same minor should match");
        assert!(score_diff_major > 0.0, "Different major should still match");
        assert!(
            score_same_minor > score_diff_major,
            "Same minor version should score higher: {} vs {}",
            score_same_minor,
            score_diff_major
        );
    }

    #[test]
    fn test_soundex_basic() {
        let expectations = [
            ("Robert", "R163"),
            ("Rupert", "R163"),
            ("Smith", "S530"),
            ("Smyth", "S530"),
        ];
        for &(input, code) in &expectations {
            assert_eq!(string_similarity::soundex(input), code);
        }
    }

    #[test]
    fn test_soundex_empty() {
        for &input in &["", "123"] {
            assert_eq!(string_similarity::soundex(input), "");
        }
    }

    #[test]
    fn test_phonetic_similarity_exact() {
        let similarity = string_similarity::compute_phonetic_similarity("color", "colour");
        assert_eq!(similarity, 1.0, "color and colour should match phonetically");
    }

    #[test]
    fn test_phonetic_similarity_different() {
        let similarity = string_similarity::compute_phonetic_similarity("react", "angular");
        assert!(
            similarity < 0.5,
            "Different names should have low phonetic similarity"
        );
    }

    #[test]
    fn test_phonetic_similarity_compound() {
        let score = string_similarity::compute_phonetic_similarity("json-parser", "jayson-parser");
        assert!(
            score > 0.5,
            "Similar sounding compound names should match: {}",
            score
        );
    }

    /// Spelling variants that sound alike should match end to end.
    #[test]
    fn test_fuzzy_match_with_phonetic() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());

        let american = component("color-utils", "comp-1");
        let british = component("colour-utils", "comp-2");

        let score = matcher.match_components(&american, &british);
        assert!(
            score > 0.7,
            "Phonetically similar names should match: {}",
            score
        );
    }
}