1pub mod adaptive;
30mod aliases;
31mod config;
32pub mod cross_ecosystem;
33pub mod custom_rules;
34pub mod ecosystem_config;
35pub mod index;
36pub mod lsh;
37mod purl;
38pub mod rule_engine;
39mod rules;
40pub mod scoring;
41pub mod string_similarity;
42mod traits;
43
44pub use adaptive::{
45 AdaptiveMatching, AdaptiveMethod, AdaptiveThreshold, AdaptiveThresholdConfig,
46 AdaptiveThresholdResult, ScoreStats,
47};
48pub use aliases::AliasTable;
49pub use config::{CrossEcosystemConfig, FuzzyMatchConfig, MultiFieldWeights};
50pub use cross_ecosystem::{CrossEcosystemDb, CrossEcosystemMatch, PackageFamily};
51pub use custom_rules::{
52 AliasPattern, EquivalenceGroup, ExclusionRule, MatchingRulesConfig, RulePrecedence,
53 RulesSummary,
54};
55pub use ecosystem_config::{
56 ConfigError, CustomEquivalence, CustomRules, EcosystemConfig, EcosystemRulesConfig,
57 GlobalSettings, GroupMigration, ImportMapping, NormalizationConfig, PackageGroup,
58 ScopeHandling, SecurityConfig, TyposquatEntry, VersionSpec, VersioningConfig,
59};
60pub use index::{
61 BatchCandidateConfig, BatchCandidateGenerator, BatchCandidateResult, BatchCandidateStats,
62 ComponentIndex, IndexStats, LazyComponentIndex, NormalizedEntry,
63};
64pub use lsh::{LshConfig, LshIndex, LshIndexStats, MinHashSignature};
65pub use purl::PurlNormalizer;
66pub use rule_engine::{AppliedRule, AppliedRuleType, RuleApplicationResult, RuleEngine};
67pub use rules::EcosystemRules;
68pub use traits::{
69 CacheConfig, CacheStats, CachedMatcher, ComponentMatcher, CompositeMatcher,
70 CompositeMatcherBuilder, MatchExplanation, MatchMetadata, MatchResult, MatchTier,
71 ScoreComponent,
72};
73pub use scoring::MultiFieldScoreResult;
74
75use crate::model::Component;
76use strsim::{jaro_winkler, levenshtein};
77
78#[must_use]
80pub struct FuzzyMatcher {
81 config: FuzzyMatchConfig,
82 alias_table: AliasTable,
83 purl_normalizer: PurlNormalizer,
84 ecosystem_rules: EcosystemRules,
85}
86
87impl FuzzyMatcher {
88 pub fn new(config: FuzzyMatchConfig) -> Self {
90 Self {
91 config,
92 alias_table: AliasTable::default(),
93 purl_normalizer: PurlNormalizer::new(),
94 ecosystem_rules: EcosystemRules::new(),
95 }
96 }
97
98 #[must_use]
100 pub const fn config(&self) -> &FuzzyMatchConfig {
101 &self.config
102 }
103
104 pub fn with_alias_table(mut self, table: AliasTable) -> Self {
106 self.alias_table = table;
107 self
108 }
109
110 #[must_use]
112 pub fn match_components(&self, a: &Component, b: &Component) -> f64 {
113 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
115 let norm_a = self.purl_normalizer.normalize(purl_a);
116 let norm_b = self.purl_normalizer.normalize(purl_b);
117 if norm_a == norm_b {
118 return 1.0;
119 }
120 }
121
122 if self.check_alias_match(a, b) {
124 return 0.95;
125 }
126
127 if let Some(score) = self.check_ecosystem_rules(a, b)
129 && score >= 0.90 {
130 return score;
131 }
132
133 if let Some(ref weights) = self.config.field_weights {
135 let result = self.compute_multi_field_score(a, b, weights);
137 if result.total >= self.config.threshold {
138 return result.total;
139 }
140 } else {
141 let fuzzy_score = self.compute_fuzzy_score(a, b);
143 if fuzzy_score >= self.config.threshold {
144 return fuzzy_score;
145 }
146 }
147
148 0.0
149 }
150
151 fn check_alias_match(&self, a: &Component, b: &Component) -> bool {
153 let names_a = self.get_all_names(a);
155 let names_b = self.get_all_names(b);
156
157 for name_a in &names_a {
158 if let Some(canonical) = self.alias_table.get_canonical(name_a) {
159 for name_b in &names_b {
160 if self.alias_table.is_alias(&canonical, name_b) {
161 return true;
162 }
163 }
164 }
165 }
166
167 false
168 }
169
170 fn get_all_names(&self, comp: &Component) -> Vec<String> {
172 let mut names = vec![comp.name.clone()];
173 names.extend(comp.identifiers.aliases.clone());
174
175 if let Some(purl) = &comp.identifiers.purl
177 && let Some(name) = self.extract_name_from_purl(purl) {
178 names.push(name);
179 }
180
181 names
182 }
183
184 fn extract_name_from_purl(&self, purl: &str) -> Option<String> {
186 let without_pkg = purl.strip_prefix("pkg:")?;
188 let parts: Vec<&str> = without_pkg.split('/').collect();
189
190 if parts.len() >= 2 {
191 let name_part = parts.last()?;
192 let name = name_part.split('@').next()?;
194 Some(name.to_string())
195 } else {
196 None
197 }
198 }
199
200 fn check_ecosystem_rules(&self, a: &Component, b: &Component) -> Option<f64> {
202 let ecosystem_a = a.ecosystem.as_ref()?;
203 let ecosystem_b = b.ecosystem.as_ref()?;
204
205 if ecosystem_a != ecosystem_b {
207 return None;
208 }
209
210 let norm_a = self.ecosystem_rules.normalize_name(&a.name, ecosystem_a);
211 let norm_b = self.ecosystem_rules.normalize_name(&b.name, ecosystem_b);
212
213 if norm_a == norm_b {
214 return Some(0.90);
215 }
216
217 None
218 }
219
220 fn compute_fuzzy_score(&self, a: &Component, b: &Component) -> f64 {
222 let name_a = a.name.to_lowercase();
223 let name_b = b.name.to_lowercase();
224
225 let jw_score = jaro_winkler(&name_a, &name_b);
227
228 let max_len = name_a.len().max(name_b.len());
230 let lev_distance = levenshtein(&name_a, &name_b);
231 let lev_score = if max_len > 0 {
232 1.0 - (lev_distance as f64 / max_len as f64)
233 } else {
234 1.0
235 };
236
237 let token_score = Self::compute_token_similarity(&name_a, &name_b);
239
240 let phonetic_score = Self::compute_phonetic_similarity(&name_a, &name_b);
242
243 let char_score = jw_score.mul_add(self.config.jaro_winkler_weight, lev_score * self.config.levenshtein_weight);
245
246 let combined = char_score.max(token_score).max(phonetic_score * 0.85);
249
250 let version_boost = Self::compute_version_similarity(a.version.as_ref(), b.version.as_ref());
252
253 (combined + version_boost).min(1.0)
254 }
255
256 fn compute_token_similarity(name_a: &str, name_b: &str) -> f64 {
258 string_similarity::compute_token_similarity(name_a, name_b)
259 }
260
261 fn compute_version_similarity(va: Option<&String>, vb: Option<&String>) -> f64 {
263 string_similarity::compute_version_similarity(va, vb)
264 }
265
266 #[must_use]
268 pub fn compute_phonetic_similarity(name_a: &str, name_b: &str) -> f64 {
269 string_similarity::compute_phonetic_similarity(name_a, name_b)
270 }
271
272 #[must_use]
276 pub fn compute_multi_field_score(
277 &self,
278 a: &Component,
279 b: &Component,
280 weights: &config::MultiFieldWeights,
281 ) -> scoring::MultiFieldScoreResult {
282 use std::collections::HashSet;
283
284 let mut result = scoring::MultiFieldScoreResult::default();
285
286 let name_score = self.compute_fuzzy_score(a, b);
288 result.name_score = name_score;
289 result.total += name_score * weights.name;
290
291 let version_score = if weights.version_divergence_enabled {
293 scoring::compute_version_divergence_score(&a.version, &b.version, weights)
294 } else {
295 match (&a.version, &b.version) {
297 (Some(va), Some(vb)) if va == vb => 1.0,
298 (None, None) => 0.5, _ => 0.0,
300 }
301 };
302 result.version_score = version_score;
303 result.total += version_score * weights.version;
304
305 let (ecosystem_score, ecosystem_penalty) = match (&a.ecosystem, &b.ecosystem) {
307 (Some(ea), Some(eb)) if ea == eb => (1.0, 0.0),
308 (None, None) => (0.5, 0.0), (Some(_), Some(_)) => (0.0, weights.ecosystem_mismatch_penalty), _ => (0.0, 0.0), };
312 result.ecosystem_score = ecosystem_score;
313 result.total += ecosystem_score.mul_add(weights.ecosystem, ecosystem_penalty);
314
315 let licenses_a: HashSet<_> = a
317 .licenses
318 .declared
319 .iter()
320 .map(|l| l.expression.as_str())
321 .collect();
322 let licenses_b: HashSet<_> = b
323 .licenses
324 .declared
325 .iter()
326 .map(|l| l.expression.as_str())
327 .collect();
328 let license_score = if licenses_a.is_empty() && licenses_b.is_empty() {
329 0.5 } else if licenses_a.is_empty() || licenses_b.is_empty() {
331 0.0 } else {
333 let intersection = licenses_a.intersection(&licenses_b).count();
334 let union = licenses_a.union(&licenses_b).count();
335 if union > 0 {
336 intersection as f64 / union as f64
337 } else {
338 0.0
339 }
340 };
341 result.license_score = license_score;
342 result.total += license_score * weights.licenses;
343
344 let supplier_score = match (&a.supplier, &b.supplier) {
346 (Some(sa), Some(sb)) if sa.name.to_lowercase() == sb.name.to_lowercase() => 1.0,
347 (None, None) => 0.5, _ => 0.0,
349 };
350 result.supplier_score = supplier_score;
351 result.total += supplier_score * weights.supplier;
352
353 let group_score = match (&a.group, &b.group) {
355 (Some(ga), Some(gb)) if ga.to_lowercase() == gb.to_lowercase() => 1.0,
356 (None, None) => 0.5, _ => 0.0,
358 };
359 result.group_score = group_score;
360 result.total += group_score * weights.group;
361
362 result.total = result.total.clamp(0.0, 1.0);
364
365 result
366 }
367}
368
369impl Default for FuzzyMatcher {
370 fn default() -> Self {
371 Self::new(FuzzyMatchConfig::balanced())
372 }
373}
374
375impl ComponentMatcher for FuzzyMatcher {
376 fn match_score(&self, a: &Component, b: &Component) -> f64 {
377 self.match_components(a, b)
378 }
379
380 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
381 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
383 let norm_a = self.purl_normalizer.normalize(purl_a);
384 let norm_b = self.purl_normalizer.normalize(purl_b);
385 if norm_a == norm_b {
386 return MatchResult::with_metadata(
387 1.0,
388 MatchTier::ExactIdentifier,
389 MatchMetadata {
390 matched_fields: vec!["purl".to_string()],
391 normalization: Some("purl_normalized".to_string()),
392 rule_id: None,
393 },
394 );
395 }
396 }
397
398 if self.check_alias_match(a, b) {
400 return MatchResult::with_metadata(
401 0.95,
402 MatchTier::Alias,
403 MatchMetadata {
404 matched_fields: vec!["name".to_string()],
405 normalization: Some("alias_table".to_string()),
406 rule_id: None,
407 },
408 );
409 }
410
411 if let Some(score) = self.check_ecosystem_rules(a, b)
413 && score >= 0.90 {
414 return MatchResult::with_metadata(
415 score,
416 MatchTier::EcosystemRule,
417 MatchMetadata {
418 matched_fields: vec!["name".to_string(), "ecosystem".to_string()],
419 normalization: Some("ecosystem_rules".to_string()),
420 rule_id: None,
421 },
422 );
423 }
424
425 let fuzzy_score = self.compute_fuzzy_score(a, b);
427 if fuzzy_score >= self.config.threshold {
428 return MatchResult::with_metadata(
429 fuzzy_score,
430 MatchTier::Fuzzy,
431 MatchMetadata {
432 matched_fields: vec!["name".to_string()],
433 normalization: Some("fuzzy_similarity".to_string()),
434 rule_id: None,
435 },
436 );
437 }
438
439 MatchResult::no_match()
440 }
441
442 fn name(&self) -> &'static str {
443 "FuzzyMatcher"
444 }
445
446 fn threshold(&self) -> f64 {
447 self.config.threshold
448 }
449
450 fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
451 use strsim::{jaro_winkler, levenshtein};
452
453 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
455 let norm_a = self.purl_normalizer.normalize(purl_a);
456 let norm_b = self.purl_normalizer.normalize(purl_b);
457 if norm_a == norm_b {
458 return MatchExplanation::matched(
459 MatchTier::ExactIdentifier,
460 1.0,
461 format!(
462 "Exact PURL match: '{purl_a}' equals '{purl_b}' after normalization"
463 ),
464 )
465 .with_normalization("purl_normalized");
466 }
467 }
468
469 if self.check_alias_match(a, b) {
471 return MatchExplanation::matched(
472 MatchTier::Alias,
473 0.95,
474 format!(
475 "'{}' and '{}' are known aliases of the same package",
476 a.name, b.name
477 ),
478 )
479 .with_normalization("alias_table");
480 }
481
482 if let Some(score) = self.check_ecosystem_rules(a, b)
484 && score >= 0.90 {
485 let ecosystem = a
486 .ecosystem
487 .as_ref().map_or_else(|| "unknown".to_string(), std::string::ToString::to_string);
488 return MatchExplanation::matched(
489 MatchTier::EcosystemRule,
490 score,
491 format!(
492 "Names match after {} ecosystem normalization: '{}' -> '{}'",
493 ecosystem, a.name, b.name
494 ),
495 )
496 .with_normalization(format!("{ecosystem}_normalization"));
497 }
498
499 let name_a = a.name.to_lowercase();
501 let name_b = b.name.to_lowercase();
502
503 let jw_score = jaro_winkler(&name_a, &name_b);
504 let max_len = name_a.len().max(name_b.len());
505 let lev_distance = levenshtein(&name_a, &name_b);
506 let lev_score = if max_len > 0 {
507 1.0 - (lev_distance as f64 / max_len as f64)
508 } else {
509 1.0
510 };
511
512 let jw_weighted = jw_score * self.config.jaro_winkler_weight;
513 let lev_weighted = lev_score * self.config.levenshtein_weight;
514
515 let version_boost = if a.version == b.version && a.version.is_some() {
516 0.05
517 } else {
518 0.0
519 };
520
521 let combined = (jw_weighted + lev_weighted + version_boost).min(1.0);
522
523 let mut explanation = if combined >= self.config.threshold {
524 MatchExplanation::matched(
525 MatchTier::Fuzzy,
526 combined,
527 format!(
528 "Fuzzy match: '{}' ~ '{}' with {:.0}% similarity",
529 a.name,
530 b.name,
531 combined * 100.0
532 ),
533 )
534 } else {
535 MatchExplanation::no_match(format!(
536 "Fuzzy similarity {:.2} below threshold {:.2}",
537 combined, self.config.threshold
538 ))
539 };
540
541 explanation = explanation
543 .with_score_component(ScoreComponent {
544 name: "Jaro-Winkler".to_string(),
545 weight: self.config.jaro_winkler_weight,
546 raw_score: jw_score,
547 weighted_score: jw_weighted,
548 description: format!("'{name_a}' vs '{name_b}' = {jw_score:.2}"),
549 })
550 .with_score_component(ScoreComponent {
551 name: "Levenshtein".to_string(),
552 weight: self.config.levenshtein_weight,
553 raw_score: lev_score,
554 weighted_score: lev_weighted,
555 description: format!(
556 "edit distance {lev_distance} / max_len {max_len} = {lev_score:.2}"
557 ),
558 });
559
560 if version_boost > 0.0 {
561 explanation = explanation.with_score_component(ScoreComponent {
562 name: "Version boost".to_string(),
563 weight: 1.0,
564 raw_score: version_boost,
565 weighted_score: version_boost,
566 description: format!("versions match: {:?}", a.version),
567 });
568 }
569
570 explanation.with_normalization("lowercase")
571 }
572}
573
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a component carrying only a name and an identifier.
    fn component(name: &str, id: &str) -> Component {
        Component::new(name.to_string(), id.to_string())
    }

    /// Creates a component with a version but no ecosystem.
    fn versioned_component(name: &str, id: &str, version: &str) -> Component {
        let mut comp = component(name, id);
        comp.version = Some(version.to_string());
        comp
    }

    /// Creates an npm component with the given version.
    fn npm_component(name: &str, id: &str, version: &str) -> Component {
        let mut comp = versioned_component(name, id, version);
        comp.ecosystem = Some(crate::model::Ecosystem::Npm);
        comp
    }

    /// Calls the matcher's version-similarity helper with borrowed strings.
    fn version_boost(va: Option<&str>, vb: Option<&str>) -> f64 {
        let va = va.map(str::to_string);
        let vb = vb.map(str::to_string);
        FuzzyMatcher::compute_version_similarity(va.as_ref(), vb.as_ref())
    }

    #[test]
    fn test_exact_purl_match() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced());
        let purl = "pkg:npm/lodash@4.17.21";

        let mut first = component("lodash", "comp-1");
        first.identifiers.purl = Some(purl.to_string());

        let mut second = component("lodash", "comp-2");
        second.identifiers.purl = Some(purl.to_string());

        assert_eq!(matcher.match_components(&first, &second), 1.0);
    }

    #[test]
    fn test_fuzzy_name_match() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());

        let score = matcher.match_components(
            &component("lodash-es", "comp-1"),
            &component("lodash", "comp-2"),
        );

        assert!(
            score >= 0.70,
            "lodash-es vs lodash should have score >= 0.70, got {}",
            score
        );
    }

    #[test]
    fn test_different_names_low_score() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::strict());

        let score = matcher.match_components(
            &component("react", "comp-1"),
            &component("angular", "comp-2"),
        );

        assert!(
            score < 0.5,
            "react vs angular should have low score, got {}",
            score
        );
    }

    #[test]
    fn test_multi_field_weights_normalized() {
        let presets = [
            (
                config::MultiFieldWeights::balanced(),
                "Balanced weights should be normalized",
            ),
            (
                config::MultiFieldWeights::name_focused(),
                "Name-focused weights should be normalized",
            ),
            (
                config::MultiFieldWeights::security_focused(),
                "Security-focused weights should be normalized",
            ),
        ];

        for (weights, message) in presets {
            assert!(weights.is_normalized(), "{}", message);
        }
    }

    #[test]
    fn test_multi_field_scoring_same_component() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
        let weights = config::MultiFieldWeights::balanced();
        let lodash = npm_component("lodash", "comp-1", "4.17.21");

        let result = matcher.compute_multi_field_score(&lodash, &lodash, &weights);

        assert!(
            result.total > 0.90,
            "Same component should score > 0.90, got {}",
            result.total
        );
        assert_eq!(result.name_score, 1.0);
        assert_eq!(result.version_score, 1.0);
        assert_eq!(result.ecosystem_score, 1.0);
        assert_eq!(
            result.license_score, 0.5,
            "Empty licenses should be neutral"
        );
        assert_eq!(
            result.supplier_score, 0.5,
            "Empty supplier should be neutral"
        );
        assert_eq!(result.group_score, 0.5, "Empty group should be neutral");
    }

    #[test]
    fn test_multi_field_scoring_different_versions() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
        let weights = config::MultiFieldWeights::balanced();

        let newer = npm_component("lodash", "comp-1", "4.17.21");
        let older = npm_component("lodash", "comp-2", "4.17.20");

        let result = matcher.compute_multi_field_score(&newer, &older, &weights);

        assert!(result.name_score > 0.9, "Name score should be > 0.9");
        assert!(
            result.version_score > 0.7,
            "Same major.minor with patch diff should score high, got {}",
            result.version_score
        );
        assert_eq!(
            result.ecosystem_score, 1.0,
            "Same ecosystem should score 1.0"
        );
        assert!(
            result.total > 0.8,
            "Total should be > 0.8, got {}",
            result.total
        );
    }

    #[test]
    fn test_multi_field_scoring_different_major_versions() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
        let weights = config::MultiFieldWeights::balanced();

        let v4 = npm_component("lodash", "comp-1", "4.17.21");
        let v3 = npm_component("lodash", "comp-2", "3.10.0");

        let result = matcher.compute_multi_field_score(&v4, &v3, &weights);

        assert!(
            result.version_score < 0.3,
            "Different major versions should score low, got {}",
            result.version_score
        );
    }

    #[test]
    fn test_multi_field_scoring_legacy_weights() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
        let weights = config::MultiFieldWeights::legacy();

        let newer = npm_component("lodash", "comp-1", "4.17.21");
        let older = npm_component("lodash", "comp-2", "4.17.20");

        let result = matcher.compute_multi_field_score(&newer, &older, &weights);

        assert_eq!(
            result.version_score, 0.0,
            "Legacy mode: different versions should score 0"
        );
    }

    #[test]
    fn test_multi_field_config_preset() {
        for preset in ["balanced-multi", "strict_multi"] {
            let config = FuzzyMatchConfig::from_preset(preset).unwrap();
            assert!(config.field_weights.is_some());
        }
    }

    #[test]
    fn test_multi_field_score_result_summary() {
        let summary = MultiFieldScoreResult {
            total: 0.85,
            name_score: 1.0,
            version_score: 0.0,
            ecosystem_score: 1.0,
            license_score: 0.5,
            supplier_score: 0.5,
            group_score: 0.5,
        }
        .summary();

        assert!(summary.contains("0.85"));
        assert!(summary.contains("name: 1.00"));
    }

    #[test]
    fn test_token_similarity_exact() {
        assert_eq!(
            string_similarity::compute_token_similarity("react-dom", "react-dom"),
            1.0
        );
    }

    #[test]
    fn test_token_similarity_reordered() {
        assert_eq!(
            string_similarity::compute_token_similarity("react-dom", "dom-react"),
            1.0,
            "Reordered tokens should match perfectly"
        );
    }

    #[test]
    fn test_token_similarity_partial() {
        let score = string_similarity::compute_token_similarity("react-dom-utils", "react-dom");
        let deviation = (score - 0.667).abs();

        assert!(
            deviation < 0.01,
            "Partial overlap should be ~0.67, got {}",
            score
        );
    }

    #[test]
    fn test_token_similarity_different_delimiters() {
        assert_eq!(
            string_similarity::compute_token_similarity("my_package_name", "my-package-name"),
            1.0,
            "Different delimiters should match"
        );
    }

    #[test]
    fn test_token_similarity_no_overlap() {
        assert_eq!(
            string_similarity::compute_token_similarity("react", "angular"),
            0.0,
            "No common tokens should score 0"
        );
    }

    #[test]
    fn test_version_similarity_exact() {
        assert_eq!(
            version_boost(Some("1.2.3"), Some("1.2.3")),
            0.10,
            "Exact version match should give max boost"
        );
    }

    #[test]
    fn test_version_similarity_same_major_minor() {
        assert_eq!(
            version_boost(Some("1.2.3"), Some("1.2.4")),
            0.07,
            "Same major.minor should give 0.07 boost"
        );
    }

    #[test]
    fn test_version_similarity_same_major() {
        assert_eq!(
            version_boost(Some("1.2.3"), Some("1.5.0")),
            0.04,
            "Same major should give 0.04 boost"
        );
    }

    #[test]
    fn test_version_similarity_different_major() {
        assert_eq!(
            version_boost(Some("1.2.3"), Some("2.0.0")),
            0.0,
            "Different major versions should give no boost"
        );
    }

    #[test]
    fn test_version_similarity_prerelease() {
        assert_eq!(
            version_boost(Some("1.2.3-beta"), Some("1.2.4")),
            0.07,
            "Prerelease should still match major.minor"
        );
    }

    #[test]
    fn test_version_similarity_missing() {
        assert_eq!(
            version_boost(None, Some("1.0.0")),
            0.0,
            "Missing version should give no boost"
        );
        assert_eq!(
            version_boost(None, None),
            0.0,
            "Both missing should give no boost"
        );
    }

    #[test]
    fn test_fuzzy_match_with_reordered_tokens() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());

        let score = matcher.match_components(
            &component("react-dom", "comp-1"),
            &component("dom-react", "comp-2"),
        );

        assert!(
            score > 0.5,
            "Reordered names should still match, got {}",
            score
        );
    }

    #[test]
    fn test_fuzzy_match_version_boost() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());

        let reference = versioned_component("lodash-utils", "comp-1", "4.17.21");
        let same_minor = versioned_component("lodash-util", "comp-2", "4.17.20");
        let other_major = versioned_component("lodash-util", "comp-3", "5.0.0");

        let score_same_minor = matcher.match_components(&reference, &same_minor);
        let score_diff_major = matcher.match_components(&reference, &other_major);

        assert!(score_same_minor > 0.0, "Same minor should match");
        assert!(score_diff_major > 0.0, "Different major should still match");
        assert!(
            score_same_minor > score_diff_major,
            "Same minor version should score higher: {} vs {}",
            score_same_minor,
            score_diff_major
        );
    }

    #[test]
    fn test_soundex_basic() {
        let expectations = [
            ("Robert", "R163"),
            ("Rupert", "R163"),
            ("Smith", "S530"),
            ("Smyth", "S530"),
        ];

        for (input, code) in expectations {
            assert_eq!(string_similarity::soundex(input), code);
        }
    }

    #[test]
    fn test_soundex_empty() {
        for input in ["", "123"] {
            assert_eq!(string_similarity::soundex(input), "");
        }
    }

    #[test]
    fn test_phonetic_similarity_exact() {
        assert_eq!(
            string_similarity::compute_phonetic_similarity("color", "colour"),
            1.0,
            "color and colour should match phonetically"
        );
    }

    #[test]
    fn test_phonetic_similarity_different() {
        let score = string_similarity::compute_phonetic_similarity("react", "angular");

        assert!(
            score < 0.5,
            "Different names should have low phonetic similarity"
        );
    }

    #[test]
    fn test_phonetic_similarity_compound() {
        let score = string_similarity::compute_phonetic_similarity("json-parser", "jayson-parser");

        assert!(
            score > 0.5,
            "Similar sounding compound names should match: {}",
            score
        );
    }

    #[test]
    fn test_fuzzy_match_with_phonetic() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());

        let score = matcher.match_components(
            &component("color-utils", "comp-1"),
            &component("colour-utils", "comp-2"),
        );

        assert!(
            score > 0.7,
            "Phonetically similar names should match: {}",
            score
        );
    }
}