1pub mod adaptive;
30mod aliases;
31mod config;
32pub mod cross_ecosystem;
33pub mod custom_rules;
34pub mod ecosystem_config;
35pub mod index;
36pub mod lsh;
37mod purl;
38pub mod rule_engine;
39mod rules;
40pub mod scoring;
41pub mod string_similarity;
42mod traits;
43
44pub use adaptive::{
45 AdaptiveMatching, AdaptiveMethod, AdaptiveThreshold, AdaptiveThresholdConfig,
46 AdaptiveThresholdResult, ScoreStats,
47};
48pub use aliases::AliasTable;
49pub use config::{CrossEcosystemConfig, FuzzyMatchConfig, MultiFieldWeights};
50pub use cross_ecosystem::{CrossEcosystemDb, CrossEcosystemMatch, PackageFamily};
51pub use custom_rules::{
52 AliasPattern, EquivalenceGroup, ExclusionRule, MatchingRulesConfig, RulePrecedence,
53 RulesSummary,
54};
55pub use ecosystem_config::{
56 ConfigError, CustomEquivalence, CustomRules, EcosystemConfig, EcosystemRulesConfig,
57 GlobalSettings, GroupMigration, ImportMapping, NormalizationConfig, PackageGroup,
58 ScopeHandling, SecurityConfig, TyposquatEntry, VersionSpec, VersioningConfig,
59};
60pub use index::{
61 BatchCandidateConfig, BatchCandidateGenerator, BatchCandidateResult, BatchCandidateStats,
62 ComponentIndex, IndexStats, LazyComponentIndex, NormalizedEntry,
63};
64pub use lsh::{LshConfig, LshIndex, LshIndexStats, MinHashSignature};
65pub use purl::PurlNormalizer;
66pub use rule_engine::{AppliedRule, AppliedRuleType, RuleApplicationResult, RuleEngine};
67pub use rules::EcosystemRules;
68pub use scoring::MultiFieldScoreResult;
69pub use traits::{
70 CacheConfig, CacheStats, CachedMatcher, ComponentMatcher, CompositeMatcher,
71 CompositeMatcherBuilder, MatchExplanation, MatchMetadata, MatchResult, MatchTier,
72 ScoreComponent,
73};
74
75use crate::model::Component;
76use strsim::{jaro_winkler, levenshtein};
77
/// Tiered matcher that decides whether two [`Component`]s refer to the same package.
///
/// Matching proceeds from the strongest signal to the weakest: normalized PURL
/// equality, alias-table lookup, ecosystem-specific name normalization, and
/// finally a weighted fuzzy (or multi-field) similarity score.
#[must_use]
pub struct FuzzyMatcher {
    /// Threshold, string-similarity weights and optional multi-field weights.
    config: FuzzyMatchConfig,
    /// Lookup used to resolve component names to a canonical name and test aliases.
    alias_table: AliasTable,
    /// Normalizes PURLs before they are compared for equality.
    purl_normalizer: PurlNormalizer,
    /// Normalizes component names according to their ecosystem.
    ecosystem_rules: EcosystemRules,
}
86
87impl FuzzyMatcher {
88 pub fn new(config: FuzzyMatchConfig) -> Self {
90 Self {
91 config,
92 alias_table: AliasTable::default(),
93 purl_normalizer: PurlNormalizer::new(),
94 ecosystem_rules: EcosystemRules::new(),
95 }
96 }
97
98 #[must_use]
100 pub const fn config(&self) -> &FuzzyMatchConfig {
101 &self.config
102 }
103
104 pub fn with_alias_table(mut self, table: AliasTable) -> Self {
106 self.alias_table = table;
107 self
108 }
109
110 #[must_use]
112 pub fn match_components(&self, a: &Component, b: &Component) -> f64 {
113 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
115 let norm_a = self.purl_normalizer.normalize(purl_a);
116 let norm_b = self.purl_normalizer.normalize(purl_b);
117 if norm_a == norm_b {
118 return 1.0;
119 }
120 }
121
122 if self.check_alias_match(a, b) {
124 return 0.95;
125 }
126
127 if let Some(score) = self.check_ecosystem_rules(a, b)
129 && score >= 0.90
130 {
131 return score;
132 }
133
134 if let Some(ref weights) = self.config.field_weights {
136 let result = self.compute_multi_field_score(a, b, weights);
138 if result.total >= self.config.threshold {
139 return result.total;
140 }
141 } else {
142 let fuzzy_score = self.compute_fuzzy_score(a, b);
144 if fuzzy_score >= self.config.threshold {
145 return fuzzy_score;
146 }
147 }
148
149 0.0
150 }
151
152 fn check_alias_match(&self, a: &Component, b: &Component) -> bool {
154 let names_a = self.get_all_names(a);
156 let names_b = self.get_all_names(b);
157
158 for name_a in &names_a {
159 if let Some(canonical) = self.alias_table.get_canonical(name_a) {
160 for name_b in &names_b {
161 if self.alias_table.is_alias(&canonical, name_b) {
162 return true;
163 }
164 }
165 }
166 }
167
168 false
169 }
170
171 fn get_all_names(&self, comp: &Component) -> Vec<String> {
173 let mut names = vec![comp.name.clone()];
174 names.extend(comp.identifiers.aliases.clone());
175
176 if let Some(purl) = &comp.identifiers.purl
178 && let Some(name) = self.extract_name_from_purl(purl)
179 {
180 names.push(name);
181 }
182
183 names
184 }
185
186 fn extract_name_from_purl(&self, purl: &str) -> Option<String> {
188 let without_pkg = purl.strip_prefix("pkg:")?;
190 let parts: Vec<&str> = without_pkg.split('/').collect();
191
192 if parts.len() >= 2 {
193 let name_part = parts.last()?;
194 let name = name_part.split('@').next()?;
196 Some(name.to_string())
197 } else {
198 None
199 }
200 }
201
202 fn check_ecosystem_rules(&self, a: &Component, b: &Component) -> Option<f64> {
204 let ecosystem_a = a.ecosystem.as_ref()?;
205 let ecosystem_b = b.ecosystem.as_ref()?;
206
207 if ecosystem_a != ecosystem_b {
209 return None;
210 }
211
212 let norm_a = self.ecosystem_rules.normalize_name(&a.name, ecosystem_a);
213 let norm_b = self.ecosystem_rules.normalize_name(&b.name, ecosystem_b);
214
215 if norm_a == norm_b {
216 return Some(0.90);
217 }
218
219 None
220 }
221
222 fn compute_fuzzy_score(&self, a: &Component, b: &Component) -> f64 {
224 let name_a = a.name.to_lowercase();
225 let name_b = b.name.to_lowercase();
226
227 let jw_score = jaro_winkler(&name_a, &name_b);
229
230 let max_len = name_a.len().max(name_b.len());
232 let lev_distance = levenshtein(&name_a, &name_b);
233 let lev_score = if max_len > 0 {
234 1.0 - (lev_distance as f64 / max_len as f64)
235 } else {
236 1.0
237 };
238
239 let token_score = Self::compute_token_similarity(&name_a, &name_b);
241
242 let phonetic_score = Self::compute_phonetic_similarity(&name_a, &name_b);
244
245 let char_score = jw_score.mul_add(
247 self.config.jaro_winkler_weight,
248 lev_score * self.config.levenshtein_weight,
249 );
250
251 let combined = char_score.max(token_score).max(phonetic_score * 0.85);
254
255 let version_boost =
257 Self::compute_version_similarity(a.version.as_ref(), b.version.as_ref());
258
259 (combined + version_boost).min(1.0)
260 }
261
262 fn compute_token_similarity(name_a: &str, name_b: &str) -> f64 {
264 string_similarity::compute_token_similarity(name_a, name_b)
265 }
266
267 fn compute_version_similarity(va: Option<&String>, vb: Option<&String>) -> f64 {
269 string_similarity::compute_version_similarity(va, vb)
270 }
271
272 #[must_use]
274 pub fn compute_phonetic_similarity(name_a: &str, name_b: &str) -> f64 {
275 string_similarity::compute_phonetic_similarity(name_a, name_b)
276 }
277
278 #[must_use]
282 pub fn compute_multi_field_score(
283 &self,
284 a: &Component,
285 b: &Component,
286 weights: &config::MultiFieldWeights,
287 ) -> scoring::MultiFieldScoreResult {
288 use std::collections::HashSet;
289
290 let mut result = scoring::MultiFieldScoreResult::default();
291
292 let name_score = self.compute_fuzzy_score(a, b);
294 result.name_score = name_score;
295 result.total += name_score * weights.name;
296
297 let version_score = if weights.version_divergence_enabled {
299 scoring::compute_version_divergence_score(&a.version, &b.version, weights)
300 } else {
301 match (&a.version, &b.version) {
303 (Some(va), Some(vb)) if va == vb => 1.0,
304 (None, None) => 0.5, _ => 0.0,
306 }
307 };
308 result.version_score = version_score;
309 result.total += version_score * weights.version;
310
311 let (ecosystem_score, ecosystem_penalty) = match (&a.ecosystem, &b.ecosystem) {
313 (Some(ea), Some(eb)) if ea == eb => (1.0, 0.0),
314 (None, None) => (0.5, 0.0), (Some(_), Some(_)) => (0.0, weights.ecosystem_mismatch_penalty), _ => (0.0, 0.0), };
318 result.ecosystem_score = ecosystem_score;
319 result.total += ecosystem_score.mul_add(weights.ecosystem, ecosystem_penalty);
320
321 let licenses_a: HashSet<_> = a
323 .licenses
324 .declared
325 .iter()
326 .map(|l| l.expression.as_str())
327 .collect();
328 let licenses_b: HashSet<_> = b
329 .licenses
330 .declared
331 .iter()
332 .map(|l| l.expression.as_str())
333 .collect();
334 let license_score = if licenses_a.is_empty() && licenses_b.is_empty() {
335 0.5 } else if licenses_a.is_empty() || licenses_b.is_empty() {
337 0.0 } else {
339 let intersection = licenses_a.intersection(&licenses_b).count();
340 let union = licenses_a.union(&licenses_b).count();
341 if union > 0 {
342 intersection as f64 / union as f64
343 } else {
344 0.0
345 }
346 };
347 result.license_score = license_score;
348 result.total += license_score * weights.licenses;
349
350 let supplier_score = match (&a.supplier, &b.supplier) {
352 (Some(sa), Some(sb)) if sa.name.to_lowercase() == sb.name.to_lowercase() => 1.0,
353 (None, None) => 0.5, _ => 0.0,
355 };
356 result.supplier_score = supplier_score;
357 result.total += supplier_score * weights.supplier;
358
359 let group_score = match (&a.group, &b.group) {
361 (Some(ga), Some(gb)) if ga.to_lowercase() == gb.to_lowercase() => 1.0,
362 (None, None) => 0.5, _ => 0.0,
364 };
365 result.group_score = group_score;
366 result.total += group_score * weights.group;
367
368 result.total = result.total.clamp(0.0, 1.0);
370
371 result
372 }
373}
374
375impl Default for FuzzyMatcher {
376 fn default() -> Self {
377 Self::new(FuzzyMatchConfig::balanced())
378 }
379}
380
381impl ComponentMatcher for FuzzyMatcher {
382 fn match_score(&self, a: &Component, b: &Component) -> f64 {
383 self.match_components(a, b)
384 }
385
386 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
387 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
389 let norm_a = self.purl_normalizer.normalize(purl_a);
390 let norm_b = self.purl_normalizer.normalize(purl_b);
391 if norm_a == norm_b {
392 return MatchResult::with_metadata(
393 1.0,
394 MatchTier::ExactIdentifier,
395 MatchMetadata {
396 matched_fields: vec!["purl".to_string()],
397 normalization: Some("purl_normalized".to_string()),
398 rule_id: None,
399 },
400 );
401 }
402 }
403
404 if self.check_alias_match(a, b) {
406 return MatchResult::with_metadata(
407 0.95,
408 MatchTier::Alias,
409 MatchMetadata {
410 matched_fields: vec!["name".to_string()],
411 normalization: Some("alias_table".to_string()),
412 rule_id: None,
413 },
414 );
415 }
416
417 if let Some(score) = self.check_ecosystem_rules(a, b)
419 && score >= 0.90
420 {
421 return MatchResult::with_metadata(
422 score,
423 MatchTier::EcosystemRule,
424 MatchMetadata {
425 matched_fields: vec!["name".to_string(), "ecosystem".to_string()],
426 normalization: Some("ecosystem_rules".to_string()),
427 rule_id: None,
428 },
429 );
430 }
431
432 let fuzzy_score = self.compute_fuzzy_score(a, b);
434 if fuzzy_score >= self.config.threshold {
435 return MatchResult::with_metadata(
436 fuzzy_score,
437 MatchTier::Fuzzy,
438 MatchMetadata {
439 matched_fields: vec!["name".to_string()],
440 normalization: Some("fuzzy_similarity".to_string()),
441 rule_id: None,
442 },
443 );
444 }
445
446 MatchResult::no_match()
447 }
448
449 fn name(&self) -> &'static str {
450 "FuzzyMatcher"
451 }
452
453 fn threshold(&self) -> f64 {
454 self.config.threshold
455 }
456
457 fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
458 use strsim::{jaro_winkler, levenshtein};
459
460 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
462 let norm_a = self.purl_normalizer.normalize(purl_a);
463 let norm_b = self.purl_normalizer.normalize(purl_b);
464 if norm_a == norm_b {
465 return MatchExplanation::matched(
466 MatchTier::ExactIdentifier,
467 1.0,
468 format!("Exact PURL match: '{purl_a}' equals '{purl_b}' after normalization"),
469 )
470 .with_normalization("purl_normalized");
471 }
472 }
473
474 if self.check_alias_match(a, b) {
476 return MatchExplanation::matched(
477 MatchTier::Alias,
478 0.95,
479 format!(
480 "'{}' and '{}' are known aliases of the same package",
481 a.name, b.name
482 ),
483 )
484 .with_normalization("alias_table");
485 }
486
487 if let Some(score) = self.check_ecosystem_rules(a, b)
489 && score >= 0.90
490 {
491 let ecosystem = a
492 .ecosystem
493 .as_ref()
494 .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string);
495 return MatchExplanation::matched(
496 MatchTier::EcosystemRule,
497 score,
498 format!(
499 "Names match after {} ecosystem normalization: '{}' -> '{}'",
500 ecosystem, a.name, b.name
501 ),
502 )
503 .with_normalization(format!("{ecosystem}_normalization"));
504 }
505
506 let name_a = a.name.to_lowercase();
508 let name_b = b.name.to_lowercase();
509
510 let jw_score = jaro_winkler(&name_a, &name_b);
511 let max_len = name_a.len().max(name_b.len());
512 let lev_distance = levenshtein(&name_a, &name_b);
513 let lev_score = if max_len > 0 {
514 1.0 - (lev_distance as f64 / max_len as f64)
515 } else {
516 1.0
517 };
518
519 let jw_weighted = jw_score * self.config.jaro_winkler_weight;
520 let lev_weighted = lev_score * self.config.levenshtein_weight;
521
522 let version_boost = if a.version == b.version && a.version.is_some() {
523 0.05
524 } else {
525 0.0
526 };
527
528 let combined = (jw_weighted + lev_weighted + version_boost).min(1.0);
529
530 let mut explanation = if combined >= self.config.threshold {
531 MatchExplanation::matched(
532 MatchTier::Fuzzy,
533 combined,
534 format!(
535 "Fuzzy match: '{}' ~ '{}' with {:.0}% similarity",
536 a.name,
537 b.name,
538 combined * 100.0
539 ),
540 )
541 } else {
542 MatchExplanation::no_match(format!(
543 "Fuzzy similarity {:.2} below threshold {:.2}",
544 combined, self.config.threshold
545 ))
546 };
547
548 explanation = explanation
550 .with_score_component(ScoreComponent {
551 name: "Jaro-Winkler".to_string(),
552 weight: self.config.jaro_winkler_weight,
553 raw_score: jw_score,
554 weighted_score: jw_weighted,
555 description: format!("'{name_a}' vs '{name_b}' = {jw_score:.2}"),
556 })
557 .with_score_component(ScoreComponent {
558 name: "Levenshtein".to_string(),
559 weight: self.config.levenshtein_weight,
560 raw_score: lev_score,
561 weighted_score: lev_weighted,
562 description: format!(
563 "edit distance {lev_distance} / max_len {max_len} = {lev_score:.2}"
564 ),
565 });
566
567 if version_boost > 0.0 {
568 explanation = explanation.with_score_component(ScoreComponent {
569 name: "Version boost".to_string(),
570 weight: 1.0,
571 raw_score: version_boost,
572 weighted_score: version_boost,
573 description: format!("versions match: {:?}", a.version),
574 });
575 }
576
577 explanation.with_normalization("lowercase")
578 }
579}
580
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Ecosystem;

    /// Builds a bare component with the given name and identifier.
    fn component(name: &str, id: &str) -> Component {
        Component::new(name.to_string(), id.to_string())
    }

    /// Builds a component that only carries a version.
    fn versioned(name: &str, id: &str, version: &str) -> Component {
        let mut comp = component(name, id);
        comp.version = Some(version.to_string());
        comp
    }

    /// Builds an npm component with the given version.
    fn npm(name: &str, id: &str, version: &str) -> Component {
        let mut comp = versioned(name, id, version);
        comp.ecosystem = Some(Ecosystem::Npm);
        comp
    }

    /// Builds a component that only carries a PURL.
    fn with_purl(name: &str, id: &str, purl: &str) -> Component {
        let mut comp = component(name, id);
        comp.identifiers.purl = Some(purl.to_string());
        comp
    }

    /// Calls the matcher's version-similarity wrapper with optional string slices.
    fn version_boost(left: Option<&str>, right: Option<&str>) -> f64 {
        let left = left.map(str::to_string);
        let right = right.map(str::to_string);
        FuzzyMatcher::compute_version_similarity(left.as_ref(), right.as_ref())
    }

    /// Matcher and weights shared by the multi-field scoring tests.
    fn balanced_multi_field() -> (FuzzyMatcher, config::MultiFieldWeights) {
        (
            FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field()),
            config::MultiFieldWeights::balanced(),
        )
    }

    #[test]
    fn test_exact_purl_match() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced());

        let left = with_purl("lodash", "comp-1", "pkg:npm/lodash@4.17.21");
        let right = with_purl("lodash", "comp-2", "pkg:npm/lodash@4.17.21");

        assert_eq!(matcher.match_components(&left, &right), 1.0);
    }

    #[test]
    fn test_fuzzy_name_match() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());

        let score = matcher.match_components(
            &component("lodash-es", "comp-1"),
            &component("lodash", "comp-2"),
        );
        assert!(
            score >= 0.70,
            "lodash-es vs lodash should have score >= 0.70, got {}",
            score
        );
    }

    #[test]
    fn test_different_names_low_score() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::strict());

        let score = matcher.match_components(
            &component("react", "comp-1"),
            &component("angular", "comp-2"),
        );
        assert!(
            score < 0.5,
            "react vs angular should have low score, got {}",
            score
        );
    }

    #[test]
    fn test_multi_field_weights_normalized() {
        let presets = [
            (
                config::MultiFieldWeights::balanced(),
                "Balanced weights should be normalized",
            ),
            (
                config::MultiFieldWeights::name_focused(),
                "Name-focused weights should be normalized",
            ),
            (
                config::MultiFieldWeights::security_focused(),
                "Security-focused weights should be normalized",
            ),
        ];

        for (weights, message) in presets {
            assert!(weights.is_normalized(), "{message}");
        }
    }

    #[test]
    fn test_multi_field_scoring_same_component() {
        let (matcher, weights) = balanced_multi_field();
        let lodash = npm("lodash", "comp-1", "4.17.21");

        // Scoring a component against itself.
        let result = matcher.compute_multi_field_score(&lodash, &lodash, &weights);

        assert!(
            result.total > 0.90,
            "Same component should score > 0.90, got {}",
            result.total
        );
        assert_eq!(result.name_score, 1.0);
        assert_eq!(result.version_score, 1.0);
        assert_eq!(result.ecosystem_score, 1.0);
        assert_eq!(
            result.license_score, 0.5,
            "Empty licenses should be neutral"
        );
        assert_eq!(
            result.supplier_score, 0.5,
            "Empty supplier should be neutral"
        );
        assert_eq!(result.group_score, 0.5, "Empty group should be neutral");
    }

    #[test]
    fn test_multi_field_scoring_different_versions() {
        let (matcher, weights) = balanced_multi_field();

        let newer = npm("lodash", "comp-1", "4.17.21");
        let older = npm("lodash", "comp-2", "4.17.20");

        let result = matcher.compute_multi_field_score(&newer, &older, &weights);

        assert!(result.name_score > 0.9, "Name score should be > 0.9");
        assert!(
            result.version_score > 0.7,
            "Same major.minor with patch diff should score high, got {}",
            result.version_score
        );
        assert_eq!(
            result.ecosystem_score, 1.0,
            "Same ecosystem should score 1.0"
        );
        assert!(
            result.total > 0.8,
            "Total should be > 0.8, got {}",
            result.total
        );
    }

    #[test]
    fn test_multi_field_scoring_different_major_versions() {
        let (matcher, weights) = balanced_multi_field();

        let v4 = npm("lodash", "comp-1", "4.17.21");
        let v3 = npm("lodash", "comp-2", "3.10.0");

        let result = matcher.compute_multi_field_score(&v4, &v3, &weights);

        assert!(
            result.version_score < 0.3,
            "Different major versions should score low, got {}",
            result.version_score
        );
    }

    #[test]
    fn test_multi_field_scoring_legacy_weights() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
        let weights = config::MultiFieldWeights::legacy();

        let newer = npm("lodash", "comp-1", "4.17.21");
        let older = npm("lodash", "comp-2", "4.17.20");

        let result = matcher.compute_multi_field_score(&newer, &older, &weights);

        assert_eq!(
            result.version_score, 0.0,
            "Legacy mode: different versions should score 0"
        );
    }

    #[test]
    fn test_multi_field_config_preset() {
        for preset in ["balanced-multi", "strict_multi"] {
            let config = FuzzyMatchConfig::from_preset(preset).unwrap();
            assert!(config.field_weights.is_some());
        }
    }

    #[test]
    fn test_multi_field_score_result_summary() {
        let summary = MultiFieldScoreResult {
            total: 0.85,
            name_score: 1.0,
            version_score: 0.0,
            ecosystem_score: 1.0,
            license_score: 0.5,
            supplier_score: 0.5,
            group_score: 0.5,
        }
        .summary();

        assert!(summary.contains("0.85"));
        assert!(summary.contains("name: 1.00"));
    }

    #[test]
    fn test_token_similarity_exact() {
        assert_eq!(
            string_similarity::compute_token_similarity("react-dom", "react-dom"),
            1.0
        );
    }

    #[test]
    fn test_token_similarity_reordered() {
        let similarity = string_similarity::compute_token_similarity("react-dom", "dom-react");
        assert_eq!(similarity, 1.0, "Reordered tokens should match perfectly");
    }

    #[test]
    fn test_token_similarity_partial() {
        let similarity =
            string_similarity::compute_token_similarity("react-dom-utils", "react-dom");
        assert!(
            (similarity - 0.667).abs() < 0.01,
            "Partial overlap should be ~0.67, got {}",
            similarity
        );
    }

    #[test]
    fn test_token_similarity_different_delimiters() {
        let similarity =
            string_similarity::compute_token_similarity("my_package_name", "my-package-name");
        assert_eq!(similarity, 1.0, "Different delimiters should match");
    }

    #[test]
    fn test_token_similarity_no_overlap() {
        let similarity = string_similarity::compute_token_similarity("react", "angular");
        assert_eq!(similarity, 0.0, "No common tokens should score 0");
    }

    #[test]
    fn test_version_similarity_exact() {
        let boost = version_boost(Some("1.2.3"), Some("1.2.3"));
        assert_eq!(boost, 0.10, "Exact version match should give max boost");
    }

    #[test]
    fn test_version_similarity_same_major_minor() {
        let boost = version_boost(Some("1.2.3"), Some("1.2.4"));
        assert_eq!(boost, 0.07, "Same major.minor should give 0.07 boost");
    }

    #[test]
    fn test_version_similarity_same_major() {
        let boost = version_boost(Some("1.2.3"), Some("1.5.0"));
        assert_eq!(boost, 0.04, "Same major should give 0.04 boost");
    }

    #[test]
    fn test_version_similarity_different_major() {
        let boost = version_boost(Some("1.2.3"), Some("2.0.0"));
        assert_eq!(boost, 0.0, "Different major versions should give no boost");
    }

    #[test]
    fn test_version_similarity_prerelease() {
        let boost = version_boost(Some("1.2.3-beta"), Some("1.2.4"));
        assert_eq!(boost, 0.07, "Prerelease should still match major.minor");
    }

    #[test]
    fn test_version_similarity_missing() {
        let one_missing = version_boost(None, Some("1.0.0"));
        assert_eq!(one_missing, 0.0, "Missing version should give no boost");

        let both_missing = version_boost(None, None);
        assert_eq!(both_missing, 0.0, "Both missing should give no boost");
    }

    #[test]
    fn test_fuzzy_match_with_reordered_tokens() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());

        let score = matcher.match_components(
            &component("react-dom", "comp-1"),
            &component("dom-react", "comp-2"),
        );
        assert!(
            score > 0.5,
            "Reordered names should still match, got {}",
            score
        );
    }

    #[test]
    fn test_fuzzy_match_version_boost() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());

        let reference = versioned("lodash-utils", "comp-1", "4.17.21");
        let same_minor = versioned("lodash-util", "comp-2", "4.17.20");
        let other_major = versioned("lodash-util", "comp-3", "5.0.0");

        let score_same_minor = matcher.match_components(&reference, &same_minor);
        let score_diff_major = matcher.match_components(&reference, &other_major);

        assert!(score_same_minor > 0.0, "Same minor should match");
        assert!(score_diff_major > 0.0, "Different major should still match");
        assert!(
            score_same_minor > score_diff_major,
            "Same minor version should score higher: {} vs {}",
            score_same_minor,
            score_diff_major
        );
    }

    #[test]
    fn test_soundex_basic() {
        assert_eq!(string_similarity::soundex("Robert"), "R163");
        assert_eq!(string_similarity::soundex("Rupert"), "R163");
        assert_eq!(string_similarity::soundex("Smith"), "S530");
        assert_eq!(string_similarity::soundex("Smyth"), "S530");
    }

    #[test]
    fn test_soundex_empty() {
        assert_eq!(string_similarity::soundex(""), "");
        assert_eq!(string_similarity::soundex("123"), "");
    }

    #[test]
    fn test_phonetic_similarity_exact() {
        let similarity = string_similarity::compute_phonetic_similarity("color", "colour");
        assert_eq!(similarity, 1.0, "color and colour should match phonetically");
    }

    #[test]
    fn test_phonetic_similarity_different() {
        let similarity = string_similarity::compute_phonetic_similarity("react", "angular");
        assert!(
            similarity < 0.5,
            "Different names should have low phonetic similarity"
        );
    }

    #[test]
    fn test_phonetic_similarity_compound() {
        let similarity =
            string_similarity::compute_phonetic_similarity("json-parser", "jayson-parser");
        assert!(
            similarity > 0.5,
            "Similar sounding compound names should match: {}",
            similarity
        );
    }

    #[test]
    fn test_fuzzy_match_with_phonetic() {
        let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());

        let score = matcher.match_components(
            &component("color-utils", "comp-1"),
            &component("colour-utils", "comp-2"),
        );
        assert!(
            score > 0.7,
            "Phonetically similar names should match: {}",
            score
        );
    }
}