1pub mod adaptive;
30mod aliases;
31mod config;
32pub mod cross_ecosystem;
33pub mod custom_rules;
34pub mod ecosystem_config;
35pub mod index;
36pub mod lsh;
37mod purl;
38pub mod rule_engine;
39mod rules;
40mod traits;
41
42pub use adaptive::{
43 AdaptiveMatching, AdaptiveMethod, AdaptiveThreshold, AdaptiveThresholdConfig,
44 AdaptiveThresholdResult, ScoreStats,
45};
46pub use aliases::AliasTable;
47pub use config::{CrossEcosystemConfig, FuzzyMatchConfig, MultiFieldWeights};
48pub use cross_ecosystem::{CrossEcosystemDb, CrossEcosystemMatch, PackageFamily};
49pub use custom_rules::{
50 AliasPattern, EquivalenceGroup, ExclusionRule, MatchingRulesConfig, RulePrecedence,
51 RulesSummary,
52};
53pub use ecosystem_config::{
54 ConfigError, CustomEquivalence, CustomRules, EcosystemConfig, EcosystemRulesConfig,
55 GlobalSettings, GroupMigration, ImportMapping, NormalizationConfig, PackageGroup,
56 ScopeHandling, SecurityConfig, TyposquatEntry, VersionSpec, VersioningConfig,
57};
58pub use index::{
59 BatchCandidateConfig, BatchCandidateGenerator, BatchCandidateResult, BatchCandidateStats,
60 ComponentIndex, IndexStats, LazyComponentIndex, NormalizedEntry,
61};
62pub use lsh::{LshConfig, LshIndex, LshIndexStats, MinHashSignature};
63pub use purl::PurlNormalizer;
64pub use rule_engine::{AppliedRule, AppliedRuleType, RuleApplicationResult, RuleEngine};
65pub use rules::EcosystemRules;
66pub use traits::{
67 CacheConfig, CacheStats, CachedMatcher, ComponentMatcher, CompositeMatcher,
68 CompositeMatcherBuilder, MatchExplanation, MatchMetadata, MatchResult, MatchTier,
69 ScoreComponent,
70};
71
72use crate::model::Component;
73use strsim::{jaro_winkler, levenshtein};
74
75pub struct FuzzyMatcher {
77 config: FuzzyMatchConfig,
78 alias_table: AliasTable,
79 purl_normalizer: PurlNormalizer,
80 ecosystem_rules: EcosystemRules,
81}
82
83impl FuzzyMatcher {
84 pub fn new(config: FuzzyMatchConfig) -> Self {
86 Self {
87 config,
88 alias_table: AliasTable::default(),
89 purl_normalizer: PurlNormalizer::new(),
90 ecosystem_rules: EcosystemRules::new(),
91 }
92 }
93
94 pub fn config(&self) -> &FuzzyMatchConfig {
96 &self.config
97 }
98
99 pub fn with_alias_table(mut self, table: AliasTable) -> Self {
101 self.alias_table = table;
102 self
103 }
104
105 pub fn match_components(&self, a: &Component, b: &Component) -> f64 {
107 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
109 let norm_a = self.purl_normalizer.normalize(purl_a);
110 let norm_b = self.purl_normalizer.normalize(purl_b);
111 if norm_a == norm_b {
112 return 1.0;
113 }
114 }
115
116 if self.check_alias_match(a, b) {
118 return 0.95;
119 }
120
121 if let Some(score) = self.check_ecosystem_rules(a, b) {
123 if score >= 0.90 {
124 return score;
125 }
126 }
127
128 if let Some(ref weights) = self.config.field_weights {
130 let result = self.compute_multi_field_score(a, b, weights);
132 if result.total >= self.config.threshold {
133 return result.total;
134 }
135 } else {
136 let fuzzy_score = self.compute_fuzzy_score(a, b);
138 if fuzzy_score >= self.config.threshold {
139 return fuzzy_score;
140 }
141 }
142
143 0.0
144 }
145
146 fn check_alias_match(&self, a: &Component, b: &Component) -> bool {
148 let names_a = self.get_all_names(a);
150 let names_b = self.get_all_names(b);
151
152 for name_a in &names_a {
153 if let Some(canonical) = self.alias_table.get_canonical(name_a) {
154 for name_b in &names_b {
155 if self.alias_table.is_alias(&canonical, name_b) {
156 return true;
157 }
158 }
159 }
160 }
161
162 false
163 }
164
165 fn get_all_names(&self, comp: &Component) -> Vec<String> {
167 let mut names = vec![comp.name.clone()];
168 names.extend(comp.identifiers.aliases.clone());
169
170 if let Some(purl) = &comp.identifiers.purl {
172 if let Some(name) = self.extract_name_from_purl(purl) {
173 names.push(name);
174 }
175 }
176
177 names
178 }
179
180 fn extract_name_from_purl(&self, purl: &str) -> Option<String> {
182 let without_pkg = purl.strip_prefix("pkg:")?;
184 let parts: Vec<&str> = without_pkg.split('/').collect();
185
186 if parts.len() >= 2 {
187 let name_part = parts.last()?;
188 let name = name_part.split('@').next()?;
190 Some(name.to_string())
191 } else {
192 None
193 }
194 }
195
196 fn check_ecosystem_rules(&self, a: &Component, b: &Component) -> Option<f64> {
198 let ecosystem_a = a.ecosystem.as_ref()?;
199 let ecosystem_b = b.ecosystem.as_ref()?;
200
201 if ecosystem_a != ecosystem_b {
203 return None;
204 }
205
206 let norm_a = self.ecosystem_rules.normalize_name(&a.name, ecosystem_a);
207 let norm_b = self.ecosystem_rules.normalize_name(&b.name, ecosystem_b);
208
209 if norm_a == norm_b {
210 return Some(0.90);
211 }
212
213 None
214 }
215
216 fn compute_fuzzy_score(&self, a: &Component, b: &Component) -> f64 {
218 let name_a = a.name.to_lowercase();
219 let name_b = b.name.to_lowercase();
220
221 let jw_score = jaro_winkler(&name_a, &name_b);
223
224 let max_len = name_a.len().max(name_b.len());
226 let lev_distance = levenshtein(&name_a, &name_b);
227 let lev_score = if max_len > 0 {
228 1.0 - (lev_distance as f64 / max_len as f64)
229 } else {
230 1.0
231 };
232
233 let token_score = Self::compute_token_similarity(&name_a, &name_b);
235
236 let phonetic_score = Self::compute_phonetic_similarity(&name_a, &name_b);
238
239 let char_score = (jw_score * self.config.jaro_winkler_weight)
241 + (lev_score * self.config.levenshtein_weight);
242
243 let combined = char_score.max(token_score).max(phonetic_score * 0.85);
246
247 let version_boost = Self::compute_version_similarity(&a.version, &b.version);
249
250 (combined + version_boost).min(1.0)
251 }
252
253 fn compute_token_similarity(name_a: &str, name_b: &str) -> f64 {
258 use std::collections::HashSet;
259
260 let tokens_a: HashSet<&str> = name_a
261 .split(['-', '_', '.', '@', '/'])
262 .filter(|t| !t.is_empty())
263 .collect();
264
265 let tokens_b: HashSet<&str> = name_b
266 .split(['-', '_', '.', '@', '/'])
267 .filter(|t| !t.is_empty())
268 .collect();
269
270 if tokens_a.is_empty() && tokens_b.is_empty() {
271 return 1.0;
272 }
273 if tokens_a.is_empty() || tokens_b.is_empty() {
274 return 0.0;
275 }
276
277 let intersection = tokens_a.intersection(&tokens_b).count();
278 let union = tokens_a.union(&tokens_b).count();
279
280 if union > 0 {
281 intersection as f64 / union as f64
282 } else {
283 0.0
284 }
285 }
286
287 fn compute_version_similarity(va: &Option<String>, vb: &Option<String>) -> f64 {
296 match (va, vb) {
297 (Some(a), Some(b)) if a == b => 0.10, (Some(a), Some(b)) => {
299 let parts_a: Vec<&str> = a.split('.').collect();
301 let parts_b: Vec<&str> = b.split('.').collect();
302
303 let major_a = parts_a.first().and_then(|s| s.parse::<u32>().ok());
305 let major_b = parts_b.first().and_then(|s| s.parse::<u32>().ok());
306 let minor_a = parts_a
307 .get(1)
308 .and_then(|s| s.split('-').next())
309 .and_then(|s| s.parse::<u32>().ok());
310 let minor_b = parts_b
311 .get(1)
312 .and_then(|s| s.split('-').next())
313 .and_then(|s| s.parse::<u32>().ok());
314
315 match (major_a, major_b, minor_a, minor_b) {
316 (Some(ma), Some(mb), Some(mia), Some(mib)) if ma == mb && mia == mib => 0.07,
317 (Some(ma), Some(mb), _, _) if ma == mb => 0.04,
318 _ => 0.0,
319 }
320 }
321 _ => 0.0, }
323 }
324
325 fn soundex(name: &str) -> String {
332 if name.is_empty() {
333 return String::new();
334 }
335
336 let name_upper: String = name
337 .to_uppercase()
338 .chars()
339 .filter(|c| c.is_ascii_alphabetic())
340 .collect();
341 if name_upper.is_empty() {
342 return String::new();
343 }
344
345 let mut chars = name_upper.chars();
346 let first_char = chars.next().expect("name_upper is non-empty after empty check above");
347 let mut code = String::with_capacity(4);
348 code.push(first_char);
349
350 let mut last_digit = Self::soundex_digit(first_char);
351
352 for c in chars {
353 let digit = Self::soundex_digit(c);
354 if digit != '0' && digit != last_digit {
355 code.push(digit);
356 if code.len() == 4 {
357 break;
358 }
359 }
360 if digit != '0' {
361 last_digit = digit;
362 }
363 }
364
365 while code.len() < 4 {
367 code.push('0');
368 }
369
370 code
371 }
372
373 fn soundex_digit(c: char) -> char {
375 match c {
376 'B' | 'F' | 'P' | 'V' => '1',
377 'C' | 'G' | 'J' | 'K' | 'Q' | 'S' | 'X' | 'Z' => '2',
378 'D' | 'T' => '3',
379 'L' => '4',
380 'M' | 'N' => '5',
381 'R' => '6',
382 _ => '0', }
384 }
385
386 pub fn compute_phonetic_similarity(name_a: &str, name_b: &str) -> f64 {
391 let soundex_a = Self::soundex(name_a);
393 let soundex_b = Self::soundex(name_b);
394
395 if !soundex_a.is_empty() && soundex_a == soundex_b {
396 return 1.0;
397 }
398
399 let tokens_a: Vec<&str> = name_a
401 .split(|c: char| !c.is_alphanumeric())
402 .filter(|t| !t.is_empty())
403 .collect();
404 let tokens_b: Vec<&str> = name_b
405 .split(|c: char| !c.is_alphanumeric())
406 .filter(|t| !t.is_empty())
407 .collect();
408
409 if tokens_a.is_empty() || tokens_b.is_empty() {
410 return 0.0;
411 }
412
413 let mut matches = 0;
415 let total = tokens_a.len().max(tokens_b.len());
416
417 for ta in &tokens_a {
418 let sa = Self::soundex(ta);
419 if sa.is_empty() {
420 continue;
421 }
422 for tb in &tokens_b {
423 let sb = Self::soundex(tb);
424 if sa == sb {
425 matches += 1;
426 break;
427 }
428 }
429 }
430
431 if total == 0 {
432 0.0
433 } else {
434 matches as f64 / total as f64
435 }
436 }
437
438 pub fn compute_multi_field_score(
442 &self,
443 a: &Component,
444 b: &Component,
445 weights: &config::MultiFieldWeights,
446 ) -> MultiFieldScoreResult {
447 use std::collections::HashSet;
448
449 let mut result = MultiFieldScoreResult::default();
450
451 let name_score = self.compute_fuzzy_score(a, b);
453 result.name_score = name_score;
454 result.total += name_score * weights.name;
455
456 let version_score = if weights.version_divergence_enabled {
458 compute_version_divergence_score(&a.version, &b.version, weights)
459 } else {
460 match (&a.version, &b.version) {
462 (Some(va), Some(vb)) if va == vb => 1.0,
463 (None, None) => 0.5, _ => 0.0,
465 }
466 };
467 result.version_score = version_score;
468 result.total += version_score * weights.version;
469
470 let (ecosystem_score, ecosystem_penalty) = match (&a.ecosystem, &b.ecosystem) {
472 (Some(ea), Some(eb)) if ea == eb => (1.0, 0.0),
473 (None, None) => (0.5, 0.0), (Some(_), Some(_)) => (0.0, weights.ecosystem_mismatch_penalty), _ => (0.0, 0.0), };
477 result.ecosystem_score = ecosystem_score;
478 result.total += ecosystem_score * weights.ecosystem + ecosystem_penalty;
479
480 let licenses_a: HashSet<_> = a
482 .licenses
483 .declared
484 .iter()
485 .map(|l| l.expression.as_str())
486 .collect();
487 let licenses_b: HashSet<_> = b
488 .licenses
489 .declared
490 .iter()
491 .map(|l| l.expression.as_str())
492 .collect();
493 let license_score = if licenses_a.is_empty() && licenses_b.is_empty() {
494 0.5 } else if licenses_a.is_empty() || licenses_b.is_empty() {
496 0.0 } else {
498 let intersection = licenses_a.intersection(&licenses_b).count();
499 let union = licenses_a.union(&licenses_b).count();
500 if union > 0 {
501 intersection as f64 / union as f64
502 } else {
503 0.0
504 }
505 };
506 result.license_score = license_score;
507 result.total += license_score * weights.licenses;
508
509 let supplier_score = match (&a.supplier, &b.supplier) {
511 (Some(sa), Some(sb)) if sa.name.to_lowercase() == sb.name.to_lowercase() => 1.0,
512 (None, None) => 0.5, _ => 0.0,
514 };
515 result.supplier_score = supplier_score;
516 result.total += supplier_score * weights.supplier;
517
518 let group_score = match (&a.group, &b.group) {
520 (Some(ga), Some(gb)) if ga.to_lowercase() == gb.to_lowercase() => 1.0,
521 (None, None) => 0.5, _ => 0.0,
523 };
524 result.group_score = group_score;
525 result.total += group_score * weights.group;
526
527 result.total = result.total.clamp(0.0, 1.0);
529
530 result
531 }
532}
533
534fn compute_version_divergence_score(
542 version_a: &Option<String>,
543 version_b: &Option<String>,
544 weights: &config::MultiFieldWeights,
545) -> f64 {
546 match (version_a, version_b) {
547 (Some(va), Some(vb)) if va == vb => 1.0,
548 (None, None) => 0.5, (Some(va), Some(vb)) => {
550 let parts_a = parse_semver_parts(va);
552 let parts_b = parse_semver_parts(vb);
553
554 match (parts_a, parts_b) {
555 (Some((maj_a, min_a, patch_a)), Some((maj_b, min_b, patch_b))) => {
556 if maj_a == maj_b && min_a == min_b {
557 let patch_diff = (patch_a as i64 - patch_b as i64).unsigned_abs() as f64;
559 (0.8 - patch_diff * 0.01).max(0.5)
560 } else if maj_a == maj_b {
561 let minor_diff = (min_a as i64 - min_b as i64).unsigned_abs() as f64;
563 (0.5 - minor_diff * weights.version_minor_penalty).max(0.2)
564 } else {
565 let major_diff = (maj_a as i64 - maj_b as i64).unsigned_abs() as f64;
567 (0.3 - major_diff * weights.version_major_penalty).max(0.0)
568 }
569 }
570 _ => {
571 let common_prefix_len = va
574 .chars()
575 .zip(vb.chars())
576 .take_while(|(a, b)| a == b)
577 .count();
578 let max_len = va.len().max(vb.len());
579 if max_len > 0 && common_prefix_len > 0 {
580 (common_prefix_len as f64 / max_len as f64 * 0.5).min(0.4)
581 } else {
582 0.1 }
584 }
585 }
586 }
587 _ => 0.0, }
589}
590
591fn parse_semver_parts(version: &str) -> Option<(u32, u32, u32)> {
594 let version = version.trim_start_matches(['v', 'V']);
596
597 let mut parts = version.split(['.', '-', '+']);
599
600 let major: u32 = parts.next()?.parse().ok()?;
601 let minor: u32 = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
602 let patch: u32 = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
603
604 Some((major, minor, patch))
605}
606
607#[derive(Debug, Clone, Default)]
609pub struct MultiFieldScoreResult {
610 pub total: f64,
612 pub name_score: f64,
614 pub version_score: f64,
616 pub ecosystem_score: f64,
618 pub license_score: f64,
620 pub supplier_score: f64,
622 pub group_score: f64,
624}
625
626impl MultiFieldScoreResult {
627 pub fn summary(&self) -> String {
629 format!(
630 "Total: {:.2} (name: {:.2}, version: {:.2}, ecosystem: {:.2}, licenses: {:.2}, supplier: {:.2}, group: {:.2})",
631 self.total,
632 self.name_score,
633 self.version_score,
634 self.ecosystem_score,
635 self.license_score,
636 self.supplier_score,
637 self.group_score
638 )
639 }
640}
641
642impl Default for FuzzyMatcher {
643 fn default() -> Self {
644 Self::new(FuzzyMatchConfig::balanced())
645 }
646}
647
648impl ComponentMatcher for FuzzyMatcher {
649 fn match_score(&self, a: &Component, b: &Component) -> f64 {
650 self.match_components(a, b)
651 }
652
653 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
654 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
656 let norm_a = self.purl_normalizer.normalize(purl_a);
657 let norm_b = self.purl_normalizer.normalize(purl_b);
658 if norm_a == norm_b {
659 return MatchResult::with_metadata(
660 1.0,
661 MatchTier::ExactIdentifier,
662 MatchMetadata {
663 matched_fields: vec!["purl".to_string()],
664 normalization: Some("purl_normalized".to_string()),
665 rule_id: None,
666 },
667 );
668 }
669 }
670
671 if self.check_alias_match(a, b) {
673 return MatchResult::with_metadata(
674 0.95,
675 MatchTier::Alias,
676 MatchMetadata {
677 matched_fields: vec!["name".to_string()],
678 normalization: Some("alias_table".to_string()),
679 rule_id: None,
680 },
681 );
682 }
683
684 if let Some(score) = self.check_ecosystem_rules(a, b) {
686 if score >= 0.90 {
687 return MatchResult::with_metadata(
688 score,
689 MatchTier::EcosystemRule,
690 MatchMetadata {
691 matched_fields: vec!["name".to_string(), "ecosystem".to_string()],
692 normalization: Some("ecosystem_rules".to_string()),
693 rule_id: None,
694 },
695 );
696 }
697 }
698
699 let fuzzy_score = self.compute_fuzzy_score(a, b);
701 if fuzzy_score >= self.config.threshold {
702 return MatchResult::with_metadata(
703 fuzzy_score,
704 MatchTier::Fuzzy,
705 MatchMetadata {
706 matched_fields: vec!["name".to_string()],
707 normalization: Some("fuzzy_similarity".to_string()),
708 rule_id: None,
709 },
710 );
711 }
712
713 MatchResult::no_match()
714 }
715
716 fn name(&self) -> &str {
717 "FuzzyMatcher"
718 }
719
720 fn threshold(&self) -> f64 {
721 self.config.threshold
722 }
723
724 fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
725 use strsim::{jaro_winkler, levenshtein};
726
727 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
729 let norm_a = self.purl_normalizer.normalize(purl_a);
730 let norm_b = self.purl_normalizer.normalize(purl_b);
731 if norm_a == norm_b {
732 return MatchExplanation::matched(
733 MatchTier::ExactIdentifier,
734 1.0,
735 format!(
736 "Exact PURL match: '{}' equals '{}' after normalization",
737 purl_a, purl_b
738 ),
739 )
740 .with_normalization("purl_normalized");
741 }
742 }
743
744 if self.check_alias_match(a, b) {
746 return MatchExplanation::matched(
747 MatchTier::Alias,
748 0.95,
749 format!(
750 "'{}' and '{}' are known aliases of the same package",
751 a.name, b.name
752 ),
753 )
754 .with_normalization("alias_table");
755 }
756
757 if let Some(score) = self.check_ecosystem_rules(a, b) {
759 if score >= 0.90 {
760 let ecosystem = a
761 .ecosystem
762 .as_ref()
763 .map(|e| e.to_string())
764 .unwrap_or_else(|| "unknown".to_string());
765 return MatchExplanation::matched(
766 MatchTier::EcosystemRule,
767 score,
768 format!(
769 "Names match after {} ecosystem normalization: '{}' -> '{}'",
770 ecosystem, a.name, b.name
771 ),
772 )
773 .with_normalization(format!("{}_normalization", ecosystem));
774 }
775 }
776
777 let name_a = a.name.to_lowercase();
779 let name_b = b.name.to_lowercase();
780
781 let jw_score = jaro_winkler(&name_a, &name_b);
782 let max_len = name_a.len().max(name_b.len());
783 let lev_distance = levenshtein(&name_a, &name_b);
784 let lev_score = if max_len > 0 {
785 1.0 - (lev_distance as f64 / max_len as f64)
786 } else {
787 1.0
788 };
789
790 let jw_weighted = jw_score * self.config.jaro_winkler_weight;
791 let lev_weighted = lev_score * self.config.levenshtein_weight;
792
793 let version_boost = if a.version == b.version && a.version.is_some() {
794 0.05
795 } else {
796 0.0
797 };
798
799 let combined = (jw_weighted + lev_weighted + version_boost).min(1.0);
800
801 let mut explanation = if combined >= self.config.threshold {
802 MatchExplanation::matched(
803 MatchTier::Fuzzy,
804 combined,
805 format!(
806 "Fuzzy match: '{}' ~ '{}' with {:.0}% similarity",
807 a.name,
808 b.name,
809 combined * 100.0
810 ),
811 )
812 } else {
813 MatchExplanation::no_match(format!(
814 "Fuzzy similarity {:.2} below threshold {:.2}",
815 combined, self.config.threshold
816 ))
817 };
818
819 explanation = explanation
821 .with_score_component(ScoreComponent {
822 name: "Jaro-Winkler".to_string(),
823 weight: self.config.jaro_winkler_weight,
824 raw_score: jw_score,
825 weighted_score: jw_weighted,
826 description: format!("'{}' vs '{}' = {:.2}", name_a, name_b, jw_score),
827 })
828 .with_score_component(ScoreComponent {
829 name: "Levenshtein".to_string(),
830 weight: self.config.levenshtein_weight,
831 raw_score: lev_score,
832 weighted_score: lev_weighted,
833 description: format!(
834 "edit distance {} / max_len {} = {:.2}",
835 lev_distance, max_len, lev_score
836 ),
837 });
838
839 if version_boost > 0.0 {
840 explanation = explanation.with_score_component(ScoreComponent {
841 name: "Version boost".to_string(),
842 weight: 1.0,
843 raw_score: version_boost,
844 weighted_score: version_boost,
845 description: format!("versions match: {:?}", a.version),
846 });
847 }
848
849 explanation.with_normalization("lowercase")
850 }
851}
852
853#[cfg(test)]
854mod tests {
855 use super::*;
856
857 #[test]
858 fn test_exact_purl_match() {
859 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced());
860
861 let mut a = Component::new("lodash".to_string(), "comp-1".to_string());
862 a.identifiers.purl = Some("pkg:npm/lodash@4.17.21".to_string());
863
864 let mut b = Component::new("lodash".to_string(), "comp-2".to_string());
865 b.identifiers.purl = Some("pkg:npm/lodash@4.17.21".to_string());
866
867 assert_eq!(matcher.match_components(&a, &b), 1.0);
868 }
869
870 #[test]
871 fn test_fuzzy_name_match() {
872 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());
873
874 let a = Component::new("lodash-es".to_string(), "comp-1".to_string());
876 let b = Component::new("lodash".to_string(), "comp-2".to_string());
877
878 let score = matcher.match_components(&a, &b);
879 assert!(
881 score >= 0.70,
882 "lodash-es vs lodash should have score >= 0.70, got {}",
883 score
884 );
885 }
886
887 #[test]
888 fn test_different_names_low_score() {
889 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::strict());
890
891 let a = Component::new("react".to_string(), "comp-1".to_string());
892 let b = Component::new("angular".to_string(), "comp-2".to_string());
893
894 let score = matcher.match_components(&a, &b);
895 assert!(
896 score < 0.5,
897 "react vs angular should have low score, got {}",
898 score
899 );
900 }
901
902 #[test]
903 fn test_multi_field_weights_normalized() {
904 let weights = config::MultiFieldWeights::balanced();
905 assert!(
906 weights.is_normalized(),
907 "Balanced weights should be normalized"
908 );
909
910 let weights = config::MultiFieldWeights::name_focused();
911 assert!(
912 weights.is_normalized(),
913 "Name-focused weights should be normalized"
914 );
915
916 let weights = config::MultiFieldWeights::security_focused();
917 assert!(
918 weights.is_normalized(),
919 "Security-focused weights should be normalized"
920 );
921 }
922
923 #[test]
924 fn test_multi_field_scoring_same_component() {
925 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
926 let weights = config::MultiFieldWeights::balanced();
927
928 let mut a = Component::new("lodash".to_string(), "comp-1".to_string());
929 a.version = Some("4.17.21".to_string());
930 a.ecosystem = Some(crate::model::Ecosystem::Npm);
931
932 let result = matcher.compute_multi_field_score(&a, &a, &weights);
935 assert!(
936 result.total > 0.90,
937 "Same component should score > 0.90, got {}",
938 result.total
939 );
940 assert_eq!(result.name_score, 1.0);
941 assert_eq!(result.version_score, 1.0);
942 assert_eq!(result.ecosystem_score, 1.0);
943 assert_eq!(
945 result.license_score, 0.5,
946 "Empty licenses should be neutral"
947 );
948 assert_eq!(
949 result.supplier_score, 0.5,
950 "Empty supplier should be neutral"
951 );
952 assert_eq!(result.group_score, 0.5, "Empty group should be neutral");
953 }
954
955 #[test]
956 fn test_multi_field_scoring_different_versions() {
957 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
958 let weights = config::MultiFieldWeights::balanced();
959
960 let mut a = Component::new("lodash".to_string(), "comp-1".to_string());
961 a.version = Some("4.17.21".to_string());
962 a.ecosystem = Some(crate::model::Ecosystem::Npm);
963
964 let mut b = Component::new("lodash".to_string(), "comp-2".to_string());
965 b.version = Some("4.17.20".to_string()); b.ecosystem = Some(crate::model::Ecosystem::Npm);
967
968 let result = matcher.compute_multi_field_score(&a, &b, &weights);
969
970 assert!(result.name_score > 0.9, "Name score should be > 0.9");
972
973 assert!(
977 result.version_score > 0.7,
978 "Same major.minor with patch diff should score high, got {}",
979 result.version_score
980 );
981
982 assert_eq!(
984 result.ecosystem_score, 1.0,
985 "Same ecosystem should score 1.0"
986 );
987
988 assert!(
990 result.total > 0.8,
991 "Total should be > 0.8, got {}",
992 result.total
993 );
994 }
995
996 #[test]
997 fn test_multi_field_scoring_different_major_versions() {
998 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
999 let weights = config::MultiFieldWeights::balanced();
1000
1001 let mut a = Component::new("lodash".to_string(), "comp-1".to_string());
1002 a.version = Some("4.17.21".to_string());
1003 a.ecosystem = Some(crate::model::Ecosystem::Npm);
1004
1005 let mut b = Component::new("lodash".to_string(), "comp-2".to_string());
1006 b.version = Some("3.10.0".to_string()); b.ecosystem = Some(crate::model::Ecosystem::Npm);
1008
1009 let result = matcher.compute_multi_field_score(&a, &b, &weights);
1010
1011 assert!(
1015 result.version_score < 0.3,
1016 "Different major versions should score low, got {}",
1017 result.version_score
1018 );
1019 }
1020
1021 #[test]
1022 fn test_multi_field_scoring_legacy_weights() {
1023 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
1025 let weights = config::MultiFieldWeights::legacy();
1026
1027 let mut a = Component::new("lodash".to_string(), "comp-1".to_string());
1028 a.version = Some("4.17.21".to_string());
1029 a.ecosystem = Some(crate::model::Ecosystem::Npm);
1030
1031 let mut b = Component::new("lodash".to_string(), "comp-2".to_string());
1032 b.version = Some("4.17.20".to_string());
1033 b.ecosystem = Some(crate::model::Ecosystem::Npm);
1034
1035 let result = matcher.compute_multi_field_score(&a, &b, &weights);
1036
1037 assert_eq!(
1039 result.version_score, 0.0,
1040 "Legacy mode: different versions should score 0"
1041 );
1042 }
1043
1044 #[test]
1045 fn test_multi_field_config_preset() {
1046 let config = FuzzyMatchConfig::from_preset("balanced-multi").unwrap();
1047 assert!(config.field_weights.is_some());
1048
1049 let config = FuzzyMatchConfig::from_preset("strict_multi").unwrap();
1050 assert!(config.field_weights.is_some());
1051 }
1052
1053 #[test]
1054 fn test_multi_field_score_result_summary() {
1055 let result = MultiFieldScoreResult {
1056 total: 0.85,
1057 name_score: 1.0,
1058 version_score: 0.0,
1059 ecosystem_score: 1.0,
1060 license_score: 0.5,
1061 supplier_score: 0.5,
1062 group_score: 0.5,
1063 };
1064
1065 let summary = result.summary();
1066 assert!(summary.contains("0.85"));
1067 assert!(summary.contains("name: 1.00"));
1068 }
1069
1070 #[test]
1071 fn test_token_similarity_exact() {
1072 let score = FuzzyMatcher::compute_token_similarity("react-dom", "react-dom");
1073 assert_eq!(score, 1.0);
1074 }
1075
1076 #[test]
1077 fn test_token_similarity_reordered() {
1078 let score = FuzzyMatcher::compute_token_similarity("react-dom", "dom-react");
1080 assert_eq!(score, 1.0, "Reordered tokens should match perfectly");
1081 }
1082
1083 #[test]
1084 fn test_token_similarity_partial() {
1085 let score = FuzzyMatcher::compute_token_similarity("react-dom-utils", "react-dom");
1087 assert!(
1089 (score - 0.667).abs() < 0.01,
1090 "Partial overlap should be ~0.67, got {}",
1091 score
1092 );
1093 }
1094
1095 #[test]
1096 fn test_token_similarity_different_delimiters() {
1097 let score = FuzzyMatcher::compute_token_similarity("my_package_name", "my-package-name");
1099 assert_eq!(score, 1.0, "Different delimiters should match");
1100 }
1101
1102 #[test]
1103 fn test_token_similarity_no_overlap() {
1104 let score = FuzzyMatcher::compute_token_similarity("react", "angular");
1105 assert_eq!(score, 0.0, "No common tokens should score 0");
1106 }
1107
1108 #[test]
1109 fn test_version_similarity_exact() {
1110 let score = FuzzyMatcher::compute_version_similarity(
1111 &Some("1.2.3".to_string()),
1112 &Some("1.2.3".to_string()),
1113 );
1114 assert_eq!(score, 0.10, "Exact version match should give max boost");
1115 }
1116
1117 #[test]
1118 fn test_version_similarity_same_major_minor() {
1119 let score = FuzzyMatcher::compute_version_similarity(
1120 &Some("1.2.3".to_string()),
1121 &Some("1.2.4".to_string()),
1122 );
1123 assert_eq!(score, 0.07, "Same major.minor should give 0.07 boost");
1124 }
1125
1126 #[test]
1127 fn test_version_similarity_same_major() {
1128 let score = FuzzyMatcher::compute_version_similarity(
1129 &Some("1.2.3".to_string()),
1130 &Some("1.5.0".to_string()),
1131 );
1132 assert_eq!(score, 0.04, "Same major should give 0.04 boost");
1133 }
1134
1135 #[test]
1136 fn test_version_similarity_different_major() {
1137 let score = FuzzyMatcher::compute_version_similarity(
1138 &Some("1.2.3".to_string()),
1139 &Some("2.0.0".to_string()),
1140 );
1141 assert_eq!(score, 0.0, "Different major versions should give no boost");
1142 }
1143
1144 #[test]
1145 fn test_version_similarity_prerelease() {
1146 let score = FuzzyMatcher::compute_version_similarity(
1148 &Some("1.2.3-beta".to_string()),
1149 &Some("1.2.4".to_string()),
1150 );
1151 assert_eq!(score, 0.07, "Prerelease should still match major.minor");
1152 }
1153
1154 #[test]
1155 fn test_version_similarity_missing() {
1156 let score = FuzzyMatcher::compute_version_similarity(&None, &Some("1.0.0".to_string()));
1157 assert_eq!(score, 0.0, "Missing version should give no boost");
1158
1159 let score = FuzzyMatcher::compute_version_similarity(&None, &None);
1160 assert_eq!(score, 0.0, "Both missing should give no boost");
1161 }
1162
1163 #[test]
1164 fn test_fuzzy_match_with_reordered_tokens() {
1165 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());
1166
1167 let a = Component::new("react-dom".to_string(), "comp-1".to_string());
1168 let b = Component::new("dom-react".to_string(), "comp-2".to_string());
1169
1170 let score = matcher.match_components(&a, &b);
1171 assert!(
1173 score > 0.5,
1174 "Reordered names should still match, got {}",
1175 score
1176 );
1177 }
1178
1179 #[test]
1180 fn test_fuzzy_match_version_boost() {
1181 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());
1182
1183 let mut a = Component::new("lodash-utils".to_string(), "comp-1".to_string());
1185 a.version = Some("4.17.21".to_string());
1186
1187 let mut b = Component::new("lodash-util".to_string(), "comp-2".to_string());
1188 b.version = Some("4.17.20".to_string()); let mut c = Component::new("lodash-util".to_string(), "comp-3".to_string());
1191 c.version = Some("5.0.0".to_string()); let score_same_minor = matcher.match_components(&a, &b);
1194 let score_diff_major = matcher.match_components(&a, &c);
1195
1196 assert!(score_same_minor > 0.0, "Same minor should match");
1198 assert!(score_diff_major > 0.0, "Different major should still match");
1199 assert!(
1200 score_same_minor > score_diff_major,
1201 "Same minor version should score higher: {} vs {}",
1202 score_same_minor,
1203 score_diff_major
1204 );
1205 }
1206
1207 #[test]
1208 fn test_soundex_basic() {
1209 assert_eq!(FuzzyMatcher::soundex("Robert"), "R163");
1211 assert_eq!(FuzzyMatcher::soundex("Rupert"), "R163"); assert_eq!(FuzzyMatcher::soundex("Smith"), "S530");
1213 assert_eq!(FuzzyMatcher::soundex("Smyth"), "S530"); }
1215
1216 #[test]
1217 fn test_soundex_empty() {
1218 assert_eq!(FuzzyMatcher::soundex(""), "");
1219 assert_eq!(FuzzyMatcher::soundex("123"), ""); }
1221
1222 #[test]
1223 fn test_phonetic_similarity_exact() {
1224 let score = FuzzyMatcher::compute_phonetic_similarity("color", "colour");
1225 assert_eq!(score, 1.0, "color and colour should match phonetically");
1226 }
1227
1228 #[test]
1229 fn test_phonetic_similarity_different() {
1230 let score = FuzzyMatcher::compute_phonetic_similarity("react", "angular");
1231 assert!(
1232 score < 0.5,
1233 "Different names should have low phonetic similarity"
1234 );
1235 }
1236
1237 #[test]
1238 fn test_phonetic_similarity_compound() {
1239 let score = FuzzyMatcher::compute_phonetic_similarity("json-parser", "jayson-parser");
1241 assert!(
1242 score > 0.5,
1243 "Similar sounding compound names should match: {}",
1244 score
1245 );
1246 }
1247
1248 #[test]
1249 fn test_fuzzy_match_with_phonetic() {
1250 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());
1251
1252 let a = Component::new("color-utils".to_string(), "comp-1".to_string());
1253 let b = Component::new("colour-utils".to_string(), "comp-2".to_string());
1254
1255 let score = matcher.match_components(&a, &b);
1256 assert!(
1257 score > 0.7,
1258 "Phonetically similar names should match: {}",
1259 score
1260 );
1261 }
1262}