1pub mod adaptive;
30mod aliases;
31mod config;
32pub mod cross_ecosystem;
33pub mod custom_rules;
34pub mod ecosystem_config;
35pub mod index;
36pub mod lsh;
37mod purl;
38pub mod rule_engine;
39mod rules;
40mod traits;
41
42#[cfg(feature = "ml-matching")]
43mod embedding;
44
45pub use adaptive::{
46 AdaptiveMatching, AdaptiveMethod, AdaptiveThreshold, AdaptiveThresholdConfig,
47 AdaptiveThresholdResult, ScoreStats,
48};
49pub use aliases::AliasTable;
50pub use config::{CrossEcosystemConfig, FuzzyMatchConfig, MultiFieldWeights};
51pub use cross_ecosystem::{CrossEcosystemDb, CrossEcosystemMatch, PackageFamily};
52pub use custom_rules::{
53 AliasPattern, EquivalenceGroup, ExclusionRule, MatchingRulesConfig, RulePrecedence,
54 RulesSummary,
55};
56pub use ecosystem_config::{
57 ConfigError, CustomEquivalence, CustomRules, EcosystemConfig, EcosystemRulesConfig,
58 GlobalSettings, GroupMigration, ImportMapping, NormalizationConfig, PackageGroup,
59 ScopeHandling, SecurityConfig, TyposquatEntry, VersionSpec, VersioningConfig,
60};
61pub use index::{
62 BatchCandidateConfig, BatchCandidateGenerator, BatchCandidateResult, BatchCandidateStats,
63 ComponentIndex, IndexStats, LazyComponentIndex, NormalizedEntry,
64};
65pub use lsh::{LshConfig, LshIndex, LshIndexStats, MinHashSignature};
66pub use purl::PurlNormalizer;
67pub use rule_engine::{AppliedRule, AppliedRuleType, RuleApplicationResult, RuleEngine};
68pub use rules::EcosystemRules;
69pub use traits::{
70 CacheConfig, CacheStats, CachedMatcher, ComponentMatcher, CompositeMatcher,
71 CompositeMatcherBuilder, MatchExplanation, MatchMetadata, MatchResult, MatchTier,
72 ScoreComponent,
73};
74
75use crate::model::Component;
76use strsim::{jaro_winkler, levenshtein};
77
78pub struct FuzzyMatcher {
80 config: FuzzyMatchConfig,
81 alias_table: AliasTable,
82 purl_normalizer: PurlNormalizer,
83 ecosystem_rules: EcosystemRules,
84}
85
86impl FuzzyMatcher {
87 pub fn new(config: FuzzyMatchConfig) -> Self {
89 Self {
90 config,
91 alias_table: AliasTable::default(),
92 purl_normalizer: PurlNormalizer::new(),
93 ecosystem_rules: EcosystemRules::new(),
94 }
95 }
96
97 pub fn config(&self) -> &FuzzyMatchConfig {
99 &self.config
100 }
101
102 pub fn with_alias_table(mut self, table: AliasTable) -> Self {
104 self.alias_table = table;
105 self
106 }
107
108 pub fn match_components(&self, a: &Component, b: &Component) -> f64 {
110 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
112 let norm_a = self.purl_normalizer.normalize(purl_a);
113 let norm_b = self.purl_normalizer.normalize(purl_b);
114 if norm_a == norm_b {
115 return 1.0;
116 }
117 }
118
119 if self.check_alias_match(a, b) {
121 return 0.95;
122 }
123
124 if let Some(score) = self.check_ecosystem_rules(a, b) {
126 if score >= 0.90 {
127 return score;
128 }
129 }
130
131 if let Some(ref weights) = self.config.field_weights {
133 let result = self.compute_multi_field_score(a, b, weights);
135 if result.total >= self.config.threshold {
136 return result.total;
137 }
138 } else {
139 let fuzzy_score = self.compute_fuzzy_score(a, b);
141 if fuzzy_score >= self.config.threshold {
142 return fuzzy_score;
143 }
144 }
145
146 0.0
147 }
148
149 fn check_alias_match(&self, a: &Component, b: &Component) -> bool {
151 let names_a = self.get_all_names(a);
153 let names_b = self.get_all_names(b);
154
155 for name_a in &names_a {
156 if let Some(canonical) = self.alias_table.get_canonical(name_a) {
157 for name_b in &names_b {
158 if self.alias_table.is_alias(&canonical, name_b) {
159 return true;
160 }
161 }
162 }
163 }
164
165 false
166 }
167
168 fn get_all_names(&self, comp: &Component) -> Vec<String> {
170 let mut names = vec![comp.name.clone()];
171 names.extend(comp.identifiers.aliases.clone());
172
173 if let Some(purl) = &comp.identifiers.purl {
175 if let Some(name) = self.extract_name_from_purl(purl) {
176 names.push(name);
177 }
178 }
179
180 names
181 }
182
183 fn extract_name_from_purl(&self, purl: &str) -> Option<String> {
185 let without_pkg = purl.strip_prefix("pkg:")?;
187 let parts: Vec<&str> = without_pkg.split('/').collect();
188
189 if parts.len() >= 2 {
190 let name_part = parts.last()?;
191 let name = name_part.split('@').next()?;
193 Some(name.to_string())
194 } else {
195 None
196 }
197 }
198
199 fn check_ecosystem_rules(&self, a: &Component, b: &Component) -> Option<f64> {
201 let ecosystem_a = a.ecosystem.as_ref()?;
202 let ecosystem_b = b.ecosystem.as_ref()?;
203
204 if ecosystem_a != ecosystem_b {
206 return None;
207 }
208
209 let norm_a = self.ecosystem_rules.normalize_name(&a.name, ecosystem_a);
210 let norm_b = self.ecosystem_rules.normalize_name(&b.name, ecosystem_b);
211
212 if norm_a == norm_b {
213 return Some(0.90);
214 }
215
216 None
217 }
218
219 fn compute_fuzzy_score(&self, a: &Component, b: &Component) -> f64 {
221 let name_a = a.name.to_lowercase();
222 let name_b = b.name.to_lowercase();
223
224 let jw_score = jaro_winkler(&name_a, &name_b);
226
227 let max_len = name_a.len().max(name_b.len());
229 let lev_distance = levenshtein(&name_a, &name_b);
230 let lev_score = if max_len > 0 {
231 1.0 - (lev_distance as f64 / max_len as f64)
232 } else {
233 1.0
234 };
235
236 let token_score = Self::compute_token_similarity(&name_a, &name_b);
238
239 let phonetic_score = Self::compute_phonetic_similarity(&name_a, &name_b);
241
242 let char_score = (jw_score * self.config.jaro_winkler_weight)
244 + (lev_score * self.config.levenshtein_weight);
245
246 let combined = char_score.max(token_score).max(phonetic_score * 0.85);
249
250 let version_boost = Self::compute_version_similarity(&a.version, &b.version);
252
253 (combined + version_boost).min(1.0)
254 }
255
256 fn compute_token_similarity(name_a: &str, name_b: &str) -> f64 {
261 use std::collections::HashSet;
262
263 let tokens_a: HashSet<&str> = name_a
264 .split(['-', '_', '.', '@', '/'])
265 .filter(|t| !t.is_empty())
266 .collect();
267
268 let tokens_b: HashSet<&str> = name_b
269 .split(['-', '_', '.', '@', '/'])
270 .filter(|t| !t.is_empty())
271 .collect();
272
273 if tokens_a.is_empty() && tokens_b.is_empty() {
274 return 1.0;
275 }
276 if tokens_a.is_empty() || tokens_b.is_empty() {
277 return 0.0;
278 }
279
280 let intersection = tokens_a.intersection(&tokens_b).count();
281 let union = tokens_a.union(&tokens_b).count();
282
283 if union > 0 {
284 intersection as f64 / union as f64
285 } else {
286 0.0
287 }
288 }
289
290 fn compute_version_similarity(va: &Option<String>, vb: &Option<String>) -> f64 {
299 match (va, vb) {
300 (Some(a), Some(b)) if a == b => 0.10, (Some(a), Some(b)) => {
302 let parts_a: Vec<&str> = a.split('.').collect();
304 let parts_b: Vec<&str> = b.split('.').collect();
305
306 let major_a = parts_a.first().and_then(|s| s.parse::<u32>().ok());
308 let major_b = parts_b.first().and_then(|s| s.parse::<u32>().ok());
309 let minor_a = parts_a
310 .get(1)
311 .and_then(|s| s.split('-').next())
312 .and_then(|s| s.parse::<u32>().ok());
313 let minor_b = parts_b
314 .get(1)
315 .and_then(|s| s.split('-').next())
316 .and_then(|s| s.parse::<u32>().ok());
317
318 match (major_a, major_b, minor_a, minor_b) {
319 (Some(ma), Some(mb), Some(mia), Some(mib)) if ma == mb && mia == mib => 0.07,
320 (Some(ma), Some(mb), _, _) if ma == mb => 0.04,
321 _ => 0.0,
322 }
323 }
324 _ => 0.0, }
326 }
327
328 fn soundex(name: &str) -> String {
335 if name.is_empty() {
336 return String::new();
337 }
338
339 let name_upper: String = name
340 .to_uppercase()
341 .chars()
342 .filter(|c| c.is_ascii_alphabetic())
343 .collect();
344 if name_upper.is_empty() {
345 return String::new();
346 }
347
348 let mut chars = name_upper.chars();
349 let first_char = chars.next().expect("name_upper is non-empty after empty check above");
350 let mut code = String::with_capacity(4);
351 code.push(first_char);
352
353 let mut last_digit = Self::soundex_digit(first_char);
354
355 for c in chars {
356 let digit = Self::soundex_digit(c);
357 if digit != '0' && digit != last_digit {
358 code.push(digit);
359 if code.len() == 4 {
360 break;
361 }
362 }
363 if digit != '0' {
364 last_digit = digit;
365 }
366 }
367
368 while code.len() < 4 {
370 code.push('0');
371 }
372
373 code
374 }
375
376 fn soundex_digit(c: char) -> char {
378 match c {
379 'B' | 'F' | 'P' | 'V' => '1',
380 'C' | 'G' | 'J' | 'K' | 'Q' | 'S' | 'X' | 'Z' => '2',
381 'D' | 'T' => '3',
382 'L' => '4',
383 'M' | 'N' => '5',
384 'R' => '6',
385 _ => '0', }
387 }
388
389 pub fn compute_phonetic_similarity(name_a: &str, name_b: &str) -> f64 {
394 let soundex_a = Self::soundex(name_a);
396 let soundex_b = Self::soundex(name_b);
397
398 if !soundex_a.is_empty() && soundex_a == soundex_b {
399 return 1.0;
400 }
401
402 let tokens_a: Vec<&str> = name_a
404 .split(|c: char| !c.is_alphanumeric())
405 .filter(|t| !t.is_empty())
406 .collect();
407 let tokens_b: Vec<&str> = name_b
408 .split(|c: char| !c.is_alphanumeric())
409 .filter(|t| !t.is_empty())
410 .collect();
411
412 if tokens_a.is_empty() || tokens_b.is_empty() {
413 return 0.0;
414 }
415
416 let mut matches = 0;
418 let total = tokens_a.len().max(tokens_b.len());
419
420 for ta in &tokens_a {
421 let sa = Self::soundex(ta);
422 if sa.is_empty() {
423 continue;
424 }
425 for tb in &tokens_b {
426 let sb = Self::soundex(tb);
427 if sa == sb {
428 matches += 1;
429 break;
430 }
431 }
432 }
433
434 if total == 0 {
435 0.0
436 } else {
437 matches as f64 / total as f64
438 }
439 }
440
441 pub fn compute_multi_field_score(
445 &self,
446 a: &Component,
447 b: &Component,
448 weights: &config::MultiFieldWeights,
449 ) -> MultiFieldScoreResult {
450 use std::collections::HashSet;
451
452 let mut result = MultiFieldScoreResult::default();
453
454 let name_score = self.compute_fuzzy_score(a, b);
456 result.name_score = name_score;
457 result.total += name_score * weights.name;
458
459 let version_score = if weights.version_divergence_enabled {
461 compute_version_divergence_score(&a.version, &b.version, weights)
462 } else {
463 match (&a.version, &b.version) {
465 (Some(va), Some(vb)) if va == vb => 1.0,
466 (None, None) => 0.5, _ => 0.0,
468 }
469 };
470 result.version_score = version_score;
471 result.total += version_score * weights.version;
472
473 let (ecosystem_score, ecosystem_penalty) = match (&a.ecosystem, &b.ecosystem) {
475 (Some(ea), Some(eb)) if ea == eb => (1.0, 0.0),
476 (None, None) => (0.5, 0.0), (Some(_), Some(_)) => (0.0, weights.ecosystem_mismatch_penalty), _ => (0.0, 0.0), };
480 result.ecosystem_score = ecosystem_score;
481 result.total += ecosystem_score * weights.ecosystem + ecosystem_penalty;
482
483 let licenses_a: HashSet<_> = a
485 .licenses
486 .declared
487 .iter()
488 .map(|l| l.expression.as_str())
489 .collect();
490 let licenses_b: HashSet<_> = b
491 .licenses
492 .declared
493 .iter()
494 .map(|l| l.expression.as_str())
495 .collect();
496 let license_score = if licenses_a.is_empty() && licenses_b.is_empty() {
497 0.5 } else if licenses_a.is_empty() || licenses_b.is_empty() {
499 0.0 } else {
501 let intersection = licenses_a.intersection(&licenses_b).count();
502 let union = licenses_a.union(&licenses_b).count();
503 if union > 0 {
504 intersection as f64 / union as f64
505 } else {
506 0.0
507 }
508 };
509 result.license_score = license_score;
510 result.total += license_score * weights.licenses;
511
512 let supplier_score = match (&a.supplier, &b.supplier) {
514 (Some(sa), Some(sb)) if sa.name.to_lowercase() == sb.name.to_lowercase() => 1.0,
515 (None, None) => 0.5, _ => 0.0,
517 };
518 result.supplier_score = supplier_score;
519 result.total += supplier_score * weights.supplier;
520
521 let group_score = match (&a.group, &b.group) {
523 (Some(ga), Some(gb)) if ga.to_lowercase() == gb.to_lowercase() => 1.0,
524 (None, None) => 0.5, _ => 0.0,
526 };
527 result.group_score = group_score;
528 result.total += group_score * weights.group;
529
530 result.total = result.total.clamp(0.0, 1.0);
532
533 result
534 }
535}
536
537fn compute_version_divergence_score(
545 version_a: &Option<String>,
546 version_b: &Option<String>,
547 weights: &config::MultiFieldWeights,
548) -> f64 {
549 match (version_a, version_b) {
550 (Some(va), Some(vb)) if va == vb => 1.0,
551 (None, None) => 0.5, (Some(va), Some(vb)) => {
553 let parts_a = parse_semver_parts(va);
555 let parts_b = parse_semver_parts(vb);
556
557 match (parts_a, parts_b) {
558 (Some((maj_a, min_a, patch_a)), Some((maj_b, min_b, patch_b))) => {
559 if maj_a == maj_b && min_a == min_b {
560 let patch_diff = (patch_a as i64 - patch_b as i64).unsigned_abs() as f64;
562 (0.8 - patch_diff * 0.01).max(0.5)
563 } else if maj_a == maj_b {
564 let minor_diff = (min_a as i64 - min_b as i64).unsigned_abs() as f64;
566 (0.5 - minor_diff * weights.version_minor_penalty).max(0.2)
567 } else {
568 let major_diff = (maj_a as i64 - maj_b as i64).unsigned_abs() as f64;
570 (0.3 - major_diff * weights.version_major_penalty).max(0.0)
571 }
572 }
573 _ => {
574 let common_prefix_len = va
577 .chars()
578 .zip(vb.chars())
579 .take_while(|(a, b)| a == b)
580 .count();
581 let max_len = va.len().max(vb.len());
582 if max_len > 0 && common_prefix_len > 0 {
583 (common_prefix_len as f64 / max_len as f64 * 0.5).min(0.4)
584 } else {
585 0.1 }
587 }
588 }
589 }
590 _ => 0.0, }
592}
593
594fn parse_semver_parts(version: &str) -> Option<(u32, u32, u32)> {
597 let version = version.trim_start_matches(['v', 'V']);
599
600 let mut parts = version.split(['.', '-', '+']);
602
603 let major: u32 = parts.next()?.parse().ok()?;
604 let minor: u32 = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
605 let patch: u32 = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
606
607 Some((major, minor, patch))
608}
609
610#[derive(Debug, Clone, Default)]
612pub struct MultiFieldScoreResult {
613 pub total: f64,
615 pub name_score: f64,
617 pub version_score: f64,
619 pub ecosystem_score: f64,
621 pub license_score: f64,
623 pub supplier_score: f64,
625 pub group_score: f64,
627}
628
629impl MultiFieldScoreResult {
630 pub fn summary(&self) -> String {
632 format!(
633 "Total: {:.2} (name: {:.2}, version: {:.2}, ecosystem: {:.2}, licenses: {:.2}, supplier: {:.2}, group: {:.2})",
634 self.total,
635 self.name_score,
636 self.version_score,
637 self.ecosystem_score,
638 self.license_score,
639 self.supplier_score,
640 self.group_score
641 )
642 }
643}
644
645impl Default for FuzzyMatcher {
646 fn default() -> Self {
647 Self::new(FuzzyMatchConfig::balanced())
648 }
649}
650
651impl ComponentMatcher for FuzzyMatcher {
652 fn match_score(&self, a: &Component, b: &Component) -> f64 {
653 self.match_components(a, b)
654 }
655
656 fn match_detailed(&self, a: &Component, b: &Component) -> MatchResult {
657 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
659 let norm_a = self.purl_normalizer.normalize(purl_a);
660 let norm_b = self.purl_normalizer.normalize(purl_b);
661 if norm_a == norm_b {
662 return MatchResult::with_metadata(
663 1.0,
664 MatchTier::ExactIdentifier,
665 MatchMetadata {
666 matched_fields: vec!["purl".to_string()],
667 normalization: Some("purl_normalized".to_string()),
668 rule_id: None,
669 },
670 );
671 }
672 }
673
674 if self.check_alias_match(a, b) {
676 return MatchResult::with_metadata(
677 0.95,
678 MatchTier::Alias,
679 MatchMetadata {
680 matched_fields: vec!["name".to_string()],
681 normalization: Some("alias_table".to_string()),
682 rule_id: None,
683 },
684 );
685 }
686
687 if let Some(score) = self.check_ecosystem_rules(a, b) {
689 if score >= 0.90 {
690 return MatchResult::with_metadata(
691 score,
692 MatchTier::EcosystemRule,
693 MatchMetadata {
694 matched_fields: vec!["name".to_string(), "ecosystem".to_string()],
695 normalization: Some("ecosystem_rules".to_string()),
696 rule_id: None,
697 },
698 );
699 }
700 }
701
702 let fuzzy_score = self.compute_fuzzy_score(a, b);
704 if fuzzy_score >= self.config.threshold {
705 return MatchResult::with_metadata(
706 fuzzy_score,
707 MatchTier::Fuzzy,
708 MatchMetadata {
709 matched_fields: vec!["name".to_string()],
710 normalization: Some("fuzzy_similarity".to_string()),
711 rule_id: None,
712 },
713 );
714 }
715
716 MatchResult::no_match()
717 }
718
719 fn name(&self) -> &str {
720 "FuzzyMatcher"
721 }
722
723 fn threshold(&self) -> f64 {
724 self.config.threshold
725 }
726
727 fn explain_match(&self, a: &Component, b: &Component) -> MatchExplanation {
728 use strsim::{jaro_winkler, levenshtein};
729
730 if let (Some(purl_a), Some(purl_b)) = (&a.identifiers.purl, &b.identifiers.purl) {
732 let norm_a = self.purl_normalizer.normalize(purl_a);
733 let norm_b = self.purl_normalizer.normalize(purl_b);
734 if norm_a == norm_b {
735 return MatchExplanation::matched(
736 MatchTier::ExactIdentifier,
737 1.0,
738 format!(
739 "Exact PURL match: '{}' equals '{}' after normalization",
740 purl_a, purl_b
741 ),
742 )
743 .with_normalization("purl_normalized");
744 }
745 }
746
747 if self.check_alias_match(a, b) {
749 return MatchExplanation::matched(
750 MatchTier::Alias,
751 0.95,
752 format!(
753 "'{}' and '{}' are known aliases of the same package",
754 a.name, b.name
755 ),
756 )
757 .with_normalization("alias_table");
758 }
759
760 if let Some(score) = self.check_ecosystem_rules(a, b) {
762 if score >= 0.90 {
763 let ecosystem = a
764 .ecosystem
765 .as_ref()
766 .map(|e| e.to_string())
767 .unwrap_or_else(|| "unknown".to_string());
768 return MatchExplanation::matched(
769 MatchTier::EcosystemRule,
770 score,
771 format!(
772 "Names match after {} ecosystem normalization: '{}' -> '{}'",
773 ecosystem, a.name, b.name
774 ),
775 )
776 .with_normalization(format!("{}_normalization", ecosystem));
777 }
778 }
779
780 let name_a = a.name.to_lowercase();
782 let name_b = b.name.to_lowercase();
783
784 let jw_score = jaro_winkler(&name_a, &name_b);
785 let max_len = name_a.len().max(name_b.len());
786 let lev_distance = levenshtein(&name_a, &name_b);
787 let lev_score = if max_len > 0 {
788 1.0 - (lev_distance as f64 / max_len as f64)
789 } else {
790 1.0
791 };
792
793 let jw_weighted = jw_score * self.config.jaro_winkler_weight;
794 let lev_weighted = lev_score * self.config.levenshtein_weight;
795
796 let version_boost = if a.version == b.version && a.version.is_some() {
797 0.05
798 } else {
799 0.0
800 };
801
802 let combined = (jw_weighted + lev_weighted + version_boost).min(1.0);
803
804 let mut explanation = if combined >= self.config.threshold {
805 MatchExplanation::matched(
806 MatchTier::Fuzzy,
807 combined,
808 format!(
809 "Fuzzy match: '{}' ~ '{}' with {:.0}% similarity",
810 a.name,
811 b.name,
812 combined * 100.0
813 ),
814 )
815 } else {
816 MatchExplanation::no_match(format!(
817 "Fuzzy similarity {:.2} below threshold {:.2}",
818 combined, self.config.threshold
819 ))
820 };
821
822 explanation = explanation
824 .with_score_component(ScoreComponent {
825 name: "Jaro-Winkler".to_string(),
826 weight: self.config.jaro_winkler_weight,
827 raw_score: jw_score,
828 weighted_score: jw_weighted,
829 description: format!("'{}' vs '{}' = {:.2}", name_a, name_b, jw_score),
830 })
831 .with_score_component(ScoreComponent {
832 name: "Levenshtein".to_string(),
833 weight: self.config.levenshtein_weight,
834 raw_score: lev_score,
835 weighted_score: lev_weighted,
836 description: format!(
837 "edit distance {} / max_len {} = {:.2}",
838 lev_distance, max_len, lev_score
839 ),
840 });
841
842 if version_boost > 0.0 {
843 explanation = explanation.with_score_component(ScoreComponent {
844 name: "Version boost".to_string(),
845 weight: 1.0,
846 raw_score: version_boost,
847 weighted_score: version_boost,
848 description: format!("versions match: {:?}", a.version),
849 });
850 }
851
852 explanation.with_normalization("lowercase")
853 }
854}
855
856#[cfg(test)]
857mod tests {
858 use super::*;
859
860 #[test]
861 fn test_exact_purl_match() {
862 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced());
863
864 let mut a = Component::new("lodash".to_string(), "comp-1".to_string());
865 a.identifiers.purl = Some("pkg:npm/lodash@4.17.21".to_string());
866
867 let mut b = Component::new("lodash".to_string(), "comp-2".to_string());
868 b.identifiers.purl = Some("pkg:npm/lodash@4.17.21".to_string());
869
870 assert_eq!(matcher.match_components(&a, &b), 1.0);
871 }
872
873 #[test]
874 fn test_fuzzy_name_match() {
875 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());
876
877 let a = Component::new("lodash-es".to_string(), "comp-1".to_string());
879 let b = Component::new("lodash".to_string(), "comp-2".to_string());
880
881 let score = matcher.match_components(&a, &b);
882 assert!(
884 score >= 0.70,
885 "lodash-es vs lodash should have score >= 0.70, got {}",
886 score
887 );
888 }
889
890 #[test]
891 fn test_different_names_low_score() {
892 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::strict());
893
894 let a = Component::new("react".to_string(), "comp-1".to_string());
895 let b = Component::new("angular".to_string(), "comp-2".to_string());
896
897 let score = matcher.match_components(&a, &b);
898 assert!(
899 score < 0.5,
900 "react vs angular should have low score, got {}",
901 score
902 );
903 }
904
905 #[test]
906 fn test_multi_field_weights_normalized() {
907 let weights = config::MultiFieldWeights::balanced();
908 assert!(
909 weights.is_normalized(),
910 "Balanced weights should be normalized"
911 );
912
913 let weights = config::MultiFieldWeights::name_focused();
914 assert!(
915 weights.is_normalized(),
916 "Name-focused weights should be normalized"
917 );
918
919 let weights = config::MultiFieldWeights::security_focused();
920 assert!(
921 weights.is_normalized(),
922 "Security-focused weights should be normalized"
923 );
924 }
925
926 #[test]
927 fn test_multi_field_scoring_same_component() {
928 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
929 let weights = config::MultiFieldWeights::balanced();
930
931 let mut a = Component::new("lodash".to_string(), "comp-1".to_string());
932 a.version = Some("4.17.21".to_string());
933 a.ecosystem = Some(crate::model::Ecosystem::Npm);
934
935 let result = matcher.compute_multi_field_score(&a, &a, &weights);
938 assert!(
939 result.total > 0.90,
940 "Same component should score > 0.90, got {}",
941 result.total
942 );
943 assert_eq!(result.name_score, 1.0);
944 assert_eq!(result.version_score, 1.0);
945 assert_eq!(result.ecosystem_score, 1.0);
946 assert_eq!(
948 result.license_score, 0.5,
949 "Empty licenses should be neutral"
950 );
951 assert_eq!(
952 result.supplier_score, 0.5,
953 "Empty supplier should be neutral"
954 );
955 assert_eq!(result.group_score, 0.5, "Empty group should be neutral");
956 }
957
958 #[test]
959 fn test_multi_field_scoring_different_versions() {
960 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
961 let weights = config::MultiFieldWeights::balanced();
962
963 let mut a = Component::new("lodash".to_string(), "comp-1".to_string());
964 a.version = Some("4.17.21".to_string());
965 a.ecosystem = Some(crate::model::Ecosystem::Npm);
966
967 let mut b = Component::new("lodash".to_string(), "comp-2".to_string());
968 b.version = Some("4.17.20".to_string()); b.ecosystem = Some(crate::model::Ecosystem::Npm);
970
971 let result = matcher.compute_multi_field_score(&a, &b, &weights);
972
973 assert!(result.name_score > 0.9, "Name score should be > 0.9");
975
976 assert!(
980 result.version_score > 0.7,
981 "Same major.minor with patch diff should score high, got {}",
982 result.version_score
983 );
984
985 assert_eq!(
987 result.ecosystem_score, 1.0,
988 "Same ecosystem should score 1.0"
989 );
990
991 assert!(
993 result.total > 0.8,
994 "Total should be > 0.8, got {}",
995 result.total
996 );
997 }
998
999 #[test]
1000 fn test_multi_field_scoring_different_major_versions() {
1001 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
1002 let weights = config::MultiFieldWeights::balanced();
1003
1004 let mut a = Component::new("lodash".to_string(), "comp-1".to_string());
1005 a.version = Some("4.17.21".to_string());
1006 a.ecosystem = Some(crate::model::Ecosystem::Npm);
1007
1008 let mut b = Component::new("lodash".to_string(), "comp-2".to_string());
1009 b.version = Some("3.10.0".to_string()); b.ecosystem = Some(crate::model::Ecosystem::Npm);
1011
1012 let result = matcher.compute_multi_field_score(&a, &b, &weights);
1013
1014 assert!(
1018 result.version_score < 0.3,
1019 "Different major versions should score low, got {}",
1020 result.version_score
1021 );
1022 }
1023
1024 #[test]
1025 fn test_multi_field_scoring_legacy_weights() {
1026 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced_multi_field());
1028 let weights = config::MultiFieldWeights::legacy();
1029
1030 let mut a = Component::new("lodash".to_string(), "comp-1".to_string());
1031 a.version = Some("4.17.21".to_string());
1032 a.ecosystem = Some(crate::model::Ecosystem::Npm);
1033
1034 let mut b = Component::new("lodash".to_string(), "comp-2".to_string());
1035 b.version = Some("4.17.20".to_string());
1036 b.ecosystem = Some(crate::model::Ecosystem::Npm);
1037
1038 let result = matcher.compute_multi_field_score(&a, &b, &weights);
1039
1040 assert_eq!(
1042 result.version_score, 0.0,
1043 "Legacy mode: different versions should score 0"
1044 );
1045 }
1046
1047 #[test]
1048 fn test_multi_field_config_preset() {
1049 let config = FuzzyMatchConfig::from_preset("balanced-multi").unwrap();
1050 assert!(config.field_weights.is_some());
1051
1052 let config = FuzzyMatchConfig::from_preset("strict_multi").unwrap();
1053 assert!(config.field_weights.is_some());
1054 }
1055
1056 #[test]
1057 fn test_multi_field_score_result_summary() {
1058 let result = MultiFieldScoreResult {
1059 total: 0.85,
1060 name_score: 1.0,
1061 version_score: 0.0,
1062 ecosystem_score: 1.0,
1063 license_score: 0.5,
1064 supplier_score: 0.5,
1065 group_score: 0.5,
1066 };
1067
1068 let summary = result.summary();
1069 assert!(summary.contains("0.85"));
1070 assert!(summary.contains("name: 1.00"));
1071 }
1072
1073 #[test]
1074 fn test_token_similarity_exact() {
1075 let score = FuzzyMatcher::compute_token_similarity("react-dom", "react-dom");
1076 assert_eq!(score, 1.0);
1077 }
1078
1079 #[test]
1080 fn test_token_similarity_reordered() {
1081 let score = FuzzyMatcher::compute_token_similarity("react-dom", "dom-react");
1083 assert_eq!(score, 1.0, "Reordered tokens should match perfectly");
1084 }
1085
1086 #[test]
1087 fn test_token_similarity_partial() {
1088 let score = FuzzyMatcher::compute_token_similarity("react-dom-utils", "react-dom");
1090 assert!(
1092 (score - 0.667).abs() < 0.01,
1093 "Partial overlap should be ~0.67, got {}",
1094 score
1095 );
1096 }
1097
1098 #[test]
1099 fn test_token_similarity_different_delimiters() {
1100 let score = FuzzyMatcher::compute_token_similarity("my_package_name", "my-package-name");
1102 assert_eq!(score, 1.0, "Different delimiters should match");
1103 }
1104
1105 #[test]
1106 fn test_token_similarity_no_overlap() {
1107 let score = FuzzyMatcher::compute_token_similarity("react", "angular");
1108 assert_eq!(score, 0.0, "No common tokens should score 0");
1109 }
1110
1111 #[test]
1112 fn test_version_similarity_exact() {
1113 let score = FuzzyMatcher::compute_version_similarity(
1114 &Some("1.2.3".to_string()),
1115 &Some("1.2.3".to_string()),
1116 );
1117 assert_eq!(score, 0.10, "Exact version match should give max boost");
1118 }
1119
1120 #[test]
1121 fn test_version_similarity_same_major_minor() {
1122 let score = FuzzyMatcher::compute_version_similarity(
1123 &Some("1.2.3".to_string()),
1124 &Some("1.2.4".to_string()),
1125 );
1126 assert_eq!(score, 0.07, "Same major.minor should give 0.07 boost");
1127 }
1128
1129 #[test]
1130 fn test_version_similarity_same_major() {
1131 let score = FuzzyMatcher::compute_version_similarity(
1132 &Some("1.2.3".to_string()),
1133 &Some("1.5.0".to_string()),
1134 );
1135 assert_eq!(score, 0.04, "Same major should give 0.04 boost");
1136 }
1137
1138 #[test]
1139 fn test_version_similarity_different_major() {
1140 let score = FuzzyMatcher::compute_version_similarity(
1141 &Some("1.2.3".to_string()),
1142 &Some("2.0.0".to_string()),
1143 );
1144 assert_eq!(score, 0.0, "Different major versions should give no boost");
1145 }
1146
1147 #[test]
1148 fn test_version_similarity_prerelease() {
1149 let score = FuzzyMatcher::compute_version_similarity(
1151 &Some("1.2.3-beta".to_string()),
1152 &Some("1.2.4".to_string()),
1153 );
1154 assert_eq!(score, 0.07, "Prerelease should still match major.minor");
1155 }
1156
1157 #[test]
1158 fn test_version_similarity_missing() {
1159 let score = FuzzyMatcher::compute_version_similarity(&None, &Some("1.0.0".to_string()));
1160 assert_eq!(score, 0.0, "Missing version should give no boost");
1161
1162 let score = FuzzyMatcher::compute_version_similarity(&None, &None);
1163 assert_eq!(score, 0.0, "Both missing should give no boost");
1164 }
1165
1166 #[test]
1167 fn test_fuzzy_match_with_reordered_tokens() {
1168 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());
1169
1170 let a = Component::new("react-dom".to_string(), "comp-1".to_string());
1171 let b = Component::new("dom-react".to_string(), "comp-2".to_string());
1172
1173 let score = matcher.match_components(&a, &b);
1174 assert!(
1176 score > 0.5,
1177 "Reordered names should still match, got {}",
1178 score
1179 );
1180 }
1181
1182 #[test]
1183 fn test_fuzzy_match_version_boost() {
1184 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());
1185
1186 let mut a = Component::new("lodash-utils".to_string(), "comp-1".to_string());
1188 a.version = Some("4.17.21".to_string());
1189
1190 let mut b = Component::new("lodash-util".to_string(), "comp-2".to_string());
1191 b.version = Some("4.17.20".to_string()); let mut c = Component::new("lodash-util".to_string(), "comp-3".to_string());
1194 c.version = Some("5.0.0".to_string()); let score_same_minor = matcher.match_components(&a, &b);
1197 let score_diff_major = matcher.match_components(&a, &c);
1198
1199 assert!(score_same_minor > 0.0, "Same minor should match");
1201 assert!(score_diff_major > 0.0, "Different major should still match");
1202 assert!(
1203 score_same_minor > score_diff_major,
1204 "Same minor version should score higher: {} vs {}",
1205 score_same_minor,
1206 score_diff_major
1207 );
1208 }
1209
1210 #[test]
1211 fn test_soundex_basic() {
1212 assert_eq!(FuzzyMatcher::soundex("Robert"), "R163");
1214 assert_eq!(FuzzyMatcher::soundex("Rupert"), "R163"); assert_eq!(FuzzyMatcher::soundex("Smith"), "S530");
1216 assert_eq!(FuzzyMatcher::soundex("Smyth"), "S530"); }
1218
1219 #[test]
1220 fn test_soundex_empty() {
1221 assert_eq!(FuzzyMatcher::soundex(""), "");
1222 assert_eq!(FuzzyMatcher::soundex("123"), ""); }
1224
1225 #[test]
1226 fn test_phonetic_similarity_exact() {
1227 let score = FuzzyMatcher::compute_phonetic_similarity("color", "colour");
1228 assert_eq!(score, 1.0, "color and colour should match phonetically");
1229 }
1230
1231 #[test]
1232 fn test_phonetic_similarity_different() {
1233 let score = FuzzyMatcher::compute_phonetic_similarity("react", "angular");
1234 assert!(
1235 score < 0.5,
1236 "Different names should have low phonetic similarity"
1237 );
1238 }
1239
1240 #[test]
1241 fn test_phonetic_similarity_compound() {
1242 let score = FuzzyMatcher::compute_phonetic_similarity("json-parser", "jayson-parser");
1244 assert!(
1245 score > 0.5,
1246 "Similar sounding compound names should match: {}",
1247 score
1248 );
1249 }
1250
1251 #[test]
1252 fn test_fuzzy_match_with_phonetic() {
1253 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::permissive());
1254
1255 let a = Component::new("color-utils".to_string(), "comp-1".to_string());
1256 let b = Component::new("colour-utils".to_string(), "comp-2".to_string());
1257
1258 let score = matcher.match_components(&a, &b);
1259 assert!(
1260 score > 0.7,
1261 "Phonetically similar names should match: {}",
1262 score
1263 );
1264 }
1265}