1use crate::model::Ecosystem;
7use regex::Regex;
8use std::collections::HashMap;
9
10use super::ecosystem_config::{
11 ConfigError, EcosystemConfig, EcosystemRulesConfig, NormalizationConfig, ScopeHandling,
12 TyposquatEntry,
13};
14
/// Ecosystem-specific package-name rules backed by an [`EcosystemRulesConfig`].
///
/// Regular expressions derived from the configuration are compiled once at
/// construction time and cached in the maps below, keyed by the lowercase
/// ecosystem key produced by `ecosystem_key`.
pub struct EcosystemRules {
    /// The configuration every lookup reads from.
    config: EcosystemRulesConfig,
    /// Ecosystem key -> compiled `security.suspicious_patterns`.
    suspicious_patterns: HashMap<String, Vec<Regex>>,
    /// Ecosystem key -> (compiled `from` pattern, `to` replacement) per group migration.
    migration_patterns: HashMap<String, Vec<(Regex, String)>>,
    /// Ecosystem key -> group name -> regexes compiled from glob (`*`) members.
    package_group_patterns: HashMap<String, HashMap<String, Vec<Regex>>>,
}
27
28impl EcosystemRules {
29 #[must_use]
31 pub fn new() -> Self {
32 Self::with_config(EcosystemRulesConfig::builtin())
33 }
34
35 #[must_use]
37 pub fn with_config(config: EcosystemRulesConfig) -> Self {
38 let suspicious_patterns = Self::compile_suspicious_patterns(&config);
39 let migration_patterns = Self::compile_migration_patterns(&config);
40 let package_group_patterns = Self::compile_package_group_patterns(&config);
41
42 Self {
43 config,
44 suspicious_patterns,
45 migration_patterns,
46 package_group_patterns,
47 }
48 }
49
50 pub fn from_file(path: &std::path::Path) -> Result<Self, ConfigError> {
52 let config = EcosystemRulesConfig::from_file(path)?;
53 Ok(Self::with_config(config))
54 }
55
56 #[must_use]
58 pub fn from_default_locations() -> Self {
59 let config = EcosystemRulesConfig::load_with_precedence(&[
60 ".sbom-tools/ecosystem-rules.yaml",
61 ".sbom-tools/ecosystem-rules.json",
62 "~/.config/sbom-tools/ecosystem-rules.yaml",
63 "~/.config/sbom-tools/ecosystem-rules.json",
64 ])
65 .unwrap_or_else(|_| EcosystemRulesConfig::builtin());
66
67 Self::with_config(config)
68 }
69
70 fn compile_suspicious_patterns(config: &EcosystemRulesConfig) -> HashMap<String, Vec<Regex>> {
72 let mut patterns = HashMap::with_capacity(config.ecosystems.len());
73
74 for (ecosystem, eco_config) in &config.ecosystems {
75 let mut compiled = Vec::with_capacity(eco_config.security.suspicious_patterns.len());
76 for pattern in &eco_config.security.suspicious_patterns {
77 if let Ok(re) = Regex::new(pattern) {
78 compiled.push(re);
79 }
80 }
81 if !compiled.is_empty() {
82 patterns.insert(ecosystem.clone(), compiled);
83 }
84 }
85
86 patterns
87 }
88
89 fn compile_migration_patterns(
91 config: &EcosystemRulesConfig,
92 ) -> HashMap<String, Vec<(Regex, String)>> {
93 let mut patterns = HashMap::with_capacity(config.ecosystems.len());
94
95 for (ecosystem, eco_config) in &config.ecosystems {
96 let mut compiled = Vec::with_capacity(eco_config.group_migrations.len());
97 for migration in &eco_config.group_migrations {
98 let regex_pattern = migration.from.replace('.', r"\.").replace('*', ".*");
100 if let Ok(re) = Regex::new(&format!("^{regex_pattern}$")) {
101 compiled.push((re, migration.to.clone()));
102 }
103 }
104 if !compiled.is_empty() {
105 patterns.insert(ecosystem.clone(), compiled);
106 }
107 }
108
109 patterns
110 }
111
112 fn compile_package_group_patterns(
114 config: &EcosystemRulesConfig,
115 ) -> HashMap<String, HashMap<String, Vec<Regex>>> {
116 let mut eco_patterns = HashMap::with_capacity(config.ecosystems.len());
117
118 for (ecosystem, eco_config) in &config.ecosystems {
119 let mut group_patterns = HashMap::with_capacity(eco_config.package_groups.len());
120
121 for (group_name, group) in &eco_config.package_groups {
122 let glob_count = group.members.iter().filter(|m| m.contains('*')).count();
124 let mut compiled = Vec::with_capacity(glob_count);
125 for member in &group.members {
126 if member.contains('*') {
127 let regex_pattern = member.replace('.', r"\.").replace('*', ".*");
129 if let Ok(re) = Regex::new(&format!("^{regex_pattern}$")) {
130 compiled.push(re);
131 }
132 }
133 }
134 if !compiled.is_empty() {
135 group_patterns.insert(group_name.clone(), compiled);
136 }
137 }
138
139 if !group_patterns.is_empty() {
140 eco_patterns.insert(ecosystem.clone(), group_patterns);
141 }
142 }
143
144 eco_patterns
145 }
146
/// Returns a reference to the configuration these rules were built from.
#[must_use]
pub const fn config(&self) -> &EcosystemRulesConfig {
    &self.config
}
152
153 #[must_use]
155 pub fn normalize_name(&self, name: &str, ecosystem: &Ecosystem) -> String {
156 let eco_key = Self::ecosystem_key(ecosystem);
157
158 self.config.ecosystems.get(&eco_key).map_or_else(
159 || {
160 name.to_lowercase()
162 },
163 |eco_config| self.apply_normalization(name, eco_config),
164 )
165 }
166
167 fn apply_normalization(&self, name: &str, config: &EcosystemConfig) -> String {
169 let norm = &config.normalization;
170 let mut result = name.to_string();
171
172 if result.starts_with('@') {
174 result = self.normalize_scoped_name(&result, norm);
175 } else {
176 if !norm.case_sensitive {
178 result = result.to_lowercase();
179 }
180 }
181
182 for char_group in &norm.equivalent_chars {
184 if char_group.len() >= 2 {
185 let target = &char_group[0];
186 for source in &char_group[1..] {
187 result = result.replace(source.as_str(), target);
188 }
189 }
190 }
191
192 if norm.collapse_separators {
194 result = self.collapse_separators(&result);
195 }
196
197 if norm.strip_version_suffix {
199 result = self.strip_go_version_suffix(&result);
200 }
201
202 result
203 }
204
205 fn normalize_scoped_name(&self, name: &str, norm: &NormalizationConfig) -> String {
207 match norm.scope_handling {
208 ScopeHandling::Lowercase => name.to_lowercase(),
209 ScopeHandling::PreserveScopeCase => name.find('/').map_or_else(
210 || name.to_lowercase(),
211 |slash_pos| {
212 let scope = &name[..slash_pos];
213 let pkg_name = &name[slash_pos + 1..];
214 format!("{}/{}", scope.to_lowercase(), pkg_name.to_lowercase())
215 },
216 ),
217 ScopeHandling::PreserveCase => name.to_string(),
218 }
219 }
220
221 fn collapse_separators(&self, name: &str) -> String {
223 let mut result = String::with_capacity(name.len());
224 let mut last_was_sep = false;
225
226 for c in name.chars() {
227 let is_sep = c == '-' || c == '_' || c == '.';
228 if is_sep {
229 if !last_was_sep {
230 result.push(c);
231 }
232 last_was_sep = true;
233 } else {
234 result.push(c);
235 last_was_sep = false;
236 }
237 }
238
239 result
241 .trim_matches(|c| c == '-' || c == '_' || c == '.')
242 .to_string()
243 }
244
245 fn strip_go_version_suffix(&self, name: &str) -> String {
247 use std::sync::LazyLock;
248 static GO_VERSION_SUFFIX: LazyLock<Regex> =
249 LazyLock::new(|| Regex::new(r"/v\d+$").expect("static regex"));
250 GO_VERSION_SUFFIX.replace(name, "").to_string()
251 }
252
253 #[must_use]
255 pub fn names_match(&self, name_a: &str, name_b: &str, ecosystem: &Ecosystem) -> bool {
256 let norm_a = self.normalize_name(name_a, ecosystem);
257 let norm_b = self.normalize_name(name_b, ecosystem);
258 norm_a == norm_b
259 }
260
261 #[must_use]
263 pub fn get_canonical(&self, name: &str, ecosystem: &Ecosystem) -> Option<String> {
264 let eco_key = Self::ecosystem_key(ecosystem);
265 let name_lower = name.to_lowercase();
266
267 if let Some(eco_config) = self.config.ecosystems.get(&eco_key) {
268 for (canonical, aliases) in &eco_config.aliases {
269 if canonical.to_lowercase() == name_lower {
270 return Some(canonical.clone());
271 }
272 for alias in aliases {
273 if alias.to_lowercase() == name_lower {
274 return Some(canonical.clone());
275 }
276 }
277 }
278 }
279
280 for equiv in &self.config.custom_rules.equivalences {
282 if equiv.canonical.to_lowercase() == name_lower {
283 return Some(equiv.canonical.clone());
284 }
285 for alias in &equiv.aliases {
286 if alias.to_lowercase() == name_lower {
287 return Some(equiv.canonical.clone());
288 }
289 }
290 }
291
292 None
293 }
294
295 #[must_use]
297 pub fn is_alias(&self, canonical: &str, name: &str, ecosystem: &Ecosystem) -> bool {
298 let eco_key = Self::ecosystem_key(ecosystem);
299 let name_lower = name.to_lowercase();
300 let canonical_lower = canonical.to_lowercase();
301
302 if let Some(eco_config) = self.config.ecosystems.get(&eco_key)
303 && let Some(aliases) = eco_config.aliases.get(&canonical_lower)
304 {
305 return aliases.iter().any(|a| a.to_lowercase() == name_lower);
306 }
307
308 false
309 }
310
311 #[must_use]
313 pub fn get_strip_suffixes(&self, ecosystem: &Ecosystem) -> Vec<&str> {
314 let eco_key = Self::ecosystem_key(ecosystem);
315
316 self.config
317 .ecosystems
318 .get(&eco_key)
319 .map(|c| {
320 c.strip_suffixes
321 .iter()
322 .map(std::string::String::as_str)
323 .collect()
324 })
325 .unwrap_or_default()
326 }
327
328 #[must_use]
330 pub fn get_strip_prefixes(&self, ecosystem: &Ecosystem) -> Vec<&str> {
331 let eco_key = Self::ecosystem_key(ecosystem);
332
333 self.config
334 .ecosystems
335 .get(&eco_key)
336 .map(|c| {
337 c.strip_prefixes
338 .iter()
339 .map(std::string::String::as_str)
340 .collect()
341 })
342 .unwrap_or_default()
343 }
344
345 #[must_use]
347 pub fn strip_affixes(&self, name: &str, ecosystem: &Ecosystem) -> String {
348 let mut result = name.to_lowercase();
349
350 for prefix in self.get_strip_prefixes(ecosystem) {
351 if result.starts_with(prefix) {
352 result = result[prefix.len()..].to_string();
353 break;
354 }
355 }
356
357 for suffix in self.get_strip_suffixes(ecosystem) {
358 if result.ends_with(suffix) {
359 result = result[..result.len() - suffix.len()].to_string();
360 break;
361 }
362 }
363
364 result
365 }
366
367 #[must_use]
369 pub fn is_typosquat(&self, name: &str, ecosystem: &Ecosystem) -> Option<&TyposquatEntry> {
370 if !self.config.settings.enable_security_checks {
371 return None;
372 }
373
374 let eco_key = Self::ecosystem_key(ecosystem);
375 let name_lower = name.to_lowercase();
376
377 if let Some(eco_config) = self.config.ecosystems.get(&eco_key) {
378 for entry in &eco_config.security.known_typosquats {
379 if entry.malicious.to_lowercase() == name_lower {
380 return Some(entry);
381 }
382 }
383 }
384
385 None
386 }
387
388 #[must_use]
390 pub fn is_suspicious(&self, name: &str, ecosystem: &Ecosystem) -> bool {
391 if !self.config.settings.enable_security_checks {
392 return false;
393 }
394
395 let eco_key = Self::ecosystem_key(ecosystem);
396
397 self.suspicious_patterns
398 .get(&eco_key)
399 .is_some_and(|patterns| patterns.iter().any(|re| re.is_match(name)))
400 }
401
402 #[must_use]
404 pub fn is_known_malicious(&self, name: &str, ecosystem: &Ecosystem) -> bool {
405 if !self.config.settings.enable_security_checks {
406 return false;
407 }
408
409 let eco_key = Self::ecosystem_key(ecosystem);
410 let name_lower = name.to_lowercase();
411
412 self.config
413 .ecosystems
414 .get(&eco_key)
415 .is_some_and(|eco_config| {
416 eco_config
417 .security
418 .known_malicious
419 .iter()
420 .any(|m| m.to_lowercase() == name_lower)
421 })
422 }
423
424 #[must_use]
426 pub fn get_migrated_group(&self, group: &str, ecosystem: &Ecosystem) -> Option<String> {
427 let eco_key = Self::ecosystem_key(ecosystem);
428
429 if let Some(patterns) = self.migration_patterns.get(&eco_key) {
430 for (pattern, replacement) in patterns {
431 if pattern.is_match(group) {
432 let migrated = pattern.replace(group, replacement.as_str());
433 return Some(migrated.to_string());
434 }
435 }
436 }
437
438 None
439 }
440
441 #[must_use]
443 pub fn get_package_group(&self, name: &str, ecosystem: &Ecosystem) -> Option<&str> {
444 let eco_key = Self::ecosystem_key(ecosystem);
445 let name_lower = name.to_lowercase();
446
447 if let Some(eco_config) = self.config.ecosystems.get(&eco_key) {
448 let compiled_patterns = self.package_group_patterns.get(&eco_key);
450
451 for (group_name, group) in &eco_config.package_groups {
452 if group.canonical.to_lowercase() == name_lower {
454 return Some(group_name);
455 }
456
457 for member in &group.members {
459 if member.contains('*') {
460 if let Some(group_patterns) = compiled_patterns
462 && let Some(patterns) = group_patterns.get(group_name)
463 && patterns.iter().any(|re| re.is_match(&name_lower))
464 {
465 return Some(group_name);
466 }
467 } else if member.to_lowercase() == name_lower {
468 return Some(group_name);
469 }
470 }
471 }
472 }
473
474 None
475 }
476
477 #[must_use]
479 pub fn get_cross_ecosystem_equivalent(
480 &self,
481 concept: &str,
482 target_ecosystem: &Ecosystem,
483 ) -> Option<&str> {
484 let eco_key = Self::ecosystem_key(target_ecosystem);
485
486 self.config
487 .cross_ecosystem
488 .get(concept)
489 .and_then(|mapping| mapping.get(&eco_key))
490 .and_then(|opt| opt.as_deref())
491 }
492
493 #[must_use]
495 pub fn is_internal_package(&self, name: &str) -> bool {
496 self.config
497 .custom_rules
498 .internal_prefixes
499 .iter()
500 .any(|prefix| name.starts_with(prefix))
501 }
502
503 #[must_use]
505 pub fn is_ignored(&self, name: &str) -> bool {
506 let name_lower = name.to_lowercase();
507 self.config
508 .custom_rules
509 .ignored_packages
510 .iter()
511 .any(|p| p.to_lowercase() == name_lower)
512 }
513
514 fn ecosystem_key(ecosystem: &Ecosystem) -> String {
516 match ecosystem {
517 Ecosystem::Npm => "npm".to_string(),
518 Ecosystem::PyPi => "pypi".to_string(),
519 Ecosystem::Cargo => "cargo".to_string(),
520 Ecosystem::Maven => "maven".to_string(),
521 Ecosystem::Golang => "golang".to_string(),
522 Ecosystem::Nuget => "nuget".to_string(),
523 Ecosystem::RubyGems => "rubygems".to_string(),
524 Ecosystem::Composer => "composer".to_string(),
525 Ecosystem::CocoaPods => "cocoapods".to_string(),
526 Ecosystem::Swift => "swift".to_string(),
527 Ecosystem::Hex => "hex".to_string(),
528 Ecosystem::Pub => "pub".to_string(),
529 Ecosystem::Hackage => "hackage".to_string(),
530 Ecosystem::Cpan => "cpan".to_string(),
531 Ecosystem::Cran => "cran".to_string(),
532 Ecosystem::Conda => "conda".to_string(),
533 Ecosystem::Conan => "conan".to_string(),
534 Ecosystem::Deb => "deb".to_string(),
535 Ecosystem::Rpm => "rpm".to_string(),
536 Ecosystem::Apk => "apk".to_string(),
537 Ecosystem::Generic => "generic".to_string(),
538 Ecosystem::Unknown(s) => s.to_lowercase(),
539 }
540 }
541}
542
/// The default rule set is the one produced by [`EcosystemRules::new`].
impl Default for EcosystemRules {
    /// Delegates to [`EcosystemRules::new`].
    fn default() -> Self {
        Self::new()
    }
}
548
// Unit tests. All but `test_custom_config` run against the rules returned by
// `EcosystemRules::new()`, so their expected values reflect the built-in configuration.
#[cfg(test)]
mod tests {
    use super::*;

    // Hyphen, underscore and dot spellings normalize to the same PyPI name.
    #[test]
    fn test_pypi_normalization() {
        let rules = EcosystemRules::new();

        assert_eq!(
            rules.normalize_name("python-dateutil", &Ecosystem::PyPi),
            "python-dateutil"
        );
        assert_eq!(
            rules.normalize_name("python_dateutil", &Ecosystem::PyPi),
            "python-dateutil"
        );
        assert_eq!(
            rules.normalize_name("Python.Dateutil", &Ecosystem::PyPi),
            "python-dateutil"
        );
    }

    // Cargo names normalize to the underscore spelling.
    #[test]
    fn test_cargo_normalization() {
        let rules = EcosystemRules::new();

        assert_eq!(
            rules.normalize_name("serde-json", &Ecosystem::Cargo),
            "serde_json"
        );
        assert_eq!(
            rules.normalize_name("serde_json", &Ecosystem::Cargo),
            "serde_json"
        );
    }

    // Scoped npm names come out fully lowercased, scope included.
    #[test]
    fn test_npm_scoped_normalization() {
        let rules = EcosystemRules::new();

        assert_eq!(
            rules.normalize_name("@Angular/Core", &Ecosystem::Npm),
            "@angular/core"
        );
    }

    // `names_match` compares the normalized forms of both names.
    #[test]
    fn test_names_match() {
        let rules = EcosystemRules::new();

        assert!(rules.names_match("python-dateutil", "python_dateutil", &Ecosystem::PyPi));
        assert!(rules.names_match("serde-json", "serde_json", &Ecosystem::Cargo));
    }

    // One configured prefix or suffix is removed per call.
    #[test]
    fn test_strip_affixes() {
        let rules = EcosystemRules::new();

        assert_eq!(
            rules.strip_affixes("python-requests", &Ecosystem::PyPi),
            "requests"
        );
        assert_eq!(rules.strip_affixes("lodash-js", &Ecosystem::Npm), "lodash");
    }

    // A known typosquat resolves to its entry; a legitimate name does not.
    #[test]
    fn test_typosquat_detection() {
        let rules = EcosystemRules::new();

        let result = rules.is_typosquat("python-dateutils", &Ecosystem::PyPi);
        assert!(result.is_some());
        assert_eq!(result.unwrap().legitimate, "python-dateutil");

        assert!(rules.is_typosquat("requests", &Ecosystem::PyPi).is_none());
    }

    // Both a group member and the group's own name resolve to the group.
    #[test]
    fn test_package_group() {
        let rules = EcosystemRules::new();

        assert_eq!(
            rules.get_package_group("lodash-es", &Ecosystem::Npm),
            Some("lodash")
        );
        assert_eq!(
            rules.get_package_group("lodash", &Ecosystem::Npm),
            Some("lodash")
        );
    }

    // The same concept maps to a different package per ecosystem.
    #[test]
    fn test_cross_ecosystem() {
        let rules = EcosystemRules::new();

        assert_eq!(
            rules.get_cross_ecosystem_equivalent("yaml_parsing", &Ecosystem::PyPi),
            Some("pyyaml")
        );
        assert_eq!(
            rules.get_cross_ecosystem_equivalent("yaml_parsing", &Ecosystem::Npm),
            Some("js-yaml")
        );
    }

    // A trailing `/vN` segment is removed; names without one are unchanged.
    #[test]
    fn test_go_version_suffix() {
        let rules = EcosystemRules::new();

        assert_eq!(
            rules.normalize_name("github.com/foo/bar/v2", &Ecosystem::Golang),
            "github.com/foo/bar"
        );
        assert_eq!(
            rules.normalize_name("github.com/foo/bar", &Ecosystem::Golang),
            "github.com/foo/bar"
        );
    }

    // Aliases resolve to their canonical name regardless of input case.
    #[test]
    fn test_canonical_lookup() {
        let rules = EcosystemRules::new();

        assert_eq!(
            rules.get_canonical("PIL", &Ecosystem::PyPi),
            Some("pillow".to_string())
        );
        assert_eq!(
            rules.get_canonical("sklearn", &Ecosystem::PyPi),
            Some("scikit-learn".to_string())
        );
    }

    // Custom rules parsed from YAML drive the internal-prefix and ignore checks.
    #[test]
    fn test_custom_config() {
        let yaml = r#"
version: "1.0"
custom_rules:
 internal_prefixes:
 - "@mycompany/"
 ignored_packages:
 - "internal-tool"
"#;
        let config = EcosystemRulesConfig::from_yaml(yaml).unwrap();
        let rules = EcosystemRules::with_config(config);

        assert!(rules.is_internal_package("@mycompany/logger"));
        assert!(!rules.is_internal_package("lodash"));
        assert!(rules.is_ignored("internal-tool"));
    }
}