1use crate::model::Ecosystem;
7use regex::Regex;
8use std::collections::HashMap;
9
10use super::ecosystem_config::{
11 ConfigError, EcosystemConfig, EcosystemRulesConfig, NormalizationConfig, ScopeHandling,
12 TyposquatEntry,
13};
14
/// Rules engine that answers per-ecosystem questions about package names.
///
/// Holds an [`EcosystemRulesConfig`] together with the regexes compiled from it
/// at construction time, so lookups never recompile a pattern.
pub struct EcosystemRules {
    /// Configuration that every lookup reads from.
    config: EcosystemRulesConfig,
    /// Compiled `security.suspicious_patterns`, keyed by ecosystem key.
    suspicious_patterns: HashMap<String, Vec<Regex>>,
    /// Compiled group-migration patterns paired with their replacement text,
    /// keyed by ecosystem key.
    migration_patterns: HashMap<String, Vec<(Regex, String)>>,
    /// Compiled glob members of package groups:
    /// ecosystem key -> group name -> regexes.
    package_group_patterns: HashMap<String, HashMap<String, Vec<Regex>>>,
}
27
28impl EcosystemRules {
29 #[must_use]
31 pub fn new() -> Self {
32 Self::with_config(EcosystemRulesConfig::builtin())
33 }
34
35 #[must_use]
37 pub fn with_config(config: EcosystemRulesConfig) -> Self {
38 let suspicious_patterns = Self::compile_suspicious_patterns(&config);
39 let migration_patterns = Self::compile_migration_patterns(&config);
40 let package_group_patterns = Self::compile_package_group_patterns(&config);
41
42 Self {
43 config,
44 suspicious_patterns,
45 migration_patterns,
46 package_group_patterns,
47 }
48 }
49
50 pub fn from_file(path: &std::path::Path) -> Result<Self, ConfigError> {
52 let config = EcosystemRulesConfig::from_file(path)?;
53 Ok(Self::with_config(config))
54 }
55
56 #[must_use]
58 pub fn from_default_locations() -> Self {
59 let config = EcosystemRulesConfig::load_with_precedence(&[
60 ".sbom-tools/ecosystem-rules.yaml",
61 ".sbom-tools/ecosystem-rules.json",
62 "~/.config/sbom-tools/ecosystem-rules.yaml",
63 "~/.config/sbom-tools/ecosystem-rules.json",
64 ])
65 .unwrap_or_else(|_| EcosystemRulesConfig::builtin());
66
67 Self::with_config(config)
68 }
69
70 fn compile_suspicious_patterns(config: &EcosystemRulesConfig) -> HashMap<String, Vec<Regex>> {
72 let mut patterns = HashMap::with_capacity(config.ecosystems.len());
73
74 for (ecosystem, eco_config) in &config.ecosystems {
75 let mut compiled = Vec::with_capacity(eco_config.security.suspicious_patterns.len());
76 for pattern in &eco_config.security.suspicious_patterns {
77 if let Ok(re) = Regex::new(pattern) {
78 compiled.push(re);
79 }
80 }
81 if !compiled.is_empty() {
82 patterns.insert(ecosystem.clone(), compiled);
83 }
84 }
85
86 patterns
87 }
88
89 fn compile_migration_patterns(
91 config: &EcosystemRulesConfig,
92 ) -> HashMap<String, Vec<(Regex, String)>> {
93 let mut patterns = HashMap::with_capacity(config.ecosystems.len());
94
95 for (ecosystem, eco_config) in &config.ecosystems {
96 let mut compiled = Vec::with_capacity(eco_config.group_migrations.len());
97 for migration in &eco_config.group_migrations {
98 let regex_pattern = migration.from.replace('.', r"\.").replace('*', ".*");
100 if let Ok(re) = Regex::new(&format!("^{regex_pattern}$")) {
101 compiled.push((re, migration.to.clone()));
102 }
103 }
104 if !compiled.is_empty() {
105 patterns.insert(ecosystem.clone(), compiled);
106 }
107 }
108
109 patterns
110 }
111
112 fn compile_package_group_patterns(
114 config: &EcosystemRulesConfig,
115 ) -> HashMap<String, HashMap<String, Vec<Regex>>> {
116 let mut eco_patterns = HashMap::with_capacity(config.ecosystems.len());
117
118 for (ecosystem, eco_config) in &config.ecosystems {
119 let mut group_patterns = HashMap::with_capacity(eco_config.package_groups.len());
120
121 for (group_name, group) in &eco_config.package_groups {
122 let glob_count = group.members.iter().filter(|m| m.contains('*')).count();
124 let mut compiled = Vec::with_capacity(glob_count);
125 for member in &group.members {
126 if member.contains('*') {
127 let regex_pattern = member.replace('.', r"\.").replace('*', ".*");
129 if let Ok(re) = Regex::new(&format!("^{regex_pattern}$")) {
130 compiled.push(re);
131 }
132 }
133 }
134 if !compiled.is_empty() {
135 group_patterns.insert(group_name.clone(), compiled);
136 }
137 }
138
139 if !group_patterns.is_empty() {
140 eco_patterns.insert(ecosystem.clone(), group_patterns);
141 }
142 }
143
144 eco_patterns
145 }
146
/// Returns the configuration this rules engine was built from.
#[must_use]
pub const fn config(&self) -> &EcosystemRulesConfig {
    &self.config
}
152
153 #[must_use]
155 pub fn normalize_name(&self, name: &str, ecosystem: &Ecosystem) -> String {
156 let eco_key = Self::ecosystem_key(ecosystem);
157
158 self.config.ecosystems.get(&eco_key).map_or_else(
159 || {
160 name.to_lowercase()
162 },
163 |eco_config| self.apply_normalization(name, eco_config),
164 )
165 }
166
167 fn apply_normalization(&self, name: &str, config: &EcosystemConfig) -> String {
169 let norm = &config.normalization;
170 let mut result = name.to_string();
171
172 if result.starts_with('@') {
174 result = self.normalize_scoped_name(&result, norm);
175 } else {
176 if !norm.case_sensitive {
178 result = result.to_lowercase();
179 }
180 }
181
182 for char_group in &norm.equivalent_chars {
184 if char_group.len() >= 2 {
185 let target = &char_group[0];
186 for source in &char_group[1..] {
187 result = result.replace(source.as_str(), target);
188 }
189 }
190 }
191
192 if norm.collapse_separators {
194 result = self.collapse_separators(&result);
195 }
196
197 if norm.strip_version_suffix {
199 result = self.strip_go_version_suffix(&result);
200 }
201
202 result
203 }
204
205 fn normalize_scoped_name(&self, name: &str, norm: &NormalizationConfig) -> String {
207 match norm.scope_handling {
208 ScopeHandling::Lowercase => name.to_lowercase(),
209 ScopeHandling::PreserveScopeCase => name.find('/').map_or_else(
210 || name.to_lowercase(),
211 |slash_pos| {
212 let scope = &name[..slash_pos];
213 let pkg_name = &name[slash_pos + 1..];
214 format!("{}/{}", scope.to_lowercase(), pkg_name.to_lowercase())
215 },
216 ),
217 ScopeHandling::PreserveCase => name.to_string(),
218 }
219 }
220
221 fn collapse_separators(&self, name: &str) -> String {
223 let mut result = String::with_capacity(name.len());
224 let mut last_was_sep = false;
225
226 for c in name.chars() {
227 let is_sep = c == '-' || c == '_' || c == '.';
228 if is_sep {
229 if !last_was_sep {
230 result.push(c);
231 }
232 last_was_sep = true;
233 } else {
234 result.push(c);
235 last_was_sep = false;
236 }
237 }
238
239 result
241 .trim_matches(|c| c == '-' || c == '_' || c == '.')
242 .to_string()
243 }
244
245 fn strip_go_version_suffix(&self, name: &str) -> String {
247 use std::sync::LazyLock;
248 static GO_VERSION_SUFFIX: LazyLock<Regex> =
249 LazyLock::new(|| Regex::new(r"/v\d+$").expect("static regex"));
250 GO_VERSION_SUFFIX.replace(name, "").to_string()
251 }
252
253 #[must_use]
255 pub fn names_match(&self, name_a: &str, name_b: &str, ecosystem: &Ecosystem) -> bool {
256 let norm_a = self.normalize_name(name_a, ecosystem);
257 let norm_b = self.normalize_name(name_b, ecosystem);
258 norm_a == norm_b
259 }
260
261 #[must_use]
263 pub fn get_canonical(&self, name: &str, ecosystem: &Ecosystem) -> Option<String> {
264 let eco_key = Self::ecosystem_key(ecosystem);
265 let name_lower = name.to_lowercase();
266
267 if let Some(eco_config) = self.config.ecosystems.get(&eco_key) {
268 for (canonical, aliases) in &eco_config.aliases {
269 if canonical.to_lowercase() == name_lower {
270 return Some(canonical.clone());
271 }
272 for alias in aliases {
273 if alias.to_lowercase() == name_lower {
274 return Some(canonical.clone());
275 }
276 }
277 }
278 }
279
280 for equiv in &self.config.custom_rules.equivalences {
282 if equiv.canonical.to_lowercase() == name_lower {
283 return Some(equiv.canonical.clone());
284 }
285 for alias in &equiv.aliases {
286 if alias.to_lowercase() == name_lower {
287 return Some(equiv.canonical.clone());
288 }
289 }
290 }
291
292 None
293 }
294
295 #[must_use]
297 pub fn is_alias(&self, canonical: &str, name: &str, ecosystem: &Ecosystem) -> bool {
298 let eco_key = Self::ecosystem_key(ecosystem);
299 let name_lower = name.to_lowercase();
300 let canonical_lower = canonical.to_lowercase();
301
302 if let Some(eco_config) = self.config.ecosystems.get(&eco_key)
303 && let Some(aliases) = eco_config.aliases.get(&canonical_lower)
304 {
305 return aliases.iter().any(|a| a.to_lowercase() == name_lower);
306 }
307
308 false
309 }
310
311 #[must_use]
313 pub fn get_strip_suffixes(&self, ecosystem: &Ecosystem) -> Vec<&str> {
314 let eco_key = Self::ecosystem_key(ecosystem);
315
316 self.config
317 .ecosystems
318 .get(&eco_key)
319 .map(|c| {
320 c.strip_suffixes
321 .iter()
322 .map(std::string::String::as_str)
323 .collect()
324 })
325 .unwrap_or_default()
326 }
327
328 #[must_use]
330 pub fn get_strip_prefixes(&self, ecosystem: &Ecosystem) -> Vec<&str> {
331 let eco_key = Self::ecosystem_key(ecosystem);
332
333 self.config
334 .ecosystems
335 .get(&eco_key)
336 .map(|c| {
337 c.strip_prefixes
338 .iter()
339 .map(std::string::String::as_str)
340 .collect()
341 })
342 .unwrap_or_default()
343 }
344
345 #[must_use]
347 pub fn strip_affixes(&self, name: &str, ecosystem: &Ecosystem) -> String {
348 let mut result = name.to_lowercase();
349
350 for prefix in self.get_strip_prefixes(ecosystem) {
351 if result.starts_with(prefix) {
352 result = result[prefix.len()..].to_string();
353 break;
354 }
355 }
356
357 for suffix in self.get_strip_suffixes(ecosystem) {
358 if result.ends_with(suffix) {
359 result = result[..result.len() - suffix.len()].to_string();
360 break;
361 }
362 }
363
364 result
365 }
366
367 #[must_use]
369 pub fn is_typosquat(&self, name: &str, ecosystem: &Ecosystem) -> Option<&TyposquatEntry> {
370 if !self.config.settings.enable_security_checks {
371 return None;
372 }
373
374 let eco_key = Self::ecosystem_key(ecosystem);
375 let name_lower = name.to_lowercase();
376
377 if let Some(eco_config) = self.config.ecosystems.get(&eco_key) {
378 for entry in &eco_config.security.known_typosquats {
379 if entry.malicious.to_lowercase() == name_lower {
380 return Some(entry);
381 }
382 }
383 }
384
385 None
386 }
387
388 #[must_use]
390 pub fn is_suspicious(&self, name: &str, ecosystem: &Ecosystem) -> bool {
391 if !self.config.settings.enable_security_checks {
392 return false;
393 }
394
395 let eco_key = Self::ecosystem_key(ecosystem);
396
397 self.suspicious_patterns
398 .get(&eco_key)
399 .is_some_and(|patterns| patterns.iter().any(|re| re.is_match(name)))
400 }
401
402 #[must_use]
404 pub fn is_known_malicious(&self, name: &str, ecosystem: &Ecosystem) -> bool {
405 if !self.config.settings.enable_security_checks {
406 return false;
407 }
408
409 let eco_key = Self::ecosystem_key(ecosystem);
410 let name_lower = name.to_lowercase();
411
412 self.config
413 .ecosystems
414 .get(&eco_key)
415 .is_some_and(|eco_config| {
416 eco_config
417 .security
418 .known_malicious
419 .iter()
420 .any(|m| m.to_lowercase() == name_lower)
421 })
422 }
423
424 #[must_use]
426 pub fn get_migrated_group(&self, group: &str, ecosystem: &Ecosystem) -> Option<String> {
427 let eco_key = Self::ecosystem_key(ecosystem);
428
429 if let Some(patterns) = self.migration_patterns.get(&eco_key) {
430 for (pattern, replacement) in patterns {
431 if pattern.is_match(group) {
432 let migrated = pattern.replace(group, replacement.as_str());
433 return Some(migrated.to_string());
434 }
435 }
436 }
437
438 None
439 }
440
441 #[must_use]
443 pub fn get_package_group(&self, name: &str, ecosystem: &Ecosystem) -> Option<&str> {
444 let eco_key = Self::ecosystem_key(ecosystem);
445 let name_lower = name.to_lowercase();
446
447 if let Some(eco_config) = self.config.ecosystems.get(&eco_key) {
448 let compiled_patterns = self.package_group_patterns.get(&eco_key);
450
451 for (group_name, group) in &eco_config.package_groups {
452 if group.canonical.to_lowercase() == name_lower {
454 return Some(group_name);
455 }
456
457 for member in &group.members {
459 if member.contains('*') {
460 if let Some(group_patterns) = compiled_patterns
462 && let Some(patterns) = group_patterns.get(group_name)
463 && patterns.iter().any(|re| re.is_match(&name_lower))
464 {
465 return Some(group_name);
466 }
467 } else if member.to_lowercase() == name_lower {
468 return Some(group_name);
469 }
470 }
471 }
472 }
473
474 None
475 }
476
477 #[must_use]
479 pub fn get_cross_ecosystem_equivalent(
480 &self,
481 concept: &str,
482 target_ecosystem: &Ecosystem,
483 ) -> Option<&str> {
484 let eco_key = Self::ecosystem_key(target_ecosystem);
485
486 self.config
487 .cross_ecosystem
488 .get(concept)
489 .and_then(|mapping| mapping.get(&eco_key))
490 .and_then(|opt| opt.as_deref())
491 }
492
493 #[must_use]
495 pub fn is_internal_package(&self, name: &str) -> bool {
496 self.config
497 .custom_rules
498 .internal_prefixes
499 .iter()
500 .any(|prefix| name.starts_with(prefix))
501 }
502
503 #[must_use]
505 pub fn is_ignored(&self, name: &str) -> bool {
506 let name_lower = name.to_lowercase();
507 self.config
508 .custom_rules
509 .ignored_packages
510 .iter()
511 .any(|p| p.to_lowercase() == name_lower)
512 }
513
514 fn ecosystem_key(ecosystem: &Ecosystem) -> String {
516 match ecosystem {
517 Ecosystem::Npm => "npm".to_string(),
518 Ecosystem::PyPi => "pypi".to_string(),
519 Ecosystem::Cargo => "cargo".to_string(),
520 Ecosystem::Maven => "maven".to_string(),
521 Ecosystem::Golang => "golang".to_string(),
522 Ecosystem::Nuget => "nuget".to_string(),
523 Ecosystem::RubyGems => "rubygems".to_string(),
524 Ecosystem::Composer => "composer".to_string(),
525 Ecosystem::CocoaPods => "cocoapods".to_string(),
526 Ecosystem::Swift => "swift".to_string(),
527 Ecosystem::Hex => "hex".to_string(),
528 Ecosystem::Pub => "pub".to_string(),
529 Ecosystem::Hackage => "hackage".to_string(),
530 Ecosystem::Cpan => "cpan".to_string(),
531 Ecosystem::Cran => "cran".to_string(),
532 Ecosystem::Conda => "conda".to_string(),
533 Ecosystem::Conan => "conan".to_string(),
534 Ecosystem::Deb => "deb".to_string(),
535 Ecosystem::Rpm => "rpm".to_string(),
536 Ecosystem::Apk => "apk".to_string(),
537 Ecosystem::HuggingFace => "huggingface".to_string(),
538 Ecosystem::Generic => "generic".to_string(),
539 Ecosystem::Unknown(s) => s.to_lowercase(),
540 }
541 }
542}
543
/// The default rules engine is the one built from the built-in configuration.
impl Default for EcosystemRules {
    /// Equivalent to [`EcosystemRules::new`].
    fn default() -> Self {
        Self::new()
    }
}
549
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every `(input, expected)` pair normalizes as expected in
    /// `ecosystem` under the built-in rules.
    fn assert_normalized(ecosystem: &Ecosystem, cases: &[(&str, &str)]) {
        let rules = EcosystemRules::new();
        for (input, expected) in cases {
            assert_eq!(
                rules.normalize_name(input, ecosystem),
                *expected,
                "input: {input}"
            );
        }
    }

    #[test]
    fn test_pypi_normalization() {
        assert_normalized(
            &Ecosystem::PyPi,
            &[
                ("python-dateutil", "python-dateutil"),
                ("python_dateutil", "python-dateutil"),
                ("Python.Dateutil", "python-dateutil"),
            ],
        );
    }

    #[test]
    fn test_cargo_normalization() {
        assert_normalized(
            &Ecosystem::Cargo,
            &[("serde-json", "serde_json"), ("serde_json", "serde_json")],
        );
    }

    #[test]
    fn test_npm_scoped_normalization() {
        assert_normalized(&Ecosystem::Npm, &[("@Angular/Core", "@angular/core")]);
    }

    #[test]
    fn test_names_match() {
        let rules = EcosystemRules::new();

        let pypi_match = rules.names_match("python-dateutil", "python_dateutil", &Ecosystem::PyPi);
        let cargo_match = rules.names_match("serde-json", "serde_json", &Ecosystem::Cargo);

        assert!(pypi_match);
        assert!(cargo_match);
    }

    #[test]
    fn test_strip_affixes() {
        let rules = EcosystemRules::new();
        let cases = [
            ("python-requests", Ecosystem::PyPi, "requests"),
            ("lodash-js", Ecosystem::Npm, "lodash"),
        ];

        for (input, ecosystem, expected) in &cases {
            assert_eq!(rules.strip_affixes(input, ecosystem), *expected);
        }
    }

    #[test]
    fn test_typosquat_detection() {
        let rules = EcosystemRules::new();

        let hit = rules
            .is_typosquat("python-dateutils", &Ecosystem::PyPi)
            .expect("python-dateutils should be a known typosquat");
        assert_eq!(hit.legitimate, "python-dateutil");

        // A legitimate package must not be flagged.
        assert!(rules.is_typosquat("requests", &Ecosystem::PyPi).is_none());
    }

    #[test]
    fn test_package_group() {
        let rules = EcosystemRules::new();

        for member in ["lodash-es", "lodash"] {
            assert_eq!(
                rules.get_package_group(member, &Ecosystem::Npm),
                Some("lodash")
            );
        }
    }

    #[test]
    fn test_cross_ecosystem() {
        let rules = EcosystemRules::new();

        for (ecosystem, expected) in [(Ecosystem::PyPi, "pyyaml"), (Ecosystem::Npm, "js-yaml")] {
            assert_eq!(
                rules.get_cross_ecosystem_equivalent("yaml_parsing", &ecosystem),
                Some(expected)
            );
        }
    }

    #[test]
    fn test_go_version_suffix() {
        assert_normalized(
            &Ecosystem::Golang,
            &[
                ("github.com/foo/bar/v2", "github.com/foo/bar"),
                ("github.com/foo/bar", "github.com/foo/bar"),
            ],
        );
    }

    #[test]
    fn test_canonical_lookup() {
        let rules = EcosystemRules::new();

        for (alias, canonical) in [("PIL", "pillow"), ("sklearn", "scikit-learn")] {
            assert_eq!(
                rules.get_canonical(alias, &Ecosystem::PyPi).as_deref(),
                Some(canonical)
            );
        }
    }

    #[test]
    fn test_custom_config() {
        let yaml = r#"
version: "1.0"
custom_rules:
  internal_prefixes:
    - "@mycompany/"
  ignored_packages:
    - "internal-tool"
"#;
        let config = EcosystemRulesConfig::from_yaml(yaml).unwrap();
        let rules = EcosystemRules::with_config(config);

        assert!(rules.is_internal_package("@mycompany/logger"));
        assert!(!rules.is_internal_package("lodash"));
        assert!(rules.is_ignored("internal-tool"));
    }
}