1use crate::model::Ecosystem;
7use regex::Regex;
8use std::collections::HashMap;
9
10use super::ecosystem_config::{
11 ConfigError, EcosystemConfig, EcosystemRulesConfig, NormalizationConfig, ScopeHandling,
12 TyposquatEntry,
13};
14
15pub struct EcosystemRules {
17 config: EcosystemRulesConfig,
19 suspicious_patterns: HashMap<String, Vec<Regex>>,
21 migration_patterns: HashMap<String, Vec<(Regex, String)>>,
23 package_group_patterns: HashMap<String, HashMap<String, Vec<Regex>>>,
26}
27
28impl EcosystemRules {
29 pub fn new() -> Self {
31 Self::with_config(EcosystemRulesConfig::builtin())
32 }
33
34 pub fn with_config(config: EcosystemRulesConfig) -> Self {
36 let suspicious_patterns = Self::compile_suspicious_patterns(&config);
37 let migration_patterns = Self::compile_migration_patterns(&config);
38 let package_group_patterns = Self::compile_package_group_patterns(&config);
39
40 Self {
41 config,
42 suspicious_patterns,
43 migration_patterns,
44 package_group_patterns,
45 }
46 }
47
48 pub fn from_file(path: &std::path::Path) -> Result<Self, ConfigError> {
50 let config = EcosystemRulesConfig::from_file(path)?;
51 Ok(Self::with_config(config))
52 }
53
54 pub fn from_default_locations() -> Self {
56 let config = EcosystemRulesConfig::load_with_precedence(&[
57 ".sbom-tools/ecosystem-rules.yaml",
58 ".sbom-tools/ecosystem-rules.json",
59 "~/.config/sbom-tools/ecosystem-rules.yaml",
60 "~/.config/sbom-tools/ecosystem-rules.json",
61 ])
62 .unwrap_or_else(|_| EcosystemRulesConfig::builtin());
63
64 Self::with_config(config)
65 }
66
67 fn compile_suspicious_patterns(config: &EcosystemRulesConfig) -> HashMap<String, Vec<Regex>> {
69 let mut patterns = HashMap::with_capacity(config.ecosystems.len());
70
71 for (ecosystem, eco_config) in &config.ecosystems {
72 let mut compiled = Vec::with_capacity(eco_config.security.suspicious_patterns.len());
73 for pattern in &eco_config.security.suspicious_patterns {
74 if let Ok(re) = Regex::new(pattern) {
75 compiled.push(re);
76 }
77 }
78 if !compiled.is_empty() {
79 patterns.insert(ecosystem.clone(), compiled);
80 }
81 }
82
83 patterns
84 }
85
86 fn compile_migration_patterns(
88 config: &EcosystemRulesConfig,
89 ) -> HashMap<String, Vec<(Regex, String)>> {
90 let mut patterns = HashMap::with_capacity(config.ecosystems.len());
91
92 for (ecosystem, eco_config) in &config.ecosystems {
93 let mut compiled = Vec::with_capacity(eco_config.group_migrations.len());
94 for migration in &eco_config.group_migrations {
95 let regex_pattern = migration.from.replace('.', r"\.").replace('*', ".*");
97 if let Ok(re) = Regex::new(&format!("^{}$", regex_pattern)) {
98 compiled.push((re, migration.to.clone()));
99 }
100 }
101 if !compiled.is_empty() {
102 patterns.insert(ecosystem.clone(), compiled);
103 }
104 }
105
106 patterns
107 }
108
109 fn compile_package_group_patterns(
111 config: &EcosystemRulesConfig,
112 ) -> HashMap<String, HashMap<String, Vec<Regex>>> {
113 let mut eco_patterns = HashMap::with_capacity(config.ecosystems.len());
114
115 for (ecosystem, eco_config) in &config.ecosystems {
116 let mut group_patterns = HashMap::with_capacity(eco_config.package_groups.len());
117
118 for (group_name, group) in &eco_config.package_groups {
119 let glob_count = group.members.iter().filter(|m| m.contains('*')).count();
121 let mut compiled = Vec::with_capacity(glob_count);
122 for member in &group.members {
123 if member.contains('*') {
124 let regex_pattern = member.replace('.', r"\.").replace('*', ".*");
126 if let Ok(re) = Regex::new(&format!("^{}$", regex_pattern)) {
127 compiled.push(re);
128 }
129 }
130 }
131 if !compiled.is_empty() {
132 group_patterns.insert(group_name.clone(), compiled);
133 }
134 }
135
136 if !group_patterns.is_empty() {
137 eco_patterns.insert(ecosystem.clone(), group_patterns);
138 }
139 }
140
141 eco_patterns
142 }
143
144 pub fn config(&self) -> &EcosystemRulesConfig {
146 &self.config
147 }
148
149 pub fn normalize_name(&self, name: &str, ecosystem: &Ecosystem) -> String {
151 let eco_key = Self::ecosystem_key(ecosystem);
152
153 if let Some(eco_config) = self.config.ecosystems.get(&eco_key) {
154 self.apply_normalization(name, eco_config)
155 } else {
156 name.to_lowercase()
158 }
159 }
160
161 fn apply_normalization(&self, name: &str, config: &EcosystemConfig) -> String {
163 let norm = &config.normalization;
164 let mut result = name.to_string();
165
166 if result.starts_with('@') {
168 result = self.normalize_scoped_name(&result, norm);
169 } else {
170 if !norm.case_sensitive {
172 result = result.to_lowercase();
173 }
174 }
175
176 for char_group in &norm.equivalent_chars {
178 if char_group.len() >= 2 {
179 let target = &char_group[0];
180 for source in &char_group[1..] {
181 result = result.replace(source.as_str(), target);
182 }
183 }
184 }
185
186 if norm.collapse_separators {
188 result = self.collapse_separators(&result);
189 }
190
191 if norm.strip_version_suffix {
193 result = self.strip_go_version_suffix(&result);
194 }
195
196 result
197 }
198
199 fn normalize_scoped_name(&self, name: &str, norm: &NormalizationConfig) -> String {
201 match norm.scope_handling {
202 ScopeHandling::Lowercase => name.to_lowercase(),
203 ScopeHandling::PreserveScopeCase => {
204 if let Some(slash_pos) = name.find('/') {
205 let scope = &name[..slash_pos];
206 let pkg_name = &name[slash_pos + 1..];
207 format!("{}/{}", scope.to_lowercase(), pkg_name.to_lowercase())
208 } else {
209 name.to_lowercase()
210 }
211 }
212 ScopeHandling::PreserveCase => name.to_string(),
213 }
214 }
215
216 fn collapse_separators(&self, name: &str) -> String {
218 let mut result = String::with_capacity(name.len());
219 let mut last_was_sep = false;
220
221 for c in name.chars() {
222 let is_sep = c == '-' || c == '_' || c == '.';
223 if is_sep {
224 if !last_was_sep {
225 result.push(c);
226 }
227 last_was_sep = true;
228 } else {
229 result.push(c);
230 last_was_sep = false;
231 }
232 }
233
234 result
236 .trim_matches(|c| c == '-' || c == '_' || c == '.')
237 .to_string()
238 }
239
240 fn strip_go_version_suffix(&self, name: &str) -> String {
242 use std::sync::LazyLock;
243 static GO_VERSION_SUFFIX: LazyLock<Regex> =
244 LazyLock::new(|| Regex::new(r"/v\d+$").expect("static regex"));
245 GO_VERSION_SUFFIX.replace(name, "").to_string()
246 }
247
248 pub fn names_match(&self, name_a: &str, name_b: &str, ecosystem: &Ecosystem) -> bool {
250 let norm_a = self.normalize_name(name_a, ecosystem);
251 let norm_b = self.normalize_name(name_b, ecosystem);
252 norm_a == norm_b
253 }
254
255 pub fn get_canonical(&self, name: &str, ecosystem: &Ecosystem) -> Option<String> {
257 let eco_key = Self::ecosystem_key(ecosystem);
258 let name_lower = name.to_lowercase();
259
260 if let Some(eco_config) = self.config.ecosystems.get(&eco_key) {
261 for (canonical, aliases) in &eco_config.aliases {
262 if canonical.to_lowercase() == name_lower {
263 return Some(canonical.clone());
264 }
265 for alias in aliases {
266 if alias.to_lowercase() == name_lower {
267 return Some(canonical.clone());
268 }
269 }
270 }
271 }
272
273 for equiv in &self.config.custom_rules.equivalences {
275 if equiv.canonical.to_lowercase() == name_lower {
276 return Some(equiv.canonical.clone());
277 }
278 for alias in &equiv.aliases {
279 if alias.to_lowercase() == name_lower {
280 return Some(equiv.canonical.clone());
281 }
282 }
283 }
284
285 None
286 }
287
288 pub fn is_alias(&self, canonical: &str, name: &str, ecosystem: &Ecosystem) -> bool {
290 let eco_key = Self::ecosystem_key(ecosystem);
291 let name_lower = name.to_lowercase();
292 let canonical_lower = canonical.to_lowercase();
293
294 if let Some(eco_config) = self.config.ecosystems.get(&eco_key) {
295 if let Some(aliases) = eco_config.aliases.get(&canonical_lower) {
296 return aliases.iter().any(|a| a.to_lowercase() == name_lower);
297 }
298 }
299
300 false
301 }
302
303 pub fn get_strip_suffixes(&self, ecosystem: &Ecosystem) -> Vec<&str> {
305 let eco_key = Self::ecosystem_key(ecosystem);
306
307 self.config
308 .ecosystems
309 .get(&eco_key)
310 .map(|c| c.strip_suffixes.iter().map(|s| s.as_str()).collect())
311 .unwrap_or_default()
312 }
313
314 pub fn get_strip_prefixes(&self, ecosystem: &Ecosystem) -> Vec<&str> {
316 let eco_key = Self::ecosystem_key(ecosystem);
317
318 self.config
319 .ecosystems
320 .get(&eco_key)
321 .map(|c| c.strip_prefixes.iter().map(|s| s.as_str()).collect())
322 .unwrap_or_default()
323 }
324
325 pub fn strip_affixes(&self, name: &str, ecosystem: &Ecosystem) -> String {
327 let mut result = name.to_lowercase();
328
329 for prefix in self.get_strip_prefixes(ecosystem) {
330 if result.starts_with(prefix) {
331 result = result[prefix.len()..].to_string();
332 break;
333 }
334 }
335
336 for suffix in self.get_strip_suffixes(ecosystem) {
337 if result.ends_with(suffix) {
338 result = result[..result.len() - suffix.len()].to_string();
339 break;
340 }
341 }
342
343 result
344 }
345
346 pub fn is_typosquat(&self, name: &str, ecosystem: &Ecosystem) -> Option<&TyposquatEntry> {
348 if !self.config.settings.enable_security_checks {
349 return None;
350 }
351
352 let eco_key = Self::ecosystem_key(ecosystem);
353 let name_lower = name.to_lowercase();
354
355 if let Some(eco_config) = self.config.ecosystems.get(&eco_key) {
356 for entry in &eco_config.security.known_typosquats {
357 if entry.malicious.to_lowercase() == name_lower {
358 return Some(entry);
359 }
360 }
361 }
362
363 None
364 }
365
366 pub fn is_suspicious(&self, name: &str, ecosystem: &Ecosystem) -> bool {
368 if !self.config.settings.enable_security_checks {
369 return false;
370 }
371
372 let eco_key = Self::ecosystem_key(ecosystem);
373
374 if let Some(patterns) = self.suspicious_patterns.get(&eco_key) {
375 patterns.iter().any(|re| re.is_match(name))
376 } else {
377 false
378 }
379 }
380
381 pub fn is_known_malicious(&self, name: &str, ecosystem: &Ecosystem) -> bool {
383 if !self.config.settings.enable_security_checks {
384 return false;
385 }
386
387 let eco_key = Self::ecosystem_key(ecosystem);
388 let name_lower = name.to_lowercase();
389
390 if let Some(eco_config) = self.config.ecosystems.get(&eco_key) {
391 eco_config
392 .security
393 .known_malicious
394 .iter()
395 .any(|m| m.to_lowercase() == name_lower)
396 } else {
397 false
398 }
399 }
400
401 pub fn get_migrated_group(&self, group: &str, ecosystem: &Ecosystem) -> Option<String> {
403 let eco_key = Self::ecosystem_key(ecosystem);
404
405 if let Some(patterns) = self.migration_patterns.get(&eco_key) {
406 for (pattern, replacement) in patterns {
407 if pattern.is_match(group) {
408 let migrated = pattern.replace(group, replacement.as_str());
409 return Some(migrated.to_string());
410 }
411 }
412 }
413
414 None
415 }
416
417 pub fn get_package_group(&self, name: &str, ecosystem: &Ecosystem) -> Option<&str> {
419 let eco_key = Self::ecosystem_key(ecosystem);
420 let name_lower = name.to_lowercase();
421
422 if let Some(eco_config) = self.config.ecosystems.get(&eco_key) {
423 let compiled_patterns = self.package_group_patterns.get(&eco_key);
425
426 for (group_name, group) in &eco_config.package_groups {
427 if group.canonical.to_lowercase() == name_lower {
429 return Some(group_name);
430 }
431
432 for member in &group.members {
434 if member.contains('*') {
435 if let Some(group_patterns) = compiled_patterns {
437 if let Some(patterns) = group_patterns.get(group_name) {
438 if patterns.iter().any(|re| re.is_match(&name_lower)) {
439 return Some(group_name);
440 }
441 }
442 }
443 } else if member.to_lowercase() == name_lower {
444 return Some(group_name);
445 }
446 }
447 }
448 }
449
450 None
451 }
452
453 pub fn get_cross_ecosystem_equivalent(
455 &self,
456 concept: &str,
457 target_ecosystem: &Ecosystem,
458 ) -> Option<&str> {
459 let eco_key = Self::ecosystem_key(target_ecosystem);
460
461 self.config
462 .cross_ecosystem
463 .get(concept)
464 .and_then(|mapping| mapping.get(&eco_key))
465 .and_then(|opt| opt.as_deref())
466 }
467
468 pub fn is_internal_package(&self, name: &str) -> bool {
470 self.config
471 .custom_rules
472 .internal_prefixes
473 .iter()
474 .any(|prefix| name.starts_with(prefix))
475 }
476
477 pub fn is_ignored(&self, name: &str) -> bool {
479 let name_lower = name.to_lowercase();
480 self.config
481 .custom_rules
482 .ignored_packages
483 .iter()
484 .any(|p| p.to_lowercase() == name_lower)
485 }
486
487 fn ecosystem_key(ecosystem: &Ecosystem) -> String {
489 match ecosystem {
490 Ecosystem::Npm => "npm".to_string(),
491 Ecosystem::PyPi => "pypi".to_string(),
492 Ecosystem::Cargo => "cargo".to_string(),
493 Ecosystem::Maven => "maven".to_string(),
494 Ecosystem::Golang => "golang".to_string(),
495 Ecosystem::Nuget => "nuget".to_string(),
496 Ecosystem::RubyGems => "rubygems".to_string(),
497 Ecosystem::Composer => "composer".to_string(),
498 Ecosystem::CocoaPods => "cocoapods".to_string(),
499 Ecosystem::Swift => "swift".to_string(),
500 Ecosystem::Hex => "hex".to_string(),
501 Ecosystem::Pub => "pub".to_string(),
502 Ecosystem::Hackage => "hackage".to_string(),
503 Ecosystem::Cpan => "cpan".to_string(),
504 Ecosystem::Cran => "cran".to_string(),
505 Ecosystem::Conda => "conda".to_string(),
506 Ecosystem::Conan => "conan".to_string(),
507 Ecosystem::Deb => "deb".to_string(),
508 Ecosystem::Rpm => "rpm".to_string(),
509 Ecosystem::Apk => "apk".to_string(),
510 Ecosystem::Generic => "generic".to_string(),
511 Ecosystem::Unknown(s) => s.to_lowercase(),
512 }
513 }
514}
515
516impl Default for EcosystemRules {
517 fn default() -> Self {
518 Self::new()
519 }
520}
521
522#[cfg(test)]
523mod tests {
524 use super::*;
525
526 #[test]
527 fn test_pypi_normalization() {
528 let rules = EcosystemRules::new();
529
530 assert_eq!(
531 rules.normalize_name("python-dateutil", &Ecosystem::PyPi),
532 "python-dateutil"
533 );
534 assert_eq!(
535 rules.normalize_name("python_dateutil", &Ecosystem::PyPi),
536 "python-dateutil"
537 );
538 assert_eq!(
539 rules.normalize_name("Python.Dateutil", &Ecosystem::PyPi),
540 "python-dateutil"
541 );
542 }
543
544 #[test]
545 fn test_cargo_normalization() {
546 let rules = EcosystemRules::new();
547
548 assert_eq!(
549 rules.normalize_name("serde-json", &Ecosystem::Cargo),
550 "serde_json"
551 );
552 assert_eq!(
553 rules.normalize_name("serde_json", &Ecosystem::Cargo),
554 "serde_json"
555 );
556 }
557
558 #[test]
559 fn test_npm_scoped_normalization() {
560 let rules = EcosystemRules::new();
561
562 assert_eq!(
563 rules.normalize_name("@Angular/Core", &Ecosystem::Npm),
564 "@angular/core"
565 );
566 }
567
568 #[test]
569 fn test_names_match() {
570 let rules = EcosystemRules::new();
571
572 assert!(rules.names_match("python-dateutil", "python_dateutil", &Ecosystem::PyPi));
573 assert!(rules.names_match("serde-json", "serde_json", &Ecosystem::Cargo));
574 }
575
576 #[test]
577 fn test_strip_affixes() {
578 let rules = EcosystemRules::new();
579
580 assert_eq!(
581 rules.strip_affixes("python-requests", &Ecosystem::PyPi),
582 "requests"
583 );
584 assert_eq!(rules.strip_affixes("lodash-js", &Ecosystem::Npm), "lodash");
585 }
586
587 #[test]
588 fn test_typosquat_detection() {
589 let rules = EcosystemRules::new();
590
591 let result = rules.is_typosquat("python-dateutils", &Ecosystem::PyPi);
592 assert!(result.is_some());
593 assert_eq!(result.unwrap().legitimate, "python-dateutil");
594
595 assert!(rules.is_typosquat("requests", &Ecosystem::PyPi).is_none());
596 }
597
598 #[test]
599 fn test_package_group() {
600 let rules = EcosystemRules::new();
601
602 assert_eq!(
603 rules.get_package_group("lodash-es", &Ecosystem::Npm),
604 Some("lodash")
605 );
606 assert_eq!(
607 rules.get_package_group("lodash", &Ecosystem::Npm),
608 Some("lodash")
609 );
610 }
611
612 #[test]
613 fn test_cross_ecosystem() {
614 let rules = EcosystemRules::new();
615
616 assert_eq!(
617 rules.get_cross_ecosystem_equivalent("yaml_parsing", &Ecosystem::PyPi),
618 Some("pyyaml")
619 );
620 assert_eq!(
621 rules.get_cross_ecosystem_equivalent("yaml_parsing", &Ecosystem::Npm),
622 Some("js-yaml")
623 );
624 }
625
626 #[test]
627 fn test_go_version_suffix() {
628 let rules = EcosystemRules::new();
629
630 assert_eq!(
631 rules.normalize_name("github.com/foo/bar/v2", &Ecosystem::Golang),
632 "github.com/foo/bar"
633 );
634 assert_eq!(
635 rules.normalize_name("github.com/foo/bar", &Ecosystem::Golang),
636 "github.com/foo/bar"
637 );
638 }
639
640 #[test]
641 fn test_canonical_lookup() {
642 let rules = EcosystemRules::new();
643
644 assert_eq!(
645 rules.get_canonical("PIL", &Ecosystem::PyPi),
646 Some("pillow".to_string())
647 );
648 assert_eq!(
649 rules.get_canonical("sklearn", &Ecosystem::PyPi),
650 Some("scikit-learn".to_string())
651 );
652 }
653
654 #[test]
655 fn test_custom_config() {
656 let yaml = r#"
657version: "1.0"
658custom_rules:
659 internal_prefixes:
660 - "@mycompany/"
661 ignored_packages:
662 - "internal-tool"
663"#;
664 let config = EcosystemRulesConfig::from_yaml(yaml).unwrap();
665 let rules = EcosystemRules::with_config(config);
666
667 assert!(rules.is_internal_package("@mycompany/logger"));
668 assert!(!rules.is_internal_package("lodash"));
669 assert!(rules.is_ignored("internal-tool"));
670 }
671}