use crate::model::Ecosystem;
use regex::Regex;
use std::collections::HashMap;
use super::ecosystem_config::{
ConfigError, EcosystemConfig, EcosystemRulesConfig, NormalizationConfig, ScopeHandling,
TyposquatEntry,
};
/// Answers ecosystem-specific questions about package names (normalization,
/// aliases, security lists, group migrations, package groups).
///
/// Holds an [`EcosystemRulesConfig`] plus regular expressions that are compiled
/// once, at construction time, from the patterns found in that configuration.
pub struct EcosystemRules {
/// The configuration every lookup is answered from.
config: EcosystemRulesConfig,
/// Compiled `security.suspicious_patterns`, keyed by ecosystem name.
suspicious_patterns: HashMap<String, Vec<Regex>>,
/// Compiled `group_migrations`, keyed by ecosystem name: an anchored matcher
/// built from `from`, paired with the `to` string.
migration_patterns: HashMap<String, Vec<(Regex, String)>>,
/// Compiled `*` members of `package_groups`: ecosystem name -> group name -> matchers.
package_group_patterns: HashMap<String, HashMap<String, Vec<Regex>>>,
}
impl EcosystemRules {
/// Creates a rule set backed by the built-in configuration
/// (`EcosystemRulesConfig::builtin`).
#[must_use]
pub fn new() -> Self {
    let builtin = EcosystemRulesConfig::builtin();
    Self::with_config(builtin)
}
/// Creates a rule set from `config`, compiling all of its patterns up front.
///
/// The three pattern tables are built from a borrow of `config`; the
/// configuration itself is moved into the returned value afterwards.
#[must_use]
pub fn with_config(config: EcosystemRulesConfig) -> Self {
    // Field initializers run in the order written, so `config` is only moved
    // once all three tables have been derived from it.
    Self {
        suspicious_patterns: Self::compile_suspicious_patterns(&config),
        migration_patterns: Self::compile_migration_patterns(&config),
        package_group_patterns: Self::compile_package_group_patterns(&config),
        config,
    }
}
/// Loads the configuration stored at `path` and creates a rule set from it.
///
/// # Errors
///
/// Propagates the error reported by `EcosystemRulesConfig::from_file` for `path`.
pub fn from_file(path: &std::path::Path) -> Result<Self, ConfigError> {
    let loaded = EcosystemRulesConfig::from_file(path)?;
    let rules = Self::with_config(loaded);
    Ok(rules)
}
/// Creates a rule set from the default configuration locations.
///
/// The candidate paths are handed, in the order listed, to
/// `EcosystemRulesConfig::load_with_precedence`. If that call returns an error
/// the built-in configuration is used instead, so this constructor never fails.
#[must_use]
pub fn from_default_locations() -> Self {
    // NOTE(review): the `~/` entries are passed through verbatim; confirm that
    // `load_with_precedence` expands the home directory itself.
    let candidates = [
        ".sbom-tools/ecosystem-rules.yaml",
        ".sbom-tools/ecosystem-rules.json",
        "~/.config/sbom-tools/ecosystem-rules.yaml",
        "~/.config/sbom-tools/ecosystem-rules.json",
    ];
    let config = match EcosystemRulesConfig::load_with_precedence(&candidates) {
        Ok(loaded) => loaded,
        Err(_) => EcosystemRulesConfig::builtin(),
    };
    Self::with_config(config)
}
/// Compiles the `security.suspicious_patterns` of every ecosystem in `config`.
///
/// A pattern that is not a valid regular expression is dropped without an
/// error. An ecosystem left with no valid pattern gets no entry in the result.
fn compile_suspicious_patterns(config: &EcosystemRulesConfig) -> HashMap<String, Vec<Regex>> {
    config
        .ecosystems
        .iter()
        .filter_map(|(eco_name, eco_config)| {
            let regexes: Vec<Regex> = eco_config
                .security
                .suspicious_patterns
                .iter()
                .filter_map(|raw| Regex::new(raw).ok())
                .collect();
            (!regexes.is_empty()).then(|| (eco_name.clone(), regexes))
        })
        .collect()
}
fn compile_migration_patterns(
config: &EcosystemRulesConfig,
) -> HashMap<String, Vec<(Regex, String)>> {
let mut patterns = HashMap::with_capacity(config.ecosystems.len());
for (ecosystem, eco_config) in &config.ecosystems {
let mut compiled = Vec::with_capacity(eco_config.group_migrations.len());
for migration in &eco_config.group_migrations {
let regex_pattern = migration.from.replace('.', r"\.").replace('*', ".*");
if let Ok(re) = Regex::new(&format!("^{regex_pattern}$")) {
compiled.push((re, migration.to.clone()));
}
}
if !compiled.is_empty() {
patterns.insert(ecosystem.clone(), compiled);
}
}
patterns
}
fn compile_package_group_patterns(
config: &EcosystemRulesConfig,
) -> HashMap<String, HashMap<String, Vec<Regex>>> {
let mut eco_patterns = HashMap::with_capacity(config.ecosystems.len());
for (ecosystem, eco_config) in &config.ecosystems {
let mut group_patterns = HashMap::with_capacity(eco_config.package_groups.len());
for (group_name, group) in &eco_config.package_groups {
let glob_count = group.members.iter().filter(|m| m.contains('*')).count();
let mut compiled = Vec::with_capacity(glob_count);
for member in &group.members {
if member.contains('*') {
let regex_pattern = member.replace('.', r"\.").replace('*', ".*");
if let Ok(re) = Regex::new(&format!("^{regex_pattern}$")) {
compiled.push(re);
}
}
}
if !compiled.is_empty() {
group_patterns.insert(group_name.clone(), compiled);
}
}
if !group_patterns.is_empty() {
eco_patterns.insert(ecosystem.clone(), group_patterns);
}
}
eco_patterns
}
#[must_use]
pub const fn config(&self) -> &EcosystemRulesConfig {
&self.config
}
/// Normalizes `name` according to the rules configured for `ecosystem`.
///
/// When the configuration has no entry for the ecosystem, the name is simply
/// lowercased.
#[must_use]
pub fn normalize_name(&self, name: &str, ecosystem: &Ecosystem) -> String {
    let key = Self::ecosystem_key(ecosystem);
    match self.config.ecosystems.get(&key) {
        Some(eco_config) => self.apply_normalization(name, eco_config),
        None => name.to_lowercase(),
    }
}
/// Applies the `normalization` section of `config` to `name`.
///
/// Steps, in order:
/// 1. Case: names starting with `@` go through `normalize_scoped_name`; all
///    other names are lowercased unless `case_sensitive` is set.
/// 2. `equivalent_chars`: within each group, every entry after the first is
///    replaced by the first entry.
/// 3. `collapse_separators`, when enabled.
/// 4. `strip_version_suffix`, when enabled.
fn apply_normalization(&self, name: &str, config: &EcosystemConfig) -> String {
    let rules = &config.normalization;

    let mut normalized = if name.starts_with('@') {
        self.normalize_scoped_name(name, rules)
    } else if rules.case_sensitive {
        name.to_string()
    } else {
        name.to_lowercase()
    };

    for group in &rules.equivalent_chars {
        // A group with fewer than two entries has nothing to rewrite.
        if let Some((canonical, variants)) = group.split_first() {
            for variant in variants {
                normalized = normalized.replace(variant.as_str(), canonical);
            }
        }
    }

    if rules.collapse_separators {
        normalized = self.collapse_separators(&normalized);
    }
    if rules.strip_version_suffix {
        normalized = self.strip_go_version_suffix(&normalized);
    }
    normalized
}
/// Applies the configured `scope_handling` to a scoped name such as
/// `@scope/package`.
///
/// * `Lowercase` lowercases the whole name.
/// * `PreserveCase` returns the name unchanged.
/// * `PreserveScopeCase` splits at the first `/` and lowercases both halves; a
///   name without `/` is lowercased as a whole.
fn normalize_scoped_name(&self, name: &str, norm: &NormalizationConfig) -> String {
    match norm.scope_handling {
        ScopeHandling::PreserveCase => name.to_string(),
        ScopeHandling::Lowercase => name.to_lowercase(),
        // NOTE(review): despite the variant name, the scope is lowercased here
        // as well as the package part; confirm that this is intended.
        ScopeHandling::PreserveScopeCase => match name.split_once('/') {
            Some((scope, package)) => {
                format!("{}/{}", scope.to_lowercase(), package.to_lowercase())
            }
            None => name.to_lowercase(),
        },
    }
}
/// Collapses every run of separators (`-`, `_`, `.`) to the first separator of
/// the run, then trims separators from both ends of the result.
fn collapse_separators(&self, name: &str) -> String {
    const fn is_separator(c: char) -> bool {
        matches!(c, '-' | '_' | '.')
    }

    let mut collapsed = String::with_capacity(name.len());
    let mut prev_was_separator = false;
    for ch in name.chars() {
        let separator = is_separator(ch);
        // Only a separator that directly follows another separator is skipped.
        if !(separator && prev_was_separator) {
            collapsed.push(ch);
        }
        prev_was_separator = separator;
    }
    collapsed.trim_matches(is_separator).to_string()
}
/// Removes a trailing `/v<digits>` segment (for example `/v2`) from `name`.
///
/// Names without such a suffix are returned unchanged.
fn strip_go_version_suffix(&self, name: &str) -> String {
    use std::sync::LazyLock;

    // Compiled on first use and shared by all later calls.
    static TRAILING_MAJOR_VERSION: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"/v\d+$").expect("static regex"));

    TRAILING_MAJOR_VERSION.replace(name, "").into_owned()
}
/// Returns `true` when `name_a` and `name_b` normalize to the same string for
/// `ecosystem` (see `normalize_name`).
#[must_use]
pub fn names_match(&self, name_a: &str, name_b: &str, ecosystem: &Ecosystem) -> bool {
    self.normalize_name(name_a, ecosystem) == self.normalize_name(name_b, ecosystem)
}
/// Resolves `name` to its canonical package name, if one is configured.
///
/// The ecosystem's alias table is searched first, then the custom equivalence
/// rules (which are not ecosystem-specific). In both, `name` matches an entry
/// when it equals the canonical name or any of its aliases, ignoring case.
/// The canonical name is returned exactly as written in the configuration.
#[must_use]
pub fn get_canonical(&self, name: &str, ecosystem: &Ecosystem) -> Option<String> {
    let key = Self::ecosystem_key(ecosystem);
    let wanted = name.to_lowercase();

    let from_ecosystem = self.config.ecosystems.get(&key).and_then(|eco_config| {
        eco_config
            .aliases
            .iter()
            .find(|(canonical, aliases)| {
                canonical.to_lowercase() == wanted
                    || aliases.iter().any(|alias| alias.to_lowercase() == wanted)
            })
            .map(|(canonical, _)| canonical.clone())
    });

    from_ecosystem.or_else(|| {
        self.config
            .custom_rules
            .equivalences
            .iter()
            .find(|equiv| {
                equiv.canonical.to_lowercase() == wanted
                    || equiv
                        .aliases
                        .iter()
                        .any(|alias| alias.to_lowercase() == wanted)
            })
            .map(|equiv| equiv.canonical.clone())
    })
}
/// Returns `true` when `name` is listed as an alias of `canonical` in the
/// alias table of `ecosystem`.
///
/// Both the canonical name and the alias are compared case-insensitively. The
/// canonical key is matched by comparison rather than by exact map lookup, so
/// a table keyed with mixed-case names (for example `Pillow`) is found just as
/// `get_canonical` finds it. Only the ecosystem's alias table is consulted;
/// the custom equivalence rules are not.
#[must_use]
pub fn is_alias(&self, canonical: &str, name: &str, ecosystem: &Ecosystem) -> bool {
    let eco_key = Self::ecosystem_key(ecosystem);
    let Some(eco_config) = self.config.ecosystems.get(&eco_key) else {
        return false;
    };

    let canonical_lower = canonical.to_lowercase();
    let name_lower = name.to_lowercase();
    eco_config
        .aliases
        .iter()
        .filter(|(key, _)| key.to_lowercase() == canonical_lower)
        .any(|(_, aliases)| aliases.iter().any(|alias| alias.to_lowercase() == name_lower))
}
/// Returns the `strip_suffixes` configured for `ecosystem`, in configuration
/// order, or an empty list when the ecosystem has no configuration entry.
#[must_use]
pub fn get_strip_suffixes(&self, ecosystem: &Ecosystem) -> Vec<&str> {
    let key = Self::ecosystem_key(ecosystem);
    match self.config.ecosystems.get(&key) {
        Some(eco_config) => eco_config
            .strip_suffixes
            .iter()
            .map(String::as_str)
            .collect(),
        None => Vec::new(),
    }
}
/// Returns the `strip_prefixes` configured for `ecosystem`, in configuration
/// order, or an empty list when the ecosystem has no configuration entry.
#[must_use]
pub fn get_strip_prefixes(&self, ecosystem: &Ecosystem) -> Vec<&str> {
    let key = Self::ecosystem_key(ecosystem);
    match self.config.ecosystems.get(&key) {
        Some(eco_config) => eco_config
            .strip_prefixes
            .iter()
            .map(String::as_str)
            .collect(),
        None => Vec::new(),
    }
}
/// Lowercases `name`, then removes at most one configured prefix and at most
/// one configured suffix for `ecosystem`.
///
/// Prefixes and suffixes are tried in configuration order and the first one
/// that matches is removed. They are compared as written against the
/// lowercased name. The suffix is looked for after the prefix has been removed.
#[must_use]
pub fn strip_affixes(&self, name: &str, ecosystem: &Ecosystem) -> String {
    let lowered = name.to_lowercase();

    let without_prefix = self
        .get_strip_prefixes(ecosystem)
        .into_iter()
        .find_map(|prefix| lowered.strip_prefix(prefix))
        .unwrap_or(lowered.as_str());

    let without_suffix = self
        .get_strip_suffixes(ecosystem)
        .into_iter()
        .find_map(|suffix| without_prefix.strip_suffix(suffix))
        .unwrap_or(without_prefix);

    without_suffix.to_string()
}
/// Looks `name` up in the known-typosquat list of `ecosystem`.
///
/// Returns the first entry whose `malicious` name equals `name`, ignoring
/// case. Always returns `None` when `enable_security_checks` is off or the
/// ecosystem has no configuration entry.
#[must_use]
pub fn is_typosquat(&self, name: &str, ecosystem: &Ecosystem) -> Option<&TyposquatEntry> {
    if !self.config.settings.enable_security_checks {
        return None;
    }

    let key = Self::ecosystem_key(ecosystem);
    let wanted = name.to_lowercase();
    self.config
        .ecosystems
        .get(&key)?
        .security
        .known_typosquats
        .iter()
        .find(|entry| entry.malicious.to_lowercase() == wanted)
}
/// Returns `true` when `name` matches any compiled suspicious pattern of
/// `ecosystem`.
///
/// The name is matched as given (it is not lowercased first). Always returns
/// `false` when `enable_security_checks` is off.
#[must_use]
pub fn is_suspicious(&self, name: &str, ecosystem: &Ecosystem) -> bool {
    if !self.config.settings.enable_security_checks {
        return false;
    }

    let key = Self::ecosystem_key(ecosystem);
    let Some(regexes) = self.suspicious_patterns.get(&key) else {
        return false;
    };
    regexes.iter().any(|re| re.is_match(name))
}
/// Returns `true` when `name` appears, ignoring case, in the `known_malicious`
/// list of `ecosystem`.
///
/// Always returns `false` when `enable_security_checks` is off or the
/// ecosystem has no configuration entry.
#[must_use]
pub fn is_known_malicious(&self, name: &str, ecosystem: &Ecosystem) -> bool {
    if !self.config.settings.enable_security_checks {
        return false;
    }

    let key = Self::ecosystem_key(ecosystem);
    let Some(eco_config) = self.config.ecosystems.get(&key) else {
        return false;
    };

    let wanted = name.to_lowercase();
    eco_config
        .security
        .known_malicious
        .iter()
        .any(|entry| entry.to_lowercase() == wanted)
}
/// Returns the migrated form of `group` according to the first matching group
/// migration of `ecosystem`, or `None` when no migration matches.
///
/// The migration's `to` value is passed to `Regex::replace` as the replacement
/// string, so `$`-references inside it are expanded by the regex engine.
#[must_use]
pub fn get_migrated_group(&self, group: &str, ecosystem: &Ecosystem) -> Option<String> {
    let key = Self::ecosystem_key(ecosystem);
    self.migration_patterns
        .get(&key)?
        .iter()
        .find(|(matcher, _)| matcher.is_match(group))
        .map(|(matcher, replacement)| {
            matcher.replace(group, replacement.as_str()).into_owned()
        })
}
/// Returns the name of the first package group of `ecosystem` that `name`
/// belongs to, or `None` when it belongs to none.
///
/// A name belongs to a group when, compared in lowercase, it equals the
/// group's `canonical` name, equals one of its members that contains no `*`,
/// or matches one of the matchers compiled for the group's `*` members.
#[must_use]
pub fn get_package_group(&self, name: &str, ecosystem: &Ecosystem) -> Option<&str> {
    let key = Self::ecosystem_key(ecosystem);
    let wanted = name.to_lowercase();
    let eco_config = self.config.ecosystems.get(&key)?;
    let globs_by_group = self.package_group_patterns.get(&key);

    eco_config
        .package_groups
        .iter()
        .find(|&(group_name, group)| {
            let is_canonical = group.canonical.to_lowercase() == wanted;
            let is_exact_member = || {
                group
                    .members
                    .iter()
                    .any(|member| !member.contains('*') && member.to_lowercase() == wanted)
            };
            // Matchers exist only for groups that have at least one `*` member.
            let matches_glob = || {
                globs_by_group
                    .and_then(|groups| groups.get(group_name))
                    .is_some_and(|globs| globs.iter().any(|re| re.is_match(&wanted)))
            };
            is_canonical || is_exact_member() || matches_glob()
        })
        .map(|(group_name, _)| group_name.as_str())
}
/// Returns the package that provides `concept` in `target_ecosystem`,
/// according to the `cross_ecosystem` table.
///
/// Returns `None` when the concept is unknown, when it has no entry for the
/// target ecosystem, or when that entry holds no package name.
#[must_use]
pub fn get_cross_ecosystem_equivalent(
    &self,
    concept: &str,
    target_ecosystem: &Ecosystem,
) -> Option<&str> {
    let key = Self::ecosystem_key(target_ecosystem);
    let per_ecosystem = self.config.cross_ecosystem.get(concept)?;
    per_ecosystem.get(&key)?.as_deref()
}
/// Returns `true` when `name` starts with one of the custom
/// `internal_prefixes`. The comparison is case-sensitive.
#[must_use]
pub fn is_internal_package(&self, name: &str) -> bool {
    for prefix in &self.config.custom_rules.internal_prefixes {
        if name.starts_with(prefix) {
            return true;
        }
    }
    false
}
/// Returns `true` when `name` equals, ignoring case, one of the custom
/// `ignored_packages`.
#[must_use]
pub fn is_ignored(&self, name: &str) -> bool {
    let wanted = name.to_lowercase();
    for ignored in &self.config.custom_rules.ignored_packages {
        if ignored.to_lowercase() == wanted {
            return true;
        }
    }
    false
}
/// Maps `ecosystem` to the string key used by the configuration and the
/// compiled pattern tables.
///
/// Known ecosystems map to a fixed lowercase name; `Unknown` ecosystems use
/// their own name, lowercased.
fn ecosystem_key(ecosystem: &Ecosystem) -> String {
    let fixed = match ecosystem {
        Ecosystem::Unknown(other) => return other.to_lowercase(),
        Ecosystem::Npm => "npm",
        Ecosystem::PyPi => "pypi",
        Ecosystem::Cargo => "cargo",
        Ecosystem::Maven => "maven",
        Ecosystem::Golang => "golang",
        Ecosystem::Nuget => "nuget",
        Ecosystem::RubyGems => "rubygems",
        Ecosystem::Composer => "composer",
        Ecosystem::CocoaPods => "cocoapods",
        Ecosystem::Swift => "swift",
        Ecosystem::Hex => "hex",
        Ecosystem::Pub => "pub",
        Ecosystem::Hackage => "hackage",
        Ecosystem::Cpan => "cpan",
        Ecosystem::Cran => "cran",
        Ecosystem::Conda => "conda",
        Ecosystem::Conan => "conan",
        Ecosystem::Deb => "deb",
        Ecosystem::Rpm => "rpm",
        Ecosystem::Apk => "apk",
        Ecosystem::Generic => "generic",
    };
    fixed.to_string()
}
}
impl Default for EcosystemRules {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// PyPI names that differ only in separator or case normalize identically.
    #[test]
    fn test_pypi_normalization() {
        let rules = EcosystemRules::new();
        assert_eq!(
            rules.normalize_name("python-dateutil", &Ecosystem::PyPi),
            "python-dateutil"
        );
        assert_eq!(
            rules.normalize_name("python_dateutil", &Ecosystem::PyPi),
            "python-dateutil"
        );
        assert_eq!(
            rules.normalize_name("Python.Dateutil", &Ecosystem::PyPi),
            "python-dateutil"
        );
    }

    /// Cargo names normalize hyphens to underscores.
    #[test]
    fn test_cargo_normalization() {
        let rules = EcosystemRules::new();
        assert_eq!(
            rules.normalize_name("serde-json", &Ecosystem::Cargo),
            "serde_json"
        );
        assert_eq!(
            rules.normalize_name("serde_json", &Ecosystem::Cargo),
            "serde_json"
        );
    }

    /// Scoped npm names are lowercased with the built-in configuration.
    #[test]
    fn test_npm_scoped_normalization() {
        let rules = EcosystemRules::new();
        assert_eq!(
            rules.normalize_name("@Angular/Core", &Ecosystem::Npm),
            "@angular/core"
        );
    }

    /// `names_match` compares the normalized forms.
    #[test]
    fn test_names_match() {
        let rules = EcosystemRules::new();
        assert!(rules.names_match("python-dateutil", "python_dateutil", &Ecosystem::PyPi));
        assert!(rules.names_match("serde-json", "serde_json", &Ecosystem::Cargo));
    }

    /// Configured prefixes and suffixes are removed.
    #[test]
    fn test_strip_affixes() {
        let rules = EcosystemRules::new();
        assert_eq!(
            rules.strip_affixes("python-requests", &Ecosystem::PyPi),
            "requests"
        );
        assert_eq!(rules.strip_affixes("lodash-js", &Ecosystem::Npm), "lodash");
    }

    /// A known typosquat resolves to its entry; a legitimate name does not.
    #[test]
    fn test_typosquat_detection() {
        let rules = EcosystemRules::new();
        let result = rules.is_typosquat("python-dateutils", &Ecosystem::PyPi);
        assert!(result.is_some());
        assert_eq!(result.unwrap().legitimate, "python-dateutil");
        assert!(rules.is_typosquat("requests", &Ecosystem::PyPi).is_none());
    }

    /// Both a group member and the group's own name resolve to the group.
    #[test]
    fn test_package_group() {
        let rules = EcosystemRules::new();
        assert_eq!(
            rules.get_package_group("lodash-es", &Ecosystem::Npm),
            Some("lodash")
        );
        assert_eq!(
            rules.get_package_group("lodash", &Ecosystem::Npm),
            Some("lodash")
        );
    }

    /// A concept maps to a different package in each ecosystem.
    #[test]
    fn test_cross_ecosystem() {
        let rules = EcosystemRules::new();
        assert_eq!(
            rules.get_cross_ecosystem_equivalent("yaml_parsing", &Ecosystem::PyPi),
            Some("pyyaml")
        );
        assert_eq!(
            rules.get_cross_ecosystem_equivalent("yaml_parsing", &Ecosystem::Npm),
            Some("js-yaml")
        );
    }

    /// A trailing `/vN` segment is removed from Go module paths.
    #[test]
    fn test_go_version_suffix() {
        let rules = EcosystemRules::new();
        assert_eq!(
            rules.normalize_name("github.com/foo/bar/v2", &Ecosystem::Golang),
            "github.com/foo/bar"
        );
        assert_eq!(
            rules.normalize_name("github.com/foo/bar", &Ecosystem::Golang),
            "github.com/foo/bar"
        );
    }

    /// Aliases resolve to their canonical package name.
    #[test]
    fn test_canonical_lookup() {
        let rules = EcosystemRules::new();
        assert_eq!(
            rules.get_canonical("PIL", &Ecosystem::PyPi),
            Some("pillow".to_string())
        );
        assert_eq!(
            rules.get_canonical("sklearn", &Ecosystem::PyPi),
            Some("scikit-learn".to_string())
        );
    }

    /// Custom rules loaded from YAML drive the internal/ignored checks.
    #[test]
    fn test_custom_config() {
        // The two lists must be nested under `custom_rules`; without the
        // indentation they would be parsed as unrelated top-level keys.
        let yaml = r#"
version: "1.0"
custom_rules:
  internal_prefixes:
    - "@mycompany/"
  ignored_packages:
    - "internal-tool"
"#;
        let config = EcosystemRulesConfig::from_yaml(yaml).unwrap();
        let rules = EcosystemRules::with_config(config);
        assert!(rules.is_internal_package("@mycompany/logger"));
        assert!(!rules.is_internal_package("lodash"));
        assert!(rules.is_ignored("internal-tool"));
    }
}