use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::Path;
/// Root configuration object: global settings, per-ecosystem rules,
/// cross-ecosystem tables and user-defined custom rules.
///
/// Every field has a serde default, so any of them may be omitted from the
/// serialized form.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct EcosystemRulesConfig {
/// Configuration version string; `default_version()` supplies the value when absent.
#[serde(default = "default_version")]
pub version: String,
/// Global switches; `GlobalSettings::default()` when absent.
#[serde(default)]
pub settings: GlobalSettings,
/// Per-ecosystem rules keyed by ecosystem name (the built-ins use lowercase
/// names such as `pypi` or `npm`).
#[serde(default)]
pub ecosystems: HashMap<String, EcosystemConfig>,
/// Two-level table: outer key -> (inner key -> optional string). The built-in
/// data uses a capability name (e.g. `yaml_parsing`) as the outer key, an
/// ecosystem name as the inner key and a package name as the value.
/// NOTE(review): `None` presumably means "no separate package needed" — confirm.
#[serde(default)]
pub cross_ecosystem: HashMap<String, HashMap<String, Option<String>>>,
/// User-supplied rules that are not tied to a single ecosystem.
#[serde(default)]
pub custom_rules: CustomRules,
}
/// Serde fallback for the `version` field: always the literal `"1.0"`.
fn default_version() -> String {
    String::from("1.0")
}
/// Global switches that apply across all ecosystems.
///
/// The serde defaults on each field match the hand-written `Default` impl:
/// `false`, `true`, `true`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GlobalSettings {
/// Defaults to `false` when omitted.
/// NOTE(review): presumably the case-sensitivity fallback for ecosystems
/// without their own setting — the consumer is not visible here.
#[serde(default)]
pub case_sensitive_default: bool,
/// Defaults to `true` when omitted (via `default_true`).
#[serde(default = "default_true")]
pub normalize_unicode: bool,
/// Defaults to `true` when omitted (via `default_true`). This is the only
/// setting that `EcosystemRulesConfig::merge` carries over from an overlay.
#[serde(default = "default_true")]
pub enable_security_checks: bool,
}
impl Default for GlobalSettings {
    /// Case-insensitive by default, with Unicode normalization and security
    /// checks switched on — the same values serde uses for omitted fields.
    fn default() -> Self {
        let enabled = true;
        Self {
            enable_security_checks: enabled,
            normalize_unicode: enabled,
            case_sensitive_default: !enabled,
        }
    }
}
/// Serde default provider for boolean fields that are on unless the
/// configuration says otherwise.
const fn default_true() -> bool {
    true
}
/// Rule set for a single package ecosystem. Every field is optional in the
/// serialized form and falls back to its type's `Default`.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct EcosystemConfig {
/// Name-normalization options.
#[serde(default)]
pub normalization: NormalizationConfig,
/// Name prefixes registered for stripping (e.g. `python-` in the built-in
/// pypi rules). How they are applied is up to the consumer.
#[serde(default)]
pub strip_prefixes: Vec<String>,
/// Name suffixes registered for stripping (e.g. `-js` in the built-in npm rules).
#[serde(default)]
pub strip_suffixes: Vec<String>,
/// Alias table; in the built-in pypi data the key is the canonical package
/// name and the value lists alternative names.
#[serde(default)]
pub aliases: HashMap<String, Vec<String>>,
/// Named groups of related packages.
#[serde(default)]
pub package_groups: HashMap<String, PackageGroup>,
/// Version scheme and its tag/qualifier lists.
#[serde(default)]
pub versioning: VersioningConfig,
/// Typosquat, suspicious-pattern and known-malicious lists.
#[serde(default)]
pub security: SecurityConfig,
/// Import-path patterns (the built-ins only populate this for golang).
#[serde(default)]
pub import_mappings: Vec<ImportMapping>,
/// Group renames (the built-ins only populate this for maven).
#[serde(default)]
pub group_migrations: Vec<GroupMigration>,
}
/// Name-normalization options for one ecosystem. All flags default to `false`,
/// the list defaults to empty and `scope_handling` to `ScopeHandling::Lowercase`.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct NormalizationConfig {
/// Whether names in this ecosystem are case-sensitive.
#[serde(default)]
pub case_sensitive: bool,
/// Groups of strings declared equivalent to each other (the built-in pypi
/// rules list `-`, `_` and `.` as one group).
#[serde(default)]
pub equivalent_chars: Vec<Vec<String>>,
/// NOTE(review): presumably collapses runs of separators — consumer not visible here.
#[serde(default)]
pub collapse_separators: bool,
/// Set by the built-in maven and composer rules.
/// NOTE(review): presumably compares the full group/vendor-qualified name — confirm.
#[serde(default)]
pub use_full_coordinate: bool,
/// Set by the built-in golang rules.
/// NOTE(review): presumably drops a trailing major-version path segment — confirm.
#[serde(default)]
pub strip_version_suffix: bool,
/// Case policy for scoped names.
#[serde(default)]
pub scope_handling: ScopeHandling,
}
/// Case policy for scoped package names. Serialized in snake_case:
/// `lowercase`, `preserve_scope_case`, `preserve_case`.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ScopeHandling {
/// The default variant.
#[default]
Lowercase,
/// Used by the built-in npm rules.
PreserveScopeCase,
/// NOTE(review): presumably leaves the whole name untouched — confirm with the consumer.
PreserveCase,
}
/// A set of related packages together with the name chosen to represent them.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct PackageGroup {
/// Representative package name; required when deserializing.
pub canonical: String,
/// Member package names; the built-in npm data also contains a wildcard
/// entry (`@babel/*`). Defaults to empty.
#[serde(default)]
pub members: Vec<String>,
}
/// Versioning rules for one ecosystem.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct VersioningConfig {
/// Version scheme; `default_semver()` (i.e. `VersionSpec::Semver`) when omitted.
#[serde(default = "default_semver")]
pub spec: VersionSpec,
/// Pre-release tag names (e.g. `alpha`, `rc`); empty when omitted.
#[serde(default)]
pub prerelease_tags: Vec<String>,
/// Qualifier names in a defined order (populated for maven in the built-ins);
/// empty when omitted.
#[serde(default)]
pub qualifier_order: Vec<String>,
}
/// Serde fallback for `VersioningConfig::spec`: semantic versioning.
const fn default_semver() -> VersionSpec {
    VersionSpec::Semver
}
impl Default for VersioningConfig {
    /// Semver with no pre-release tags and no qualifier ordering — identical
    /// to what serde produces for an empty `versioning` section.
    fn default() -> Self {
        Self {
            qualifier_order: Vec::new(),
            prerelease_tags: Vec::new(),
            spec: VersionSpec::Semver,
        }
    }
}
/// Version scheme identifier. Serialized as the lowercase variant name:
/// `semver`, `pep440`, `maven`, `rubygems`, `gomod`, `generic`.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum VersionSpec {
/// The default; used by the built-in npm, cargo, nuget and composer rules.
#[default]
Semver,
/// Used by the built-in pypi rules.
Pep440,
/// Used by the built-in maven rules.
Maven,
/// Used by the built-in rubygems rules.
Rubygems,
/// Used by the built-in golang rules.
Gomod,
/// Not used by any built-in rule set.
Generic,
}
/// Security-related lists for one ecosystem; every list defaults to empty.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct SecurityConfig {
/// Known typosquat pairs (malicious name -> legitimate name).
#[serde(default)]
pub known_typosquats: Vec<TyposquatEntry>,
/// Pattern strings; the built-in npm entry is a regular expression.
/// NOTE(review): the regex dialect is decided by the consumer, not visible here.
#[serde(default)]
pub suspicious_patterns: Vec<String>,
/// Names of packages known to be malicious.
#[serde(default)]
pub known_malicious: Vec<String>,
}
/// One known typosquat: a malicious package name and the legitimate package
/// it imitates.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct TyposquatEntry {
/// Name of the malicious package; required.
pub malicious: String,
/// Name of the legitimate package; required.
pub legitimate: String,
/// Optional free-text note; `None` when omitted.
#[serde(default)]
pub description: Option<String>,
}
/// Associates an import-path pattern with a mapping type label.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ImportMapping {
/// Pattern string; the built-in golang entries use `*` wildcards
/// (e.g. `github.com/*/*`).
pub pattern: String,
/// Mapping type label (e.g. `github`). Serialized under the key `type`,
/// which cannot be used directly as a Rust field name.
#[serde(rename = "type")]
pub mapping_type: String,
}
/// Describes a rename from one group pattern to another
/// (the built-in maven entry is `javax.*` -> `jakarta.*`).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GroupMigration {
/// Source pattern; required.
pub from: String,
/// Target pattern; required.
pub to: String,
/// Optional version string; `None` when omitted.
/// NOTE(review): presumably the version from which the rename applies — confirm.
#[serde(default)]
pub after_version: Option<String>,
}
/// User-defined rules that are not tied to one ecosystem. All lists default
/// to empty, and `EcosystemRulesConfig::merge` appends to each of them.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct CustomRules {
/// Name prefixes that mark packages as internal (the test data uses `@mycompany/`).
#[serde(default)]
pub internal_prefixes: Vec<String>,
/// User-defined canonical-name/alias groups.
#[serde(default)]
pub equivalences: Vec<CustomEquivalence>,
/// Package names to ignore.
#[serde(default)]
pub ignored_packages: Vec<String>,
}
/// A user-defined equivalence between a canonical name and its aliases.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CustomEquivalence {
/// Canonical package name; required.
pub canonical: String,
/// Alternative names; required (no serde default on this field).
pub aliases: Vec<String>,
/// Defaults to `false`.
/// NOTE(review): presumably restricts the equivalence to matching versions — confirm.
#[serde(default)]
pub version_sensitive: bool,
}
impl EcosystemRulesConfig {
#[must_use]
pub fn new() -> Self {
Self {
version: default_version(),
settings: GlobalSettings::default(),
ecosystems: HashMap::new(),
cross_ecosystem: HashMap::new(),
custom_rules: CustomRules::default(),
}
}
/// Creates a configuration pre-populated with the built-in ecosystem rules
/// and cross-ecosystem tables.
#[must_use]
pub fn builtin() -> Self {
    let mut rules = Self::new();
    rules.load_builtin_rules();
    rules
}
/// Parses a configuration from a YAML document.
///
/// # Errors
/// Returns the `serde_yaml_ng` error when the text is not valid YAML or does
/// not match the configuration schema.
pub fn from_yaml(yaml: &str) -> Result<Self, serde_yaml_ng::Error> {
    serde_yaml_ng::from_str::<Self>(yaml)
}
/// Parses a configuration from a JSON document.
///
/// # Errors
/// Returns the `serde_json` error when the text is not valid JSON or does not
/// match the configuration schema.
pub fn from_json(json: &str) -> Result<Self, serde_json::Error> {
    serde_json::from_str::<Self>(json)
}
/// Reads `path` and parses it according to its extension (case-insensitive):
/// `yaml`/`yml` as YAML, `json` as JSON. Any other or missing extension is
/// tried as YAML first and as JSON second.
///
/// # Errors
/// * [`ConfigError::Io`] when the file cannot be read.
/// * [`ConfigError::Yaml`] / [`ConfigError::Json`] when parsing fails. In the
///   fallback case the YAML error is discarded and only the JSON error is
///   reported.
pub fn from_file(path: &Path) -> Result<Self, ConfigError> {
    let content = match std::fs::read_to_string(path) {
        Ok(text) => text,
        Err(io_err) => return Err(ConfigError::Io(io_err)),
    };

    let ext = path
        .extension()
        .and_then(|raw| raw.to_str())
        .map(str::to_lowercase)
        .unwrap_or_default();

    let as_yaml = |text: &str| Self::from_yaml(text).map_err(ConfigError::Yaml);
    let as_json = |text: &str| Self::from_json(text).map_err(ConfigError::Json);

    match ext.as_str() {
        "yaml" | "yml" => as_yaml(&content),
        "json" => as_json(&content),
        _ => match as_yaml(&content) {
            Ok(parsed) => Ok(parsed),
            Err(_) => as_json(&content),
        },
    }
}
/// Loads the first configuration file in `paths` that exists, falling back to
/// the built-in rules when none does.
///
/// A candidate that is exactly `~`, or that starts with `~` followed by a
/// path separator, is resolved against the user's home directory; such a
/// candidate is skipped when the home directory cannot be determined. Any
/// other candidate (including `~name/...`) is used verbatim.
///
/// # Errors
/// Returns the [`ConfigError`] from [`Self::from_file`] for the first existing
/// candidate; later candidates are not tried after a failure.
pub fn load_with_precedence(paths: &[&str]) -> Result<Self, ConfigError> {
    for path_str in paths {
        let path = match path_str.strip_prefix('~') {
            // Only "~" and "~<separator>..." mean "home directory". Slicing at a
            // fixed byte offset would panic on "~" and on "~" followed by a
            // multi-byte character, and would mangle "~name".
            Some(rest) if rest.is_empty() || rest.starts_with(std::path::is_separator) => {
                match dirs::home_dir() {
                    // Drop every leading separator: joining an absolute path
                    // would replace the home directory instead of extending it.
                    Some(home) => home.join(rest.trim_start_matches(std::path::is_separator)),
                    None => continue,
                }
            }
            _ => Path::new(path_str).to_path_buf(),
        };
        if path.exists() {
            return Self::from_file(&path);
        }
    }
    Ok(Self::builtin())
}
/// Installs the built-in rule set for every supported ecosystem and then the
/// cross-ecosystem tables.
///
/// Each rule set is stored under its lowercase ecosystem name and replaces
/// any entry already present under that name.
fn load_builtin_rules(&mut self) {
    // Turns a list of literals into the owned `Vec<String>` the config structs hold.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_string()).collect()
    }

    let pypi = EcosystemConfig {
        normalization: NormalizationConfig {
            case_sensitive: false,
            equivalent_chars: vec![owned(&["-", "_", "."])],
            collapse_separators: true,
            ..Default::default()
        },
        strip_prefixes: owned(&["python-", "py-", "lib"]),
        strip_suffixes: owned(&["-python", "-py", "-py3", "-lib"]),
        aliases: Self::pypi_aliases(),
        versioning: VersioningConfig {
            spec: VersionSpec::Pep440,
            prerelease_tags: owned(&["a", "b", "rc", "alpha", "beta", "dev", "post"]),
            ..Default::default()
        },
        security: SecurityConfig {
            known_typosquats: vec![
                TyposquatEntry {
                    malicious: "python-dateutils".to_string(),
                    legitimate: "python-dateutil".to_string(),
                    description: Some("Common typosquat".to_string()),
                },
                TyposquatEntry {
                    malicious: "request".to_string(),
                    legitimate: "requests".to_string(),
                    description: Some("Missing 's' typosquat".to_string()),
                },
            ],
            ..Default::default()
        },
        ..Default::default()
    };

    let npm = EcosystemConfig {
        normalization: NormalizationConfig {
            case_sensitive: false,
            scope_handling: ScopeHandling::PreserveScopeCase,
            ..Default::default()
        },
        strip_prefixes: owned(&["node-", "@types/"]),
        strip_suffixes: owned(&["-js", ".js", "-node"]),
        package_groups: Self::npm_package_groups(),
        versioning: VersioningConfig {
            spec: VersionSpec::Semver,
            prerelease_tags: owned(&["alpha", "beta", "rc", "next", "canary"]),
            ..Default::default()
        },
        security: SecurityConfig {
            suspicious_patterns: owned(&[r"^[a-z]{1,2}$"]),
            ..Default::default()
        },
        ..Default::default()
    };

    let cargo = EcosystemConfig {
        normalization: NormalizationConfig {
            case_sensitive: false,
            equivalent_chars: vec![owned(&["_", "-"])],
            ..Default::default()
        },
        strip_prefixes: owned(&["rust-", "lib"]),
        strip_suffixes: owned(&["-rs", "-rust"]),
        versioning: VersioningConfig {
            spec: VersionSpec::Semver,
            ..Default::default()
        },
        ..Default::default()
    };

    let maven = EcosystemConfig {
        normalization: NormalizationConfig {
            case_sensitive: true,
            use_full_coordinate: true,
            ..Default::default()
        },
        group_migrations: vec![GroupMigration {
            from: "javax.*".to_string(),
            to: "jakarta.*".to_string(),
            after_version: Some("9".to_string()),
        }],
        versioning: VersioningConfig {
            spec: VersionSpec::Maven,
            qualifier_order: owned(&[
                "alpha",
                "beta",
                "milestone",
                "rc",
                "snapshot",
                "final",
                "ga",
                "sp",
            ]),
            ..Default::default()
        },
        ..Default::default()
    };

    let golang = EcosystemConfig {
        normalization: NormalizationConfig {
            case_sensitive: true,
            strip_version_suffix: true,
            ..Default::default()
        },
        import_mappings: vec![
            ImportMapping {
                pattern: "github.com/*/*".to_string(),
                mapping_type: "github".to_string(),
            },
            ImportMapping {
                pattern: "golang.org/x/*".to_string(),
                mapping_type: "stdlib_extension".to_string(),
            },
        ],
        versioning: VersioningConfig {
            spec: VersionSpec::Gomod,
            ..Default::default()
        },
        ..Default::default()
    };

    let nuget = EcosystemConfig {
        normalization: NormalizationConfig {
            case_sensitive: false,
            ..Default::default()
        },
        versioning: VersioningConfig {
            spec: VersionSpec::Semver,
            ..Default::default()
        },
        ..Default::default()
    };

    let rubygems = EcosystemConfig {
        normalization: NormalizationConfig {
            case_sensitive: true,
            ..Default::default()
        },
        strip_prefixes: owned(&["ruby-"]),
        strip_suffixes: owned(&["-ruby", "-rb"]),
        versioning: VersioningConfig {
            spec: VersionSpec::Rubygems,
            ..Default::default()
        },
        ..Default::default()
    };

    let composer = EcosystemConfig {
        normalization: NormalizationConfig {
            case_sensitive: false,
            use_full_coordinate: true,
            ..Default::default()
        },
        versioning: VersioningConfig {
            spec: VersionSpec::Semver,
            ..Default::default()
        },
        ..Default::default()
    };

    let builtins = [
        ("pypi", pypi),
        ("npm", npm),
        ("cargo", cargo),
        ("maven", maven),
        ("golang", golang),
        ("nuget", nuget),
        ("rubygems", rubygems),
        ("composer", composer),
    ];
    for (name, rules) in builtins {
        self.ecosystems.insert(name.to_string(), rules);
    }

    self.load_cross_ecosystem_mappings();
}
/// Builds the built-in pypi alias table: canonical package name mapped to the
/// alternative names it is also known by.
fn pypi_aliases() -> HashMap<String, Vec<String>> {
    const TABLE: &[(&str, &[&str])] = &[
        ("pillow", &["PIL", "python-pillow"]),
        ("scikit-learn", &["sklearn", "scikit_learn"]),
        ("beautifulsoup4", &["bs4", "BeautifulSoup", "beautifulsoup"]),
        ("pyyaml", &["yaml", "PyYAML"]),
        ("opencv-python", &["cv2", "opencv-python-headless", "opencv"]),
        ("python-dateutil", &["dateutil"]),
        ("attrs", &["attr"]),
        ("importlib-metadata", &["importlib_metadata"]),
        ("typing-extensions", &["typing_extensions"]),
    ];

    TABLE
        .iter()
        .map(|(canonical, alternates)| {
            let names: Vec<String> = alternates.iter().map(|alt| (*alt).to_string()).collect();
            ((*canonical).to_string(), names)
        })
        .collect()
}
/// Builds the built-in npm package groups, keyed by group name. Note that the
/// `babel` group's canonical package is `@babel/core` and its single member is
/// the wildcard `@babel/*`.
fn npm_package_groups() -> HashMap<String, PackageGroup> {
    let group = |canonical: &str, members: &[&str]| PackageGroup {
        canonical: canonical.to_string(),
        members: members.iter().map(|member| (*member).to_string()).collect(),
    };

    HashMap::from([
        (
            "lodash".to_string(),
            group(
                "lodash",
                &[
                    "lodash-es",
                    "lodash.merge",
                    "lodash.get",
                    "lodash.set",
                    "lodash.clonedeep",
                ],
            ),
        ),
        ("babel".to_string(), group("@babel/core", &["@babel/*"])),
        (
            "react".to_string(),
            group("react", &["react-dom", "react-router", "react-redux"]),
        ),
    ])
}
/// Installs the built-in cross-ecosystem tables: for each capability
/// (`yaml_parsing`, `json_parsing`, `http_client`, `testing`) the package that
/// provides it per ecosystem, or `None` where no package name is recorded.
///
/// An existing table under the same capability name is replaced.
fn load_cross_ecosystem_mappings(&mut self) {
    const TABLE: &[(&str, &[(&str, Option<&str>)])] = &[
        (
            "yaml_parsing",
            &[
                ("pypi", Some("pyyaml")),
                ("npm", Some("js-yaml")),
                ("cargo", Some("serde_yaml")),
                ("golang", Some("gopkg.in/yaml.v3")),
                ("rubygems", Some("psych")),
            ],
        ),
        (
            "json_parsing",
            &[
                ("pypi", None),
                ("npm", None),
                ("cargo", Some("serde_json")),
                ("golang", None),
            ],
        ),
        (
            "http_client",
            &[
                ("pypi", Some("requests")),
                ("npm", Some("axios")),
                ("cargo", Some("reqwest")),
                ("golang", None),
                ("rubygems", Some("faraday")),
            ],
        ),
        (
            "testing",
            &[
                ("pypi", Some("pytest")),
                ("npm", Some("jest")),
                ("cargo", None),
                ("golang", None),
                ("rubygems", Some("rspec")),
            ],
        ),
    ];

    for (capability, entries) in TABLE {
        let per_ecosystem: HashMap<String, Option<String>> = entries
            .iter()
            .map(|(ecosystem, package)| ((*ecosystem).to_string(), package.map(str::to_string)))
            .collect();
        self.cross_ecosystem
            .insert((*capability).to_string(), per_ecosystem);
    }
}
/// Looks up the rules for `ecosystem`, ignoring case.
///
/// The lowercased name is tried first, which is how the built-in rules are
/// keyed. If that misses, the keys are compared case-insensitively so that an
/// entry loaded from a user file under a mixed-case key (e.g. `PyPI`) is still
/// reachable. Should several keys differ only by case, the lexicographically
/// smallest one wins so the result is deterministic.
///
/// Returns `None` when no key matches.
#[must_use]
pub fn get_ecosystem(&self, ecosystem: &str) -> Option<&EcosystemConfig> {
    let wanted = ecosystem.to_lowercase();
    if let Some(found) = self.ecosystems.get(&wanted) {
        return Some(found);
    }
    // Keys are stored verbatim by deserialization and `merge`, so fall back to
    // a scan; the map is small and this path only runs on a miss.
    self.ecosystems
        .iter()
        .filter(|(key, _)| key.to_lowercase() == wanted)
        .min_by(|left, right| left.0.cmp(right.0))
        .map(|(_, rules)| rules)
}
/// Returns `true` when the configuration defines no rules at all: no
/// ecosystems, no cross-ecosystem tables and no custom rules of any kind
/// (internal prefixes, equivalences or ignored packages).
///
/// `version` and `settings` are not considered.
#[must_use]
pub fn is_empty(&self) -> bool {
    let custom = &self.custom_rules;
    self.ecosystems.is_empty()
        && self.cross_ecosystem.is_empty()
        && custom.equivalences.is_empty()
        // A config carrying only these lists still defines rules and must not
        // be reported as empty.
        && custom.internal_prefixes.is_empty()
        && custom.ignored_packages.is_empty()
}
pub fn merge(&mut self, other: Self) {
for (key, value) in other.ecosystems {
self.ecosystems.insert(key, value);
}
for (key, value) in other.cross_ecosystem {
self.cross_ecosystem.insert(key, value);
}
self.custom_rules
.internal_prefixes
.extend(other.custom_rules.internal_prefixes);
self.custom_rules
.equivalences
.extend(other.custom_rules.equivalences);
self.custom_rules
.ignored_packages
.extend(other.custom_rules.ignored_packages);
if other.settings.enable_security_checks != self.settings.enable_security_checks {
self.settings.enable_security_checks = other.settings.enable_security_checks;
}
}
/// Serializes the configuration to a YAML string.
///
/// # Errors
/// Returns the `serde_yaml_ng` error if serialization fails.
pub fn to_yaml(&self) -> Result<String, serde_yaml_ng::Error> {
    let rendered = serde_yaml_ng::to_string(self)?;
    Ok(rendered)
}
/// Serializes the configuration to a pretty-printed JSON string.
///
/// # Errors
/// Returns the `serde_json` error if serialization fails.
pub fn to_json(&self) -> Result<String, serde_json::Error> {
    let rendered = serde_json::to_string_pretty(self)?;
    Ok(rendered)
}
}
impl Default for EcosystemRulesConfig {
fn default() -> Self {
Self::builtin()
}
}
/// Errors produced while loading a configuration file.
#[derive(Debug)]
pub enum ConfigError {
/// The file could not be read.
Io(std::io::Error),
/// The content could not be parsed as a YAML configuration.
Yaml(serde_yaml_ng::Error),
/// The content could not be parsed as a JSON configuration.
Json(serde_json::Error),
}
impl std::fmt::Display for ConfigError {
    /// Writes a short category label followed by the underlying error's own
    /// message, e.g. `IO error: <details>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (label, cause): (&str, &dyn std::fmt::Display) = match self {
            Self::Io(inner) => ("IO error", inner),
            Self::Yaml(inner) => ("YAML parse error", inner),
            Self::Json(inner) => ("JSON parse error", inner),
        };
        write!(f, "{label}: {cause}")
    }
}
// Marker impl using the trait's default methods, so `source()` returns `None`.
// The wrapped error's message is already embedded in the `Display` output.
impl std::error::Error for ConfigError {}
#[cfg(test)]
mod tests {
use super::*;
/// The built-in configuration registers the core ecosystems.
#[test]
fn test_builtin_config() {
    let config = EcosystemRulesConfig::builtin();
    for name in ["pypi", "npm", "cargo", "maven", "golang"] {
        assert!(config.ecosystems.contains_key(name));
    }
}
/// Spot-checks the built-in pypi rules: case-insensitive, has strip prefixes,
/// knows the `pillow` alias and uses PEP 440 versioning.
#[test]
fn test_pypi_config() {
    let rules = EcosystemRulesConfig::builtin();
    let pypi = rules.get_ecosystem("pypi").unwrap();

    assert_eq!(pypi.versioning.spec, VersionSpec::Pep440);
    assert!(pypi.aliases.contains_key("pillow"));
    assert!(!pypi.strip_prefixes.is_empty());
    assert!(!pypi.normalization.case_sensitive);
}
/// Spot-checks the built-in npm rules: scope case is preserved and the
/// `lodash` package group exists.
#[test]
fn test_npm_config() {
    let rules = EcosystemRulesConfig::builtin();
    let npm = rules.get_ecosystem("npm").unwrap();

    let scope_policy = &npm.normalization.scope_handling;
    assert_eq!(*scope_policy, ScopeHandling::PreserveScopeCase);
    assert!(npm.package_groups.contains_key("lodash"));
}
/// The built-in `yaml_parsing` table names the YAML package per ecosystem.
#[test]
fn test_cross_ecosystem_mapping() {
    let rules = EcosystemRulesConfig::builtin();
    let yaml_libs = rules.cross_ecosystem.get("yaml_parsing").unwrap();

    let pypi_lib = yaml_libs.get("pypi").unwrap();
    let npm_lib = yaml_libs.get("npm").unwrap();
    assert_eq!(pypi_lib, &Some("pyyaml".to_string()));
    assert_eq!(npm_lib, &Some("js-yaml".to_string()));
}
/// A user-supplied YAML document with one custom ecosystem deserializes into
/// the expected structure.
#[test]
fn test_yaml_parsing() {
    // YAML nesting is carried by indentation: without it `custom` and its
    // children would parse as top-level keys and `ecosystems` would be null.
    let yaml = r#"
version: "1.0"
settings:
  case_sensitive_default: false
ecosystems:
  custom:
    normalization:
      case_sensitive: true
    strip_prefixes:
      - "my-"
    strip_suffixes:
      - "-custom"
"#;
    let config = EcosystemRulesConfig::from_yaml(yaml).unwrap();
    assert!(config.ecosystems.contains_key("custom"));
    let custom = config.get_ecosystem("custom").unwrap();
    assert!(custom.normalization.case_sensitive);
    assert_eq!(custom.strip_prefixes, vec!["my-"]);
    assert_eq!(custom.strip_suffixes, vec!["-custom"]);
}
/// Merging an overlay replaces a whole ecosystem entry and appends to the
/// custom-rule lists.
#[test]
fn test_config_merge() {
    let mut base = EcosystemRulesConfig::builtin();
    // YAML nesting is carried by indentation: `pypi` must sit under
    // `ecosystems`, and `internal_prefixes` under `custom_rules`.
    let overlay = EcosystemRulesConfig::from_yaml(
        r#"
ecosystems:
  pypi:
    strip_prefixes:
      - "custom-"
custom_rules:
  internal_prefixes:
    - "@mycompany/"
"#,
    )
    .unwrap();
    base.merge(overlay);
    let pypi = base.get_ecosystem("pypi").unwrap();
    // The overlay entry replaces the built-in pypi entry as a whole.
    assert_eq!(pypi.strip_prefixes, vec!["custom-"]);
    assert!(
        base.custom_rules
            .internal_prefixes
            .contains(&"@mycompany/".to_string())
    );
}
/// The built-in pypi rules ship with typosquat entries, the first of which is
/// `python-dateutils` -> `python-dateutil`.
#[test]
fn test_security_config() {
    let rules = EcosystemRulesConfig::builtin();
    let typosquats = &rules.get_ecosystem("pypi").unwrap().security.known_typosquats;
    assert!(!typosquats.is_empty());

    let first = &typosquats[0];
    assert_eq!(first.legitimate, "python-dateutil");
    assert_eq!(first.malicious, "python-dateutils");
}
}