use indexmap::IndexMap;
use regex::Regex;
use std::collections::{HashMap, HashSet};
use crate::model::{CanonicalId, Component};
use super::custom_rules::{AliasPattern, EquivalenceGroup, ExclusionRule, MatchingRulesConfig};
/// Outcome of running [`RuleEngine::apply`] over a set of components.
#[derive(Debug, Clone, Default)]
pub struct RuleApplicationResult {
/// Maps the ID of each component that matched an equivalence group to the
/// canonical ID built from that group's canonical PURL.
pub canonical_map: HashMap<CanonicalId, CanonicalId>,
/// IDs of the components that matched an exclusion rule.
pub excluded: HashSet<CanonicalId>,
/// One record per rule hit, in the order the components were visited.
pub applied_rules: Vec<AppliedRule>,
}
/// Record of one rule that matched one component.
#[derive(Debug, Clone)]
pub struct AppliedRule {
/// ID of the component the rule matched.
pub component_id: CanonicalId,
/// Name of the matched component, cloned from the component itself.
pub component_name: String,
/// Which kind of rule matched, together with its kind-specific details.
pub rule_type: AppliedRuleType,
/// Position of the rule inside its own list in the configuration: the
/// exclusion list for exclusions, the equivalence list for equivalences.
pub rule_index: usize,
/// Name of the equivalence group when it has one; always `None` for
/// exclusion rules.
pub rule_name: Option<String>,
}
/// Kind of rule recorded in an [`AppliedRule`].
#[derive(Debug, Clone)]
pub enum AppliedRuleType {
/// The component matched an equivalence group; `canonical` is a copy of
/// that group's canonical PURL.
Equivalence { canonical: String },
/// The component matched an exclusion rule; `reason` is whatever the
/// rule's `get_reason()` returned, converted to an owned string.
Exclusion { reason: Option<String> },
}
/// Applies the exclusion and equivalence rules of a [`MatchingRulesConfig`].
///
/// Regexes and globs are compiled once by [`RuleEngine::new`] and kept in
/// tables that are index-aligned with the configuration's rule lists.
pub struct RuleEngine {
/// The configuration this engine was built from.
config: MatchingRulesConfig,
/// One slot per exclusion rule: the compiled `regex` field, or `None`
/// when the rule is exact or has no regex.
compiled_exclusion_regexes: Vec<Option<Regex>>,
/// One slot per exclusion rule: the compiled glob `pattern` field, or
/// `None` when the rule is exact or has no pattern.
compiled_exclusion_globs: Vec<Option<Regex>>,
/// Indexed by equivalence group, then by alias: the compiled `regex`
/// field, or `None` when the alias is exact or has no regex.
compiled_alias_regexes: Vec<Vec<Option<Regex>>>,
/// Indexed by equivalence group, then by alias: the compiled glob
/// `pattern` field, or `None` when the alias is exact or has no pattern.
compiled_alias_globs: Vec<Vec<Option<Regex>>>,
}
impl RuleEngine {
/// Builds an engine from `config`, compiling every regex and glob up front.
///
/// The resulting tables are index-aligned with `config.exclusions` and with
/// `config.equivalences[i].aliases`; entries for rules that carry no
/// regex/glob are `None`.
///
/// # Errors
///
/// Returns a message naming the offending pattern when compilation fails.
/// Patterns are checked in this order: exclusion regexes, exclusion globs,
/// alias regexes, alias globs. The first failure in that order is reported.
pub fn new(config: MatchingRulesConfig) -> Result<Self, String> {
    // Pass 1: explicit regexes attached to exclusion rules.
    let mut compiled_exclusion_regexes = Vec::with_capacity(config.exclusions.len());
    for rule in &config.exclusions {
        let compiled = match rule {
            ExclusionRule::Exact(_) => None,
            ExclusionRule::Conditional { regex, .. } => regex
                .as_ref()
                .map(|source| {
                    Regex::new(source)
                        .map_err(|e| format!("Invalid exclusion regex '{source}': {e}"))
                })
                .transpose()?,
        };
        compiled_exclusion_regexes.push(compiled);
    }

    // Pass 2: glob patterns attached to exclusion rules.
    let mut compiled_exclusion_globs = Vec::with_capacity(config.exclusions.len());
    for rule in &config.exclusions {
        let compiled = match rule {
            ExclusionRule::Exact(_) => None,
            ExclusionRule::Conditional { pattern, .. } => pattern
                .as_ref()
                .map(|glob| compile_glob(glob))
                .transpose()?,
        };
        compiled_exclusion_globs.push(compiled);
    }

    // Pass 3: explicit regexes attached to aliases, one row per group.
    let mut compiled_alias_regexes = Vec::with_capacity(config.equivalences.len());
    for group in &config.equivalences {
        let mut row = Vec::with_capacity(group.aliases.len());
        for alias in &group.aliases {
            let compiled = match alias {
                AliasPattern::Exact(_) => None,
                AliasPattern::Pattern { regex, .. } => regex
                    .as_ref()
                    .map(|source| {
                        Regex::new(source)
                            .map_err(|e| format!("Invalid alias regex '{source}': {e}"))
                    })
                    .transpose()?,
            };
            row.push(compiled);
        }
        compiled_alias_regexes.push(row);
    }

    // Pass 4: glob patterns attached to aliases, one row per group.
    let mut compiled_alias_globs = Vec::with_capacity(config.equivalences.len());
    for group in &config.equivalences {
        let mut row = Vec::with_capacity(group.aliases.len());
        for alias in &group.aliases {
            let compiled = match alias {
                AliasPattern::Exact(_) => None,
                AliasPattern::Pattern { pattern, .. } => pattern
                    .as_ref()
                    .map(|glob| compile_glob(glob))
                    .transpose()?,
            };
            row.push(compiled);
        }
        compiled_alias_globs.push(row);
    }

    Ok(Self {
        config,
        compiled_exclusion_regexes,
        compiled_exclusion_globs,
        compiled_alias_regexes,
        compiled_alias_globs,
    })
}
/// Runs every rule against every component and collects the outcome.
///
/// Exclusions take precedence: a component that matches an exclusion rule
/// is recorded as excluded and is not checked against the equivalence
/// groups. At most one rule is recorded per component.
#[must_use]
pub fn apply(&self, components: &IndexMap<CanonicalId, Component>) -> RuleApplicationResult {
    let mut outcome = RuleApplicationResult::default();
    for (id, component) in components {
        match self.check_exclusions(id, component) {
            Some(hit) => {
                outcome.excluded.insert(id.clone());
                outcome.applied_rules.push(hit);
            }
            None => {
                if let Some((canonical_id, hit)) = self.check_equivalences(id, component) {
                    outcome.canonical_map.insert(id.clone(), canonical_id);
                    outcome.applied_rules.push(hit);
                }
            }
        }
    }
    outcome
}
/// Returns a record for the first exclusion rule (in configuration order)
/// that matches `component`, or `None` when no exclusion rule matches.
fn check_exclusions(&self, id: &CanonicalId, component: &Component) -> Option<AppliedRule> {
    self.config
        .exclusions
        .iter()
        .enumerate()
        .find(|&(idx, rule)| self.exclusion_matches(rule, idx, component))
        .map(|(idx, rule)| AppliedRule {
            component_id: id.clone(),
            component_name: component.name.clone(),
            rule_type: AppliedRuleType::Exclusion {
                reason: rule.get_reason().map(std::string::ToString::to_string),
            },
            rule_index: idx,
            // Exclusion hits never carry a rule name.
            rule_name: None,
        })
}
fn exclusion_matches(
&self,
rule: &ExclusionRule,
rule_idx: usize,
component: &Component,
) -> bool {
match rule {
ExclusionRule::Exact(purl) => component
.identifiers
.purl
.as_ref()
.is_some_and(|p| p == purl),
ExclusionRule::Conditional {
pattern,
regex: _,
ecosystem,
name,
scope: _,
reason: _,
} => {
if let Some(eco) = ecosystem {
let comp_eco = component
.ecosystem
.as_ref()
.map(|e| e.to_string().to_lowercase());
if comp_eco.as_deref() != Some(&eco.to_lowercase()) {
return false;
}
}
if let Some(n) = name
&& !component.name.to_lowercase().contains(&n.to_lowercase())
{
return false;
}
if pattern.is_some() {
if let Some(purl) = &component.identifiers.purl {
if let Some(Some(re)) = self.compiled_exclusion_globs.get(rule_idx)
&& !re.is_match(purl)
{
return false;
}
} else {
return false;
}
}
if let Some(Some(re)) = self.compiled_exclusion_regexes.get(rule_idx) {
if let Some(purl) = &component.identifiers.purl {
if !re.is_match(purl) {
return false;
}
} else {
return false;
}
}
ecosystem.is_some()
|| name.is_some()
|| pattern.is_some()
|| self
.compiled_exclusion_regexes
.get(rule_idx)
.is_some_and(std::option::Option::is_some)
}
}
}
/// Finds the first equivalence group (in configuration order) whose
/// canonical PURL or any alias matches the component's PURL.
///
/// Returns the canonical ID built from the group's canonical PURL together
/// with the record of the match, or `None` when the component has no PURL
/// or no group matches.
fn check_equivalences(
    &self,
    id: &CanonicalId,
    component: &Component,
) -> Option<(CanonicalId, AppliedRule)> {
    let purl = component.identifiers.purl.as_ref()?;
    let (eq_idx, eq) = self
        .config
        .equivalences
        .iter()
        .enumerate()
        .find(|&(eq_idx, eq)| purl == &eq.canonical || self.alias_matches(eq_idx, eq, purl))?;

    let canonical_id = CanonicalId::from_purl(&eq.canonical);
    let applied = AppliedRule {
        component_id: id.clone(),
        component_name: component.name.clone(),
        rule_type: AppliedRuleType::Equivalence {
            canonical: eq.canonical.clone(),
        },
        rule_index: eq_idx,
        rule_name: eq.name.clone(),
    };
    Some((canonical_id, applied))
}
/// Tests whether `purl` matches any alias of the equivalence group `eq`.
///
/// `eq_idx` must be the group's position in the configuration's
/// equivalence list; it selects the pre-compiled globs/regexes of the group.
///
/// * An exact alias matches on string equality.
/// * A pattern alias matches when ANY of its specified conditions holds:
///   its glob or regex accepts the PURL, the PURL starts with
///   `pkg:<ecosystem>/` (case-insensitive), or the PURL contains `name`
///   (case-insensitive substring). This differs from exclusion rules, whose
///   conditions must all hold.
fn alias_matches(&self, eq_idx: usize, eq: &EquivalenceGroup, purl: &str) -> bool {
    let regex_row = self.compiled_alias_regexes.get(eq_idx);
    let glob_row = self.compiled_alias_globs.get(eq_idx);

    eq.aliases
        .iter()
        .enumerate()
        .any(|(alias_idx, alias)| match alias {
            AliasPattern::Exact(exact_purl) => purl == exact_purl,
            AliasPattern::Pattern {
                ecosystem, name, ..
            } => {
                let glob = glob_row
                    .and_then(|row| row.get(alias_idx))
                    .and_then(Option::as_ref);
                let regex = regex_row
                    .and_then(|row| row.get(alias_idx))
                    .and_then(Option::as_ref);

                glob.is_some_and(|re| re.is_match(purl))
                    || regex.is_some_and(|re| re.is_match(purl))
                    || ecosystem.as_ref().is_some_and(|eco| {
                        // The PURL type must be exactly `eco`: the prefix has
                        // to be followed directly by `/`.
                        let purl_lower = purl.to_lowercase();
                        let eco_lower = eco.to_lowercase();
                        purl_lower
                            .strip_prefix("pkg:")
                            .and_then(|rest| rest.strip_prefix(eco_lower.as_str()))
                            .is_some_and(|tail| tail.starts_with('/'))
                    })
                    || name
                        .as_ref()
                        .is_some_and(|n| purl.to_lowercase().contains(&n.to_lowercase()))
            }
        })
}
/// Returns the configuration this engine was built from.
#[must_use]
pub const fn config(&self) -> &MatchingRulesConfig {
&self.config
}
#[must_use]
pub fn is_excluded(&self, purl: &str) -> bool {
for (idx, rule) in self.config.exclusions.iter().enumerate() {
match rule {
ExclusionRule::Exact(exact) => {
if purl == exact {
return true;
}
}
ExclusionRule::Conditional { pattern, .. } => {
if pattern.is_some()
&& let Some(Some(re)) = self.compiled_exclusion_globs.get(idx)
&& re.is_match(purl)
{
return true;
}
if let Some(Some(re)) = self.compiled_exclusion_regexes.get(idx)
&& re.is_match(purl)
{
return true;
}
}
}
}
false
}
/// Resolves a bare PURL to the canonical PURL of the first equivalence
/// group (in configuration order) it belongs to.
///
/// A PURL belongs to a group when it equals the group's canonical PURL or
/// matches one of its aliases. Returns `None` when no group matches.
#[must_use]
pub fn get_canonical(&self, purl: &str) -> Option<String> {
    self.config
        .equivalences
        .iter()
        .enumerate()
        .find_map(|(eq_idx, eq)| {
            (purl == eq.canonical || self.alias_matches(eq_idx, eq, purl))
                .then(|| eq.canonical.clone())
        })
}
}
fn compile_glob(pattern: &str) -> Result<Regex, String> {
let regex_pattern = pattern
.replace('.', "\\.")
.replace('*', ".*")
.replace('?', ".");
Regex::new(&format!("^{regex_pattern}$"))
.map_err(|e| format!("Invalid glob pattern '{pattern}': {e}"))
}
/// Test helper: reports whether `text` matches the glob `pattern`.
///
/// A pattern that fails to compile is treated as matching nothing.
#[cfg(test)]
fn glob_matches(pattern: &str, text: &str) -> bool {
    compile_glob(pattern).is_ok_and(|re| re.is_match(text))
}
// Unit tests for glob compilation and for the rule engine's exclusion and
// equivalence handling.
#[cfg(test)]
mod tests {
use super::*;
// Builds an npm component at version 1.0.0 with the given name and optional
// PURL. When no PURL is given, the name is passed as the second argument of
// `Component::new` and the PURL identifier is left unset.
fn create_test_component(name: &str, purl: Option<&str>) -> Component {
use crate::model::*;
let mut comp = Component::new(name.to_string(), purl.unwrap_or(name).to_string());
comp.version = Some("1.0.0".to_string());
comp.identifiers.purl = purl.map(|s| s.to_string());
comp.ecosystem = Some(Ecosystem::Npm);
comp
}
// `*` spans any suffix, the match is anchored, and `.` is literal.
#[test]
fn test_glob_matches() {
assert!(glob_matches("pkg:npm/*", "pkg:npm/lodash"));
assert!(glob_matches("pkg:npm/lodash*", "pkg:npm/lodash-es"));
assert!(!glob_matches("pkg:npm/*", "pkg:maven/test"));
assert!(glob_matches("*.json", "test.json"));
}
// An exact exclusion hits only the identical PURL.
#[test]
fn test_exact_exclusion() {
let config = MatchingRulesConfig {
exclusions: vec![ExclusionRule::exact("pkg:npm/jest")],
..Default::default()
};
let engine = RuleEngine::new(config).unwrap();
assert!(engine.is_excluded("pkg:npm/jest"));
assert!(!engine.is_excluded("pkg:npm/lodash"));
}
// A glob exclusion hits every PURL the glob accepts.
#[test]
fn test_pattern_exclusion() {
let config = MatchingRulesConfig {
exclusions: vec![ExclusionRule::pattern("pkg:npm/test-*")],
..Default::default()
};
let engine = RuleEngine::new(config).unwrap();
assert!(engine.is_excluded("pkg:npm/test-utils"));
assert!(engine.is_excluded("pkg:npm/test-runner"));
assert!(!engine.is_excluded("pkg:npm/lodash"));
}
// The canonical PURL, an exact alias and a glob alias all resolve to the
// group's canonical PURL; an unrelated PURL resolves to nothing.
#[test]
fn test_equivalence_matching() {
let config = MatchingRulesConfig {
equivalences: vec![EquivalenceGroup {
name: Some("Lodash".to_string()),
canonical: "pkg:npm/lodash".to_string(),
aliases: vec![
AliasPattern::exact("pkg:npm/lodash-es"),
AliasPattern::glob("pkg:npm/lodash.*"),
],
version_sensitive: false,
}],
..Default::default()
};
let engine = RuleEngine::new(config).unwrap();
assert_eq!(
engine.get_canonical("pkg:npm/lodash"),
Some("pkg:npm/lodash".to_string())
);
assert_eq!(
engine.get_canonical("pkg:npm/lodash-es"),
Some("pkg:npm/lodash".to_string())
);
assert_eq!(
engine.get_canonical("pkg:npm/lodash.min"),
Some("pkg:npm/lodash".to_string())
);
assert_eq!(engine.get_canonical("pkg:npm/underscore"), None);
}
// End-to-end `apply`: one component is canonicalized, one is excluded and
// one is untouched, giving exactly two applied-rule records.
#[test]
fn test_apply_rules() {
let config = MatchingRulesConfig {
equivalences: vec![EquivalenceGroup {
name: Some("Lodash".to_string()),
canonical: "pkg:npm/lodash".to_string(),
aliases: vec![AliasPattern::exact("pkg:npm/lodash-es")],
version_sensitive: false,
}],
exclusions: vec![ExclusionRule::exact("pkg:npm/jest")],
..Default::default()
};
let engine = RuleEngine::new(config).unwrap();
let mut components = IndexMap::new();
components.insert(
CanonicalId::from_purl("pkg:npm/lodash-es"),
create_test_component("lodash-es", Some("pkg:npm/lodash-es")),
);
components.insert(
CanonicalId::from_purl("pkg:npm/jest"),
create_test_component("jest", Some("pkg:npm/jest")),
);
components.insert(
CanonicalId::from_purl("pkg:npm/react"),
create_test_component("react", Some("pkg:npm/react")),
);
let result = engine.apply(&components);
assert!(
result
.canonical_map
.contains_key(&CanonicalId::from_purl("pkg:npm/lodash-es"))
);
assert!(
result
.excluded
.contains(&CanonicalId::from_purl("pkg:npm/jest"))
);
assert!(
!result
.canonical_map
.contains_key(&CanonicalId::from_purl("pkg:npm/react"))
);
assert!(
!result
.excluded
.contains(&CanonicalId::from_purl("pkg:npm/react"))
);
assert_eq!(result.applied_rules.len(), 2);
}
}