Skip to main content

sbom_tools/matching/
custom_rules.rs

//! Custom component matching rules configuration.
//!
//! This module provides data structures for user-defined matching rules
//! that can be loaded from YAML configuration files.
5
6use serde::{Deserialize, Serialize};
7
8/// Root configuration for custom matching rules
9#[derive(Debug, Clone, Default, Deserialize, Serialize)]
10pub struct MatchingRulesConfig {
11    /// Rule precedence strategy
12    #[serde(default)]
13    pub precedence: RulePrecedence,
14
15    /// Component equivalence groups
16    #[serde(default)]
17    pub equivalences: Vec<EquivalenceGroup>,
18
19    /// Component exclusion rules
20    #[serde(default)]
21    pub exclusions: Vec<ExclusionRule>,
22}
23
24/// Rule precedence strategy when multiple rules match
25#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq)]
26#[serde(rename_all = "kebab-case")]
27pub enum RulePrecedence {
28    /// First matching rule wins
29    #[default]
30    FirstMatch,
31    /// Most specific rule wins (longer patterns, exact matches)
32    MostSpecific,
33}
34
35impl std::fmt::Display for RulePrecedence {
36    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
37        match self {
38            Self::FirstMatch => write!(f, "first-match"),
39            Self::MostSpecific => write!(f, "most-specific"),
40        }
41    }
42}
43
44/// Defines a group of components that should be treated as equivalent
45#[derive(Debug, Clone, Deserialize, Serialize)]
46pub struct EquivalenceGroup {
47    /// Optional name for this rule (for logging/debugging)
48    #[serde(default)]
49    pub name: Option<String>,
50
51    /// The canonical identifier (others will be mapped to this)
52    pub canonical: String,
53
54    /// Aliases that should map to the canonical
55    #[serde(default)]
56    pub aliases: Vec<AliasPattern>,
57
58    /// Whether version must also match for equivalence
59    #[serde(default)]
60    pub version_sensitive: bool,
61}
62
63/// Pattern for matching component aliases
64#[derive(Debug, Clone, Deserialize, Serialize)]
65#[serde(untagged)]
66pub enum AliasPattern {
67    /// Exact PURL match
68    Exact(String),
69
70    /// Pattern-based match
71    Pattern {
72        /// Glob pattern (e.g., "pkg:maven/org.apache.logging.log4j/*")
73        #[serde(default)]
74        pattern: Option<String>,
75
76        /// Regex pattern
77        #[serde(default)]
78        regex: Option<String>,
79
80        /// Match by ecosystem
81        #[serde(default)]
82        ecosystem: Option<String>,
83
84        /// Match by name (within ecosystem)
85        #[serde(default)]
86        name: Option<String>,
87    },
88}
89
90impl AliasPattern {
91    /// Create an exact match pattern
92    pub fn exact(purl: impl Into<String>) -> Self {
93        Self::Exact(purl.into())
94    }
95
96    /// Create a glob pattern match
97    pub fn glob(pattern: impl Into<String>) -> Self {
98        Self::Pattern {
99            pattern: Some(pattern.into()),
100            regex: None,
101            ecosystem: None,
102            name: None,
103        }
104    }
105
106    /// Create a regex pattern match
107    pub fn regex(pattern: impl Into<String>) -> Self {
108        Self::Pattern {
109            pattern: None,
110            regex: Some(pattern.into()),
111            ecosystem: None,
112            name: None,
113        }
114    }
115
116    /// Get a description of this pattern for display
117    #[must_use]
118    pub fn description(&self) -> String {
119        match self {
120            Self::Exact(purl) => format!("exact:{purl}"),
121            Self::Pattern {
122                pattern,
123                regex,
124                ecosystem,
125                name,
126            } => {
127                let mut parts = Vec::new();
128                if let Some(p) = pattern {
129                    parts.push(format!("pattern:{p}"));
130                }
131                if let Some(r) = regex {
132                    parts.push(format!("regex:{r}"));
133                }
134                if let Some(e) = ecosystem {
135                    parts.push(format!("ecosystem:{e}"));
136                }
137                if let Some(n) = name {
138                    parts.push(format!("name:{n}"));
139                }
140                parts.join(", ")
141            }
142        }
143    }
144}
145
146/// Rule for excluding components from diff analysis
147#[derive(Debug, Clone, Deserialize, Serialize)]
148#[serde(untagged)]
149pub enum ExclusionRule {
150    /// Exact PURL match
151    Exact(String),
152
153    /// Conditional exclusion
154    Conditional {
155        /// Glob pattern
156        #[serde(default)]
157        pattern: Option<String>,
158
159        /// Regex pattern
160        #[serde(default)]
161        regex: Option<String>,
162
163        /// Match by ecosystem (npm, maven, pypi, etc.)
164        #[serde(default)]
165        ecosystem: Option<String>,
166
167        /// Match by component name
168        #[serde(default)]
169        name: Option<String>,
170
171        /// Match by dependency scope (dev, test, build, runtime)
172        #[serde(default)]
173        scope: Option<String>,
174
175        /// Reason for exclusion (for reporting)
176        #[serde(default)]
177        reason: Option<String>,
178    },
179}
180
181impl ExclusionRule {
182    /// Create an exact match exclusion
183    pub fn exact(purl: impl Into<String>) -> Self {
184        Self::Exact(purl.into())
185    }
186
187    /// Create a pattern-based exclusion
188    pub fn pattern(pattern: impl Into<String>) -> Self {
189        Self::Conditional {
190            pattern: Some(pattern.into()),
191            regex: None,
192            ecosystem: None,
193            name: None,
194            scope: None,
195            reason: None,
196        }
197    }
198
199    /// Create an ecosystem-based exclusion
200    pub fn ecosystem(ecosystem: impl Into<String>) -> Self {
201        Self::Conditional {
202            pattern: None,
203            regex: None,
204            ecosystem: Some(ecosystem.into()),
205            name: None,
206            scope: None,
207            reason: None,
208        }
209    }
210
211    /// Get the reason for this exclusion, if any
212    #[must_use]
213    pub fn get_reason(&self) -> Option<&str> {
214        match self {
215            Self::Exact(_) => None,
216            Self::Conditional { reason, .. } => reason.as_deref(),
217        }
218    }
219
220    /// Get a description of this rule for display
221    #[must_use]
222    pub fn description(&self) -> String {
223        match self {
224            Self::Exact(purl) => format!("exact:{purl}"),
225            Self::Conditional {
226                pattern,
227                regex,
228                ecosystem,
229                name,
230                scope,
231                reason,
232            } => {
233                let mut parts = Vec::new();
234                if let Some(p) = pattern {
235                    parts.push(format!("pattern:{p}"));
236                }
237                if let Some(r) = regex {
238                    parts.push(format!("regex:{r}"));
239                }
240                if let Some(e) = ecosystem {
241                    parts.push(format!("ecosystem:{e}"));
242                }
243                if let Some(n) = name {
244                    parts.push(format!("name:{n}"));
245                }
246                if let Some(s) = scope {
247                    parts.push(format!("scope:{s}"));
248                }
249                if let Some(r) = reason {
250                    parts.push(format!("reason:{r}"));
251                }
252                parts.join(", ")
253            }
254        }
255    }
256}
257
258impl MatchingRulesConfig {
259    /// Load rules from a YAML string
260    pub fn from_yaml(yaml: &str) -> Result<Self, serde_yaml_ng::Error> {
261        serde_yaml_ng::from_str(yaml)
262    }
263
264    /// Load rules from a YAML file
265    pub fn from_file(path: &std::path::Path) -> Result<Self, Box<dyn std::error::Error>> {
266        let content = std::fs::read_to_string(path)?;
267        let config = Self::from_yaml(&content)?;
268        Ok(config)
269    }
270
271    /// Get summary statistics about the rules
272    #[must_use]
273    pub fn summary(&self) -> RulesSummary {
274        RulesSummary {
275            equivalence_groups: self.equivalences.len(),
276            total_aliases: self.equivalences.iter().map(|e| e.aliases.len()).sum(),
277            exclusion_rules: self.exclusions.len(),
278            precedence: self.precedence,
279        }
280    }
281
282    /// Check if the configuration is empty (no rules defined)
283    #[must_use]
284    pub fn is_empty(&self) -> bool {
285        self.equivalences.is_empty() && self.exclusions.is_empty()
286    }
287}
288
289/// Summary of matching rules configuration
290#[derive(Debug, Clone)]
291pub struct RulesSummary {
292    pub equivalence_groups: usize,
293    pub total_aliases: usize,
294    pub exclusion_rules: usize,
295    pub precedence: RulePrecedence,
296}
297
298impl std::fmt::Display for RulesSummary {
299    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
300        write!(
301            f,
302            "{} equivalence group(s) ({} aliases), {} exclusion rule(s), precedence: {}",
303            self.equivalence_groups, self.total_aliases, self.exclusion_rules, self.precedence
304        )
305    }
306}
307
#[cfg(test)]
mod tests {
    use super::*;

    /// A representative YAML document parses into the expected rules, with
    /// bare strings becoming exact rules and mappings becoming
    /// pattern/conditional rules.
    #[test]
    fn test_parse_yaml_config() {
        let yaml = r#"
precedence: first-match
equivalences:
  - name: "Log4j family"
    canonical: "pkg:maven/org.apache.logging.log4j/log4j-core"
    aliases:
      - "pkg:maven/org.apache.logging.log4j/log4j-api"
      - pattern: "pkg:maven/org.apache.logging.log4j/log4j-*"
exclusions:
  - "pkg:maven/junit/junit"
  - ecosystem: "npm"
    scope: "dev"
    reason: "Excluding npm dev dependencies"
"#;

        let config = MatchingRulesConfig::from_yaml(yaml).expect("Failed to parse YAML");
        assert_eq!(config.precedence, RulePrecedence::FirstMatch);
        assert!(!config.is_empty());

        assert_eq!(config.equivalences.len(), 1);
        let group = &config.equivalences[0];
        assert_eq!(group.name.as_deref(), Some("Log4j family"));
        assert_eq!(
            group.canonical,
            "pkg:maven/org.apache.logging.log4j/log4j-core"
        );
        // `version_sensitive` is omitted in the document, so it takes its default.
        assert!(!group.version_sensitive);
        assert_eq!(group.aliases.len(), 2);
        assert_eq!(
            group.aliases[0].description(),
            "exact:pkg:maven/org.apache.logging.log4j/log4j-api"
        );
        assert_eq!(
            group.aliases[1].description(),
            "pattern:pkg:maven/org.apache.logging.log4j/log4j-*"
        );

        assert_eq!(config.exclusions.len(), 2);
        assert_eq!(
            config.exclusions[0].description(),
            "exact:pkg:maven/junit/junit"
        );
        assert_eq!(config.exclusions[0].get_reason(), None);
        assert_eq!(
            config.exclusions[1].description(),
            "ecosystem:npm, scope:dev, reason:Excluding npm dev dependencies"
        );
        assert_eq!(
            config.exclusions[1].get_reason(),
            Some("Excluding npm dev dependencies")
        );
    }

    /// A document that only sets the precedence parses, and the omitted rule
    /// lists default to empty.
    #[test]
    fn test_parse_precedence_only() {
        let config = MatchingRulesConfig::from_yaml("precedence: most-specific\n")
            .expect("Failed to parse YAML");
        assert_eq!(config.precedence, RulePrecedence::MostSpecific);
        assert!(config.is_empty());
    }

    /// The default configuration has no rules and uses first-match precedence.
    #[test]
    fn test_empty_config() {
        let config = MatchingRulesConfig::default();
        assert!(config.is_empty());
        assert_eq!(config.precedence, RulePrecedence::FirstMatch);
    }

    /// `Display` for the precedence uses the kebab-case names.
    #[test]
    fn test_rule_precedence_display() {
        assert_eq!(RulePrecedence::FirstMatch.to_string(), "first-match");
        assert_eq!(RulePrecedence::MostSpecific.to_string(), "most-specific");
    }

    /// Each alias constructor yields the matching `label:value` description.
    #[test]
    fn test_alias_pattern_description() {
        let exact = AliasPattern::exact("pkg:npm/lodash");
        assert_eq!(exact.description(), "exact:pkg:npm/lodash");

        let glob = AliasPattern::glob("pkg:maven/*");
        assert_eq!(glob.description(), "pattern:pkg:maven/*");

        let regex = AliasPattern::regex("^pkg:npm/.*$");
        assert_eq!(regex.description(), "regex:^pkg:npm/.*$");
    }

    /// Each exclusion constructor yields the matching `label:value` description.
    #[test]
    fn test_exclusion_rule_description() {
        let exact = ExclusionRule::exact("pkg:npm/jest");
        assert_eq!(exact.description(), "exact:pkg:npm/jest");

        let pattern = ExclusionRule::pattern("pkg:npm/@types/*");
        assert_eq!(pattern.description(), "pattern:pkg:npm/@types/*");

        let ecosystem = ExclusionRule::ecosystem("npm");
        assert_eq!(ecosystem.description(), "ecosystem:npm");
    }

    /// `get_reason` only returns a value for a conditional rule with a reason.
    #[test]
    fn test_exclusion_rule_reason() {
        assert_eq!(ExclusionRule::exact("pkg:npm/jest").get_reason(), None);
        assert_eq!(ExclusionRule::ecosystem("npm").get_reason(), None);

        let with_reason = ExclusionRule::Conditional {
            pattern: None,
            regex: None,
            ecosystem: None,
            name: Some("jest".to_string()),
            scope: None,
            reason: Some("test tooling".to_string()),
        };
        assert_eq!(with_reason.get_reason(), Some("test tooling"));
        assert_eq!(with_reason.description(), "name:jest, reason:test tooling");
    }

    /// The summary reports the rule counts and renders them on one line.
    #[test]
    fn test_rules_summary() {
        let config = MatchingRulesConfig {
            precedence: RulePrecedence::MostSpecific,
            equivalences: vec![EquivalenceGroup {
                name: Some("Test".to_string()),
                canonical: "pkg:npm/test".to_string(),
                aliases: vec![
                    AliasPattern::exact("pkg:npm/test-alias"),
                    AliasPattern::exact("pkg:npm/test-other"),
                ],
                version_sensitive: false,
            }],
            exclusions: vec![ExclusionRule::exact("pkg:npm/jest")],
        };

        let summary = config.summary();
        assert_eq!(summary.equivalence_groups, 1);
        assert_eq!(summary.total_aliases, 2);
        assert_eq!(summary.exclusion_rules, 1);
        assert_eq!(summary.precedence, RulePrecedence::MostSpecific);
        assert_eq!(
            summary.to_string(),
            "1 equivalence group(s) (2 aliases), 1 exclusion rule(s), precedence: most-specific"
        );
    }
}