Skip to main content

sbom_tools/matching/
custom_rules.rs

1//! Custom component matching rules configuration.
2//!
3//! This module provides data structures for user-defined matching rules
4//! that can be loaded from YAML configuration files.
5
6use serde::{Deserialize, Serialize};
7
8/// Root configuration for custom matching rules
9#[derive(Debug, Clone, Default, Deserialize, Serialize)]
10pub struct MatchingRulesConfig {
11    /// Rule precedence strategy
12    #[serde(default)]
13    pub precedence: RulePrecedence,
14
15    /// Component equivalence groups
16    #[serde(default)]
17    pub equivalences: Vec<EquivalenceGroup>,
18
19    /// Component exclusion rules
20    #[serde(default)]
21    pub exclusions: Vec<ExclusionRule>,
22}
23
24/// Rule precedence strategy when multiple rules match
25#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq)]
26#[serde(rename_all = "kebab-case")]
27pub enum RulePrecedence {
28    /// First matching rule wins
29    #[default]
30    FirstMatch,
31    /// Most specific rule wins (longer patterns, exact matches)
32    MostSpecific,
33}
34
35impl std::fmt::Display for RulePrecedence {
36    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
37        match self {
38            Self::FirstMatch => write!(f, "first-match"),
39            Self::MostSpecific => write!(f, "most-specific"),
40        }
41    }
42}
43
44/// Defines a group of components that should be treated as equivalent
45#[derive(Debug, Clone, Deserialize, Serialize)]
46pub struct EquivalenceGroup {
47    /// Optional name for this rule (for logging/debugging)
48    #[serde(default)]
49    pub name: Option<String>,
50
51    /// The canonical identifier (others will be mapped to this)
52    pub canonical: String,
53
54    /// Aliases that should map to the canonical
55    #[serde(default)]
56    pub aliases: Vec<AliasPattern>,
57
58    /// Whether version must also match for equivalence
59    #[serde(default)]
60    pub version_sensitive: bool,
61}
62
63/// Pattern for matching component aliases
64#[derive(Debug, Clone, Deserialize, Serialize)]
65#[serde(untagged)]
66pub enum AliasPattern {
67    /// Exact PURL match
68    Exact(String),
69
70    /// Pattern-based match
71    Pattern {
72        /// Glob pattern (e.g., "pkg:maven/org.apache.logging.log4j/*")
73        #[serde(default)]
74        pattern: Option<String>,
75
76        /// Regex pattern
77        #[serde(default)]
78        regex: Option<String>,
79
80        /// Match by ecosystem
81        #[serde(default)]
82        ecosystem: Option<String>,
83
84        /// Match by name (within ecosystem)
85        #[serde(default)]
86        name: Option<String>,
87    },
88}
89
90impl AliasPattern {
91    /// Create an exact match pattern
92    pub fn exact(purl: impl Into<String>) -> Self {
93        Self::Exact(purl.into())
94    }
95
96    /// Create a glob pattern match
97    pub fn glob(pattern: impl Into<String>) -> Self {
98        Self::Pattern {
99            pattern: Some(pattern.into()),
100            regex: None,
101            ecosystem: None,
102            name: None,
103        }
104    }
105
106    /// Create a regex pattern match
107    pub fn regex(pattern: impl Into<String>) -> Self {
108        Self::Pattern {
109            pattern: None,
110            regex: Some(pattern.into()),
111            ecosystem: None,
112            name: None,
113        }
114    }
115
116    /// Get a description of this pattern for display
117    pub fn description(&self) -> String {
118        match self {
119            Self::Exact(purl) => format!("exact:{}", purl),
120            Self::Pattern {
121                pattern,
122                regex,
123                ecosystem,
124                name,
125            } => {
126                let mut parts = Vec::new();
127                if let Some(p) = pattern {
128                    parts.push(format!("pattern:{}", p));
129                }
130                if let Some(r) = regex {
131                    parts.push(format!("regex:{}", r));
132                }
133                if let Some(e) = ecosystem {
134                    parts.push(format!("ecosystem:{}", e));
135                }
136                if let Some(n) = name {
137                    parts.push(format!("name:{}", n));
138                }
139                parts.join(", ")
140            }
141        }
142    }
143}
144
145/// Rule for excluding components from diff analysis
146#[derive(Debug, Clone, Deserialize, Serialize)]
147#[serde(untagged)]
148pub enum ExclusionRule {
149    /// Exact PURL match
150    Exact(String),
151
152    /// Conditional exclusion
153    Conditional {
154        /// Glob pattern
155        #[serde(default)]
156        pattern: Option<String>,
157
158        /// Regex pattern
159        #[serde(default)]
160        regex: Option<String>,
161
162        /// Match by ecosystem (npm, maven, pypi, etc.)
163        #[serde(default)]
164        ecosystem: Option<String>,
165
166        /// Match by component name
167        #[serde(default)]
168        name: Option<String>,
169
170        /// Match by dependency scope (dev, test, build, runtime)
171        #[serde(default)]
172        scope: Option<String>,
173
174        /// Reason for exclusion (for reporting)
175        #[serde(default)]
176        reason: Option<String>,
177    },
178}
179
180impl ExclusionRule {
181    /// Create an exact match exclusion
182    pub fn exact(purl: impl Into<String>) -> Self {
183        Self::Exact(purl.into())
184    }
185
186    /// Create a pattern-based exclusion
187    pub fn pattern(pattern: impl Into<String>) -> Self {
188        Self::Conditional {
189            pattern: Some(pattern.into()),
190            regex: None,
191            ecosystem: None,
192            name: None,
193            scope: None,
194            reason: None,
195        }
196    }
197
198    /// Create an ecosystem-based exclusion
199    pub fn ecosystem(ecosystem: impl Into<String>) -> Self {
200        Self::Conditional {
201            pattern: None,
202            regex: None,
203            ecosystem: Some(ecosystem.into()),
204            name: None,
205            scope: None,
206            reason: None,
207        }
208    }
209
210    /// Get the reason for this exclusion, if any
211    pub fn get_reason(&self) -> Option<&str> {
212        match self {
213            Self::Exact(_) => None,
214            Self::Conditional { reason, .. } => reason.as_deref(),
215        }
216    }
217
218    /// Get a description of this rule for display
219    pub fn description(&self) -> String {
220        match self {
221            Self::Exact(purl) => format!("exact:{}", purl),
222            Self::Conditional {
223                pattern,
224                regex,
225                ecosystem,
226                name,
227                scope,
228                reason,
229            } => {
230                let mut parts = Vec::new();
231                if let Some(p) = pattern {
232                    parts.push(format!("pattern:{}", p));
233                }
234                if let Some(r) = regex {
235                    parts.push(format!("regex:{}", r));
236                }
237                if let Some(e) = ecosystem {
238                    parts.push(format!("ecosystem:{}", e));
239                }
240                if let Some(n) = name {
241                    parts.push(format!("name:{}", n));
242                }
243                if let Some(s) = scope {
244                    parts.push(format!("scope:{}", s));
245                }
246                if let Some(r) = reason {
247                    parts.push(format!("reason:{}", r));
248                }
249                parts.join(", ")
250            }
251        }
252    }
253}
254
255impl MatchingRulesConfig {
256    /// Load rules from a YAML string
257    pub fn from_yaml(yaml: &str) -> Result<Self, serde_yaml_ng::Error> {
258        serde_yaml_ng::from_str(yaml)
259    }
260
261    /// Load rules from a YAML file
262    pub fn from_file(path: &std::path::Path) -> Result<Self, Box<dyn std::error::Error>> {
263        let content = std::fs::read_to_string(path)?;
264        let config = Self::from_yaml(&content)?;
265        Ok(config)
266    }
267
268    /// Get summary statistics about the rules
269    pub fn summary(&self) -> RulesSummary {
270        RulesSummary {
271            equivalence_groups: self.equivalences.len(),
272            total_aliases: self.equivalences.iter().map(|e| e.aliases.len()).sum(),
273            exclusion_rules: self.exclusions.len(),
274            precedence: self.precedence,
275        }
276    }
277
278    /// Check if the configuration is empty (no rules defined)
279    pub fn is_empty(&self) -> bool {
280        self.equivalences.is_empty() && self.exclusions.is_empty()
281    }
282}
283
284/// Summary of matching rules configuration
285#[derive(Debug, Clone)]
286pub struct RulesSummary {
287    pub equivalence_groups: usize,
288    pub total_aliases: usize,
289    pub exclusion_rules: usize,
290    pub precedence: RulePrecedence,
291}
292
293impl std::fmt::Display for RulesSummary {
294    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
295        write!(
296            f,
297            "{} equivalence group(s) ({} aliases), {} exclusion rule(s), precedence: {}",
298            self.equivalence_groups, self.total_aliases, self.exclusion_rules, self.precedence
299        )
300    }
301}
302
#[cfg(test)]
mod tests {
    use super::*;

    /// A document with both sections parses into the expected number of
    /// equivalence groups, aliases and exclusions.
    #[test]
    fn test_parse_yaml_config() {
        let yaml = r#"
precedence: first-match
equivalences:
  - name: "Log4j family"
    canonical: "pkg:maven/org.apache.logging.log4j/log4j-core"
    aliases:
      - "pkg:maven/org.apache.logging.log4j/log4j-api"
      - pattern: "pkg:maven/org.apache.logging.log4j/log4j-*"
exclusions:
  - "pkg:maven/junit/junit"
  - ecosystem: "npm"
    scope: "dev"
    reason: "Excluding npm dev dependencies"
"#;

        let MatchingRulesConfig {
            precedence,
            equivalences,
            exclusions,
        } = MatchingRulesConfig::from_yaml(yaml).expect("Failed to parse YAML");

        assert_eq!(precedence, RulePrecedence::FirstMatch);
        assert_eq!(equivalences.len(), 1);
        assert_eq!(equivalences[0].aliases.len(), 2);
        assert_eq!(exclusions.len(), 2);
    }

    /// The default configuration has no rules and uses first-match precedence.
    #[test]
    fn test_empty_config() {
        let defaults = MatchingRulesConfig::default();
        assert!(defaults.is_empty());
        assert_eq!(defaults.precedence, RulePrecedence::FirstMatch);
    }

    /// Alias descriptions carry the label of the kind of match.
    #[test]
    fn test_alias_pattern_description() {
        let exact_text = AliasPattern::exact("pkg:npm/lodash").description();
        assert!(exact_text.contains("exact:"));

        let glob_text = AliasPattern::glob("pkg:maven/*").description();
        assert!(glob_text.contains("pattern:"));
    }

    /// Exclusion descriptions carry the label of the kind of match.
    #[test]
    fn test_exclusion_rule_description() {
        let exact_text = ExclusionRule::exact("pkg:npm/jest").description();
        assert!(exact_text.contains("exact:"));

        let ecosystem_text = ExclusionRule::ecosystem("npm").description();
        assert!(ecosystem_text.contains("ecosystem:"));
    }

    /// The summary reports the counts and precedence of the configuration.
    #[test]
    fn test_rules_summary() {
        let aliases = vec![
            AliasPattern::exact("pkg:npm/test-alias"),
            AliasPattern::exact("pkg:npm/test-other"),
        ];
        let group = EquivalenceGroup {
            name: Some("Test".to_string()),
            canonical: "pkg:npm/test".to_string(),
            aliases,
            version_sensitive: false,
        };
        let config = MatchingRulesConfig {
            precedence: RulePrecedence::MostSpecific,
            equivalences: vec![group],
            exclusions: vec![ExclusionRule::exact("pkg:npm/jest")],
        };

        let RulesSummary {
            equivalence_groups,
            total_aliases,
            exclusion_rules,
            precedence,
        } = config.summary();

        assert_eq!(equivalence_groups, 1);
        assert_eq!(total_aliases, 2);
        assert_eq!(exclusion_rules, 1);
        assert_eq!(precedence, RulePrecedence::MostSpecific);
    }
}