Skip to main content

sbom_tools/matching/
rule_engine.rs

//! Rule engine for applying custom matching rules.
//!
//! This module provides the engine that applies custom matching rules
//! (exclusions and equivalences) to components during the diff process.
5
6use indexmap::IndexMap;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
10use crate::model::{CanonicalId, Component};
11
12use super::custom_rules::{AliasPattern, EquivalenceGroup, ExclusionRule, MatchingRulesConfig};
13
14/// Result of applying matching rules to components
15#[derive(Debug, Clone, Default)]
16pub struct RuleApplicationResult {
17    /// Original ID -> Canonical ID mapping (for equivalences)
18    pub canonical_map: HashMap<CanonicalId, CanonicalId>,
19    /// IDs that should be excluded from diff
20    pub excluded: HashSet<CanonicalId>,
21    /// Log of which rules were applied
22    pub applied_rules: Vec<AppliedRule>,
23}
24
25/// Record of a rule being applied to a component
26#[derive(Debug, Clone)]
27pub struct AppliedRule {
28    /// The component that was affected
29    pub component_id: CanonicalId,
30    /// The component name
31    pub component_name: String,
32    /// The type of rule applied
33    pub rule_type: AppliedRuleType,
34    /// Index of the rule in the config
35    pub rule_index: usize,
36    /// Name of the rule (if any)
37    pub rule_name: Option<String>,
38}
39
/// Kind of rule recorded in an [`AppliedRule`].
#[derive(Debug, Clone)]
pub enum AppliedRuleType {
    /// The component matched an equivalence group; `canonical` is that
    /// group's canonical PURL.
    Equivalence { canonical: String },
    /// The component matched an exclusion rule; `reason` carries the
    /// rule's stated reason when it has one.
    Exclusion { reason: Option<String> },
}
48
49/// Engine for applying custom matching rules
50pub struct RuleEngine {
51    config: MatchingRulesConfig,
52    /// Compiled regex patterns for exclusions
53    compiled_exclusion_regexes: Vec<Option<Regex>>,
54    /// Compiled glob patterns for exclusions (converted to regex)
55    compiled_exclusion_globs: Vec<Option<Regex>>,
56    /// Compiled regex patterns for equivalence aliases
57    compiled_alias_regexes: Vec<Vec<Option<Regex>>>,
58    /// Compiled glob patterns for equivalence aliases (converted to regex)
59    compiled_alias_globs: Vec<Vec<Option<Regex>>>,
60}
61
62impl RuleEngine {
63    /// Create a new rule engine from configuration
64    pub fn new(config: MatchingRulesConfig) -> Result<Self, String> {
65        // Pre-compile regex patterns for exclusions
66        let compiled_exclusion_regexes = config
67            .exclusions
68            .iter()
69            .map(|rule| match rule {
70                ExclusionRule::Exact(_) => Ok(None),
71                ExclusionRule::Conditional { regex, .. } => {
72                    regex.as_ref().map_or_else(
73                        || Ok(None),
74                        |re| Regex::new(re)
75                            .map(Some)
76                            .map_err(|e| format!("Invalid exclusion regex '{re}': {e}")),
77                    )
78                }
79            })
80            .collect::<Result<Vec<_>, _>>()?;
81
82        // Pre-compile glob patterns for exclusions
83        let compiled_exclusion_globs = config
84            .exclusions
85            .iter()
86            .map(|rule| match rule {
87                ExclusionRule::Exact(_) => Ok(None),
88                ExclusionRule::Conditional { pattern, .. } => {
89                    pattern.as_ref().map_or_else(|| Ok(None), |pat| compile_glob(pat).map(Some))
90                }
91            })
92            .collect::<Result<Vec<_>, _>>()?;
93
94        // Pre-compile regex patterns for equivalence aliases
95        let compiled_alias_regexes = config
96            .equivalences
97            .iter()
98            .map(|eq| {
99                eq.aliases
100                    .iter()
101                    .map(|alias| match alias {
102                        AliasPattern::Exact(_) => Ok(None),
103                        AliasPattern::Pattern { regex, .. } => {
104                            regex.as_ref().map_or_else(
105                                || Ok(None),
106                                |re| Regex::new(re)
107                                    .map(Some)
108                                    .map_err(|e| format!("Invalid alias regex '{re}': {e}")),
109                            )
110                        }
111                    })
112                    .collect::<Result<Vec<_>, _>>()
113            })
114            .collect::<Result<Vec<_>, _>>()?;
115
116        // Pre-compile glob patterns for equivalence aliases
117        let compiled_alias_globs = config
118            .equivalences
119            .iter()
120            .map(|eq| {
121                eq.aliases
122                    .iter()
123                    .map(|alias| match alias {
124                        AliasPattern::Exact(_) => Ok(None),
125                        AliasPattern::Pattern { pattern, .. } => {
126                            pattern.as_ref().map_or_else(|| Ok(None), |pat| compile_glob(pat).map(Some))
127                        }
128                    })
129                    .collect::<Result<Vec<_>, _>>()
130            })
131            .collect::<Result<Vec<_>, _>>()?;
132
133        Ok(Self {
134            config,
135            compiled_exclusion_regexes,
136            compiled_exclusion_globs,
137            compiled_alias_regexes,
138            compiled_alias_globs,
139        })
140    }
141
142    /// Apply rules to a set of components
143    #[must_use] 
144    pub fn apply(&self, components: &IndexMap<CanonicalId, Component>) -> RuleApplicationResult {
145        let mut result = RuleApplicationResult::default();
146
147        for (id, component) in components {
148            // Check exclusions first
149            if let Some(applied) = self.check_exclusions(id, component) {
150                result.excluded.insert(id.clone());
151                result.applied_rules.push(applied);
152                continue;
153            }
154
155            // Check equivalences
156            if let Some((canonical_id, applied)) = self.check_equivalences(id, component) {
157                result.canonical_map.insert(id.clone(), canonical_id);
158                result.applied_rules.push(applied);
159            }
160        }
161
162        result
163    }
164
165    /// Check if a component should be excluded
166    fn check_exclusions(&self, id: &CanonicalId, component: &Component) -> Option<AppliedRule> {
167        for (idx, rule) in self.config.exclusions.iter().enumerate() {
168            if self.exclusion_matches(rule, idx, component) {
169                return Some(AppliedRule {
170                    component_id: id.clone(),
171                    component_name: component.name.clone(),
172                    rule_type: AppliedRuleType::Exclusion {
173                        reason: rule.get_reason().map(std::string::ToString::to_string),
174                    },
175                    rule_index: idx,
176                    rule_name: None,
177                });
178            }
179        }
180        None
181    }
182
183    /// Check if an exclusion rule matches a component
184    fn exclusion_matches(
185        &self,
186        rule: &ExclusionRule,
187        rule_idx: usize,
188        component: &Component,
189    ) -> bool {
190        match rule {
191            ExclusionRule::Exact(purl) => component
192                .identifiers
193                .purl
194                .as_ref()
195                .is_some_and(|p| p == purl),
196            ExclusionRule::Conditional {
197                pattern,
198                regex: _,
199                ecosystem,
200                name,
201                scope: _,
202                reason: _,
203            } => {
204                // Check ecosystem
205                if let Some(eco) = ecosystem {
206                    let comp_eco = component
207                        .ecosystem
208                        .as_ref()
209                        .map(|e| e.to_string().to_lowercase());
210                    if comp_eco.as_deref() != Some(&eco.to_lowercase()) {
211                        return false;
212                    }
213                }
214
215                // Check name
216                if let Some(n) = name {
217                    if !component.name.to_lowercase().contains(&n.to_lowercase()) {
218                        return false;
219                    }
220                }
221
222                // Check pre-compiled glob pattern
223                if pattern.is_some() {
224                    if let Some(purl) = &component.identifiers.purl {
225                        if let Some(Some(re)) = self.compiled_exclusion_globs.get(rule_idx) {
226                            if !re.is_match(purl) {
227                                return false;
228                            }
229                        }
230                    } else {
231                        return false;
232                    }
233                }
234
235                // Check compiled regex
236                if let Some(Some(re)) = self.compiled_exclusion_regexes.get(rule_idx) {
237                    if let Some(purl) = &component.identifiers.purl {
238                        if !re.is_match(purl) {
239                            return false;
240                        }
241                    } else {
242                        return false;
243                    }
244                }
245
246                // If we get here and at least one condition was specified, it matched
247                ecosystem.is_some()
248                    || name.is_some()
249                    || pattern.is_some()
250                    || self
251                        .compiled_exclusion_regexes
252                        .get(rule_idx)
253                        .is_some_and(std::option::Option::is_some)
254            }
255        }
256    }
257
258    /// Check if a component matches any equivalence group
259    fn check_equivalences(
260        &self,
261        id: &CanonicalId,
262        component: &Component,
263    ) -> Option<(CanonicalId, AppliedRule)> {
264        let purl = component.identifiers.purl.as_ref()?;
265
266        for (eq_idx, eq) in self.config.equivalences.iter().enumerate() {
267            // Check if the PURL matches the canonical or any alias
268            let matches_canonical = purl == &eq.canonical;
269            let matches_alias = self.alias_matches(eq_idx, eq, purl);
270
271            if matches_canonical || matches_alias {
272                let canonical_id = CanonicalId::from_purl(&eq.canonical);
273                let applied = AppliedRule {
274                    component_id: id.clone(),
275                    component_name: component.name.clone(),
276                    rule_type: AppliedRuleType::Equivalence {
277                        canonical: eq.canonical.clone(),
278                    },
279                    rule_index: eq_idx,
280                    rule_name: eq.name.clone(),
281                };
282                return Some((canonical_id, applied));
283            }
284        }
285
286        None
287    }
288
289    /// Check if a PURL matches any alias in an equivalence group
290    fn alias_matches(&self, eq_idx: usize, eq: &EquivalenceGroup, purl: &str) -> bool {
291        let alias_regexes = self.compiled_alias_regexes.get(eq_idx);
292        let alias_globs = self.compiled_alias_globs.get(eq_idx);
293
294        for (alias_idx, alias) in eq.aliases.iter().enumerate() {
295            let matches = match alias {
296                AliasPattern::Exact(exact_purl) => purl == exact_purl,
297                AliasPattern::Pattern {
298                    pattern: _,
299                    regex: _,
300                    ecosystem,
301                    name,
302                } => {
303                    let mut matched = false;
304
305                    // Check pre-compiled glob pattern
306                    if let Some(Some(re)) = alias_globs.and_then(|v| v.get(alias_idx)) {
307                        if re.is_match(purl) {
308                            matched = true;
309                        }
310                    }
311
312                    // Check regex
313                    if let Some(Some(re)) = alias_regexes.and_then(|v| v.get(alias_idx)) {
314                        if re.is_match(purl) {
315                            matched = true;
316                        }
317                    }
318
319                    // Check ecosystem match in PURL
320                    if let Some(eco) = ecosystem {
321                        let purl_lower = purl.to_lowercase();
322                        let eco_lower = eco.to_lowercase();
323                        // Check if PURL starts with pkg:<ecosystem>/
324                        if purl_lower.starts_with("pkg:") {
325                            if let Some(rest) = purl_lower.strip_prefix("pkg:") {
326                                if rest.starts_with(&eco_lower)
327                                    && rest[eco_lower.len()..].starts_with('/')
328                                {
329                                    matched = true;
330                                }
331                            }
332                        }
333                    }
334
335                    // Check name match in PURL
336                    if let Some(n) = name {
337                        if purl.to_lowercase().contains(&n.to_lowercase()) {
338                            matched = true;
339                        }
340                    }
341
342                    matched
343                }
344            };
345
346            if matches {
347                return true;
348            }
349        }
350
351        false
352    }
353
354    /// Get the configuration
355    #[must_use] 
356    pub const fn config(&self) -> &MatchingRulesConfig {
357        &self.config
358    }
359
360    /// Check if a PURL is excluded by any rule
361    #[must_use] 
362    pub fn is_excluded(&self, purl: &str) -> bool {
363        for (idx, rule) in self.config.exclusions.iter().enumerate() {
364            match rule {
365                ExclusionRule::Exact(exact) => {
366                    if purl == exact {
367                        return true;
368                    }
369                }
370                ExclusionRule::Conditional { pattern, .. } => {
371                    // Check pre-compiled glob pattern
372                    if pattern.is_some() {
373                        if let Some(Some(re)) = self.compiled_exclusion_globs.get(idx) {
374                            if re.is_match(purl) {
375                                return true;
376                            }
377                        }
378                    }
379                    // Check pre-compiled regex
380                    if let Some(Some(re)) = self.compiled_exclusion_regexes.get(idx) {
381                        if re.is_match(purl) {
382                            return true;
383                        }
384                    }
385                }
386            }
387        }
388        false
389    }
390
391    /// Get the canonical PURL for a given PURL, if any equivalence applies
392    #[must_use] 
393    pub fn get_canonical(&self, purl: &str) -> Option<String> {
394        for (eq_idx, eq) in self.config.equivalences.iter().enumerate() {
395            if purl == eq.canonical {
396                return Some(eq.canonical.clone());
397            }
398            if self.alias_matches(eq_idx, eq, purl) {
399                return Some(eq.canonical.clone());
400            }
401        }
402        None
403    }
404}
405
406/// Compile a glob pattern to a regex at construction time.
407fn compile_glob(pattern: &str) -> Result<Regex, String> {
408    let regex_pattern = pattern
409        .replace('.', "\\.")
410        .replace('*', ".*")
411        .replace('?', ".");
412
413    Regex::new(&format!("^{regex_pattern}$"))
414        .map_err(|e| format!("Invalid glob pattern '{pattern}': {e}"))
415}
416
/// Test-only convenience: compile `pattern` as a glob (`*` and `?`
/// wildcards) and check it against `text`. A glob that fails to compile
/// is treated as a non-match.
#[cfg(test)]
fn glob_matches(pattern: &str, text: &str) -> bool {
    compile_glob(pattern).is_ok_and(|re| re.is_match(text))
}
424
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an npm component at version 1.0.0 with the given name and
    /// optional PURL. The second constructor argument is the PURL when one
    /// is supplied and the name otherwise.
    fn create_test_component(name: &str, purl: Option<&str>) -> Component {
        use crate::model::*;
        let mut component = Component::new(name.to_string(), purl.unwrap_or(name).to_string());
        component.version = Some("1.0.0".to_string());
        component.identifiers.purl = purl.map(String::from);
        component.ecosystem = Some(Ecosystem::Npm);
        component
    }

    /// `*` spans any suffix, and a literal `.` in the glob stays literal.
    #[test]
    fn test_glob_matches() {
        assert!(glob_matches("pkg:npm/*", "pkg:npm/lodash"));
        assert!(glob_matches("pkg:npm/lodash*", "pkg:npm/lodash-es"));
        assert!(!glob_matches("pkg:npm/*", "pkg:maven/test"));
        assert!(glob_matches("*.json", "test.json"));
    }

    /// An exact exclusion hits only the identical PURL.
    #[test]
    fn test_exact_exclusion() {
        let engine = RuleEngine::new(MatchingRulesConfig {
            exclusions: vec![ExclusionRule::exact("pkg:npm/jest")],
            ..Default::default()
        })
        .unwrap();

        assert!(engine.is_excluded("pkg:npm/jest"));
        assert!(!engine.is_excluded("pkg:npm/lodash"));
    }

    /// A glob exclusion hits every PURL with the given prefix.
    #[test]
    fn test_pattern_exclusion() {
        let engine = RuleEngine::new(MatchingRulesConfig {
            exclusions: vec![ExclusionRule::pattern("pkg:npm/test-*")],
            ..Default::default()
        })
        .unwrap();

        assert!(engine.is_excluded("pkg:npm/test-utils"));
        assert!(engine.is_excluded("pkg:npm/test-runner"));
        assert!(!engine.is_excluded("pkg:npm/lodash"));
    }

    /// Canonical, exact-alias and glob-alias PURLs all resolve to the
    /// group's canonical PURL; unrelated PURLs resolve to nothing.
    #[test]
    fn test_equivalence_matching() {
        let lodash_group = EquivalenceGroup {
            name: Some("Lodash".to_string()),
            canonical: "pkg:npm/lodash".to_string(),
            aliases: vec![
                AliasPattern::exact("pkg:npm/lodash-es"),
                AliasPattern::glob("pkg:npm/lodash.*"),
            ],
            version_sensitive: false,
        };
        let engine = RuleEngine::new(MatchingRulesConfig {
            equivalences: vec![lodash_group],
            ..Default::default()
        })
        .unwrap();

        let canonical = Some("pkg:npm/lodash".to_string());
        assert_eq!(engine.get_canonical("pkg:npm/lodash"), canonical);
        assert_eq!(engine.get_canonical("pkg:npm/lodash-es"), canonical);
        assert_eq!(engine.get_canonical("pkg:npm/lodash.min"), canonical);
        assert_eq!(engine.get_canonical("pkg:npm/underscore"), None);
    }

    /// End-to-end: one component is mapped, one is excluded, one is untouched.
    #[test]
    fn test_apply_rules() {
        let engine = RuleEngine::new(MatchingRulesConfig {
            equivalences: vec![EquivalenceGroup {
                name: Some("Lodash".to_string()),
                canonical: "pkg:npm/lodash".to_string(),
                aliases: vec![AliasPattern::exact("pkg:npm/lodash-es")],
                version_sensitive: false,
            }],
            exclusions: vec![ExclusionRule::exact("pkg:npm/jest")],
            ..Default::default()
        })
        .unwrap();

        let mut components = IndexMap::new();
        for (name, purl) in [
            ("lodash-es", "pkg:npm/lodash-es"),
            ("jest", "pkg:npm/jest"),
            ("react", "pkg:npm/react"),
        ] {
            components.insert(
                CanonicalId::from_purl(purl),
                create_test_component(name, Some(purl)),
            );
        }

        let result = engine.apply(&components);

        let lodash_es = CanonicalId::from_purl("pkg:npm/lodash-es");
        let jest = CanonicalId::from_purl("pkg:npm/jest");
        let react = CanonicalId::from_purl("pkg:npm/react");

        // lodash-es should be mapped to canonical lodash
        assert!(result.canonical_map.contains_key(&lodash_es));

        // jest should be excluded
        assert!(result.excluded.contains(&jest));

        // react should have no rules applied
        assert!(!result.canonical_map.contains_key(&react));
        assert!(!result.excluded.contains(&react));

        // Exactly one record each for the mapped and the excluded component
        assert_eq!(result.applied_rules.len(), 2);
    }
}