Skip to main content

sbom_tools/matching/
rule_engine.rs

1//! Rule engine for applying custom matching rules.
2//!
3//! This module provides the engine that applies custom matching rules
4//! to components during the diff process.
5
6use indexmap::IndexMap;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
10use crate::model::{CanonicalId, Component};
11
12use super::custom_rules::{AliasPattern, EquivalenceGroup, ExclusionRule, MatchingRulesConfig};
13
/// Result of applying matching rules to components.
///
/// Produced by [`RuleEngine::apply`]. A component that matches an exclusion
/// rule is recorded in `excluded` only: equivalence rules are not evaluated
/// for it, so it never appears in `canonical_map`.
#[derive(Debug, Clone, Default)]
pub struct RuleApplicationResult {
    /// Original ID -> Canonical ID mapping (for equivalences).
    ///
    /// The key is the component's own ID; the value is the ID built from the
    /// canonical PURL of the first equivalence group the component matched.
    pub canonical_map: HashMap<CanonicalId, CanonicalId>,
    /// IDs of components that matched an exclusion rule and should be
    /// excluded from the diff.
    pub excluded: HashSet<CanonicalId>,
    /// Log of which rules were applied: one entry per component that matched
    /// a rule, in the order the components were visited.
    pub applied_rules: Vec<AppliedRule>,
}
24
/// Record of a single rule being applied to a single component.
#[derive(Debug, Clone)]
pub struct AppliedRule {
    /// ID of the component that was affected.
    pub component_id: CanonicalId,
    /// Name of the affected component (copied from the component).
    pub component_name: String,
    /// The type of rule applied, together with its rule-specific details.
    pub rule_type: AppliedRuleType,
    /// Index of the rule in the config: a position in the exclusion list for
    /// exclusions, or in the equivalence list for equivalences.
    pub rule_index: usize,
    /// Name of the rule (if any). Equivalences carry their group's optional
    /// name; the engine always records `None` for exclusions.
    pub rule_name: Option<String>,
}
39
/// Type of rule that was applied, with the details specific to that type.
#[derive(Debug, Clone)]
pub enum AppliedRuleType {
    /// Component was mapped to a canonical ID; `canonical` is the canonical
    /// PURL of the equivalence group that matched.
    Equivalence { canonical: String },
    /// Component was excluded; `reason` is the reason reported by the
    /// exclusion rule, if it has one.
    Exclusion { reason: Option<String> },
}
48
/// Engine for applying custom matching rules.
///
/// All regex and glob patterns found in the configuration are compiled once
/// in [`RuleEngine::new`] and stored in tables that are index-aligned with
/// the configuration, so matching never recompiles a pattern.
pub struct RuleEngine {
    /// The configuration the engine was built from.
    config: MatchingRulesConfig,
    /// Compiled regex patterns for exclusions: one slot per exclusion rule,
    /// `None` when the rule has no regex.
    compiled_exclusion_regexes: Vec<Option<Regex>>,
    /// Compiled glob patterns for exclusions (converted to regex): one slot
    /// per exclusion rule, `None` when the rule has no glob pattern.
    compiled_exclusion_globs: Vec<Option<Regex>>,
    /// Compiled regex patterns for equivalence aliases: outer index is the
    /// equivalence group, inner index is the alias within that group.
    compiled_alias_regexes: Vec<Vec<Option<Regex>>>,
    /// Compiled glob patterns for equivalence aliases (converted to regex),
    /// indexed the same way as `compiled_alias_regexes`.
    compiled_alias_globs: Vec<Vec<Option<Regex>>>,
}
61
62impl RuleEngine {
63    /// Create a new rule engine from configuration
64    pub fn new(config: MatchingRulesConfig) -> Result<Self, String> {
65        // Pre-compile regex patterns for exclusions
66        let compiled_exclusion_regexes = config
67            .exclusions
68            .iter()
69            .map(|rule| match rule {
70                ExclusionRule::Exact(_) => Ok(None),
71                ExclusionRule::Conditional { regex, .. } => {
72                    if let Some(re) = regex {
73                        Regex::new(re)
74                            .map(Some)
75                            .map_err(|e| format!("Invalid exclusion regex '{}': {}", re, e))
76                    } else {
77                        Ok(None)
78                    }
79                }
80            })
81            .collect::<Result<Vec<_>, _>>()?;
82
83        // Pre-compile glob patterns for exclusions
84        let compiled_exclusion_globs = config
85            .exclusions
86            .iter()
87            .map(|rule| match rule {
88                ExclusionRule::Exact(_) => Ok(None),
89                ExclusionRule::Conditional { pattern, .. } => {
90                    if let Some(pat) = pattern {
91                        compile_glob(pat).map(Some)
92                    } else {
93                        Ok(None)
94                    }
95                }
96            })
97            .collect::<Result<Vec<_>, _>>()?;
98
99        // Pre-compile regex patterns for equivalence aliases
100        let compiled_alias_regexes = config
101            .equivalences
102            .iter()
103            .map(|eq| {
104                eq.aliases
105                    .iter()
106                    .map(|alias| match alias {
107                        AliasPattern::Exact(_) => Ok(None),
108                        AliasPattern::Pattern { regex, .. } => {
109                            if let Some(re) = regex {
110                                Regex::new(re)
111                                    .map(Some)
112                                    .map_err(|e| format!("Invalid alias regex '{}': {}", re, e))
113                            } else {
114                                Ok(None)
115                            }
116                        }
117                    })
118                    .collect::<Result<Vec<_>, _>>()
119            })
120            .collect::<Result<Vec<_>, _>>()?;
121
122        // Pre-compile glob patterns for equivalence aliases
123        let compiled_alias_globs = config
124            .equivalences
125            .iter()
126            .map(|eq| {
127                eq.aliases
128                    .iter()
129                    .map(|alias| match alias {
130                        AliasPattern::Exact(_) => Ok(None),
131                        AliasPattern::Pattern { pattern, .. } => {
132                            if let Some(pat) = pattern {
133                                compile_glob(pat).map(Some)
134                            } else {
135                                Ok(None)
136                            }
137                        }
138                    })
139                    .collect::<Result<Vec<_>, _>>()
140            })
141            .collect::<Result<Vec<_>, _>>()?;
142
143        Ok(Self {
144            config,
145            compiled_exclusion_regexes,
146            compiled_exclusion_globs,
147            compiled_alias_regexes,
148            compiled_alias_globs,
149        })
150    }
151
152    /// Apply rules to a set of components
153    pub fn apply(&self, components: &IndexMap<CanonicalId, Component>) -> RuleApplicationResult {
154        let mut result = RuleApplicationResult::default();
155
156        for (id, component) in components {
157            // Check exclusions first
158            if let Some(applied) = self.check_exclusions(id, component) {
159                result.excluded.insert(id.clone());
160                result.applied_rules.push(applied);
161                continue;
162            }
163
164            // Check equivalences
165            if let Some((canonical_id, applied)) = self.check_equivalences(id, component) {
166                result.canonical_map.insert(id.clone(), canonical_id);
167                result.applied_rules.push(applied);
168            }
169        }
170
171        result
172    }
173
174    /// Check if a component should be excluded
175    fn check_exclusions(&self, id: &CanonicalId, component: &Component) -> Option<AppliedRule> {
176        for (idx, rule) in self.config.exclusions.iter().enumerate() {
177            if self.exclusion_matches(rule, idx, component) {
178                return Some(AppliedRule {
179                    component_id: id.clone(),
180                    component_name: component.name.clone(),
181                    rule_type: AppliedRuleType::Exclusion {
182                        reason: rule.get_reason().map(|s| s.to_string()),
183                    },
184                    rule_index: idx,
185                    rule_name: None,
186                });
187            }
188        }
189        None
190    }
191
192    /// Check if an exclusion rule matches a component
193    fn exclusion_matches(
194        &self,
195        rule: &ExclusionRule,
196        rule_idx: usize,
197        component: &Component,
198    ) -> bool {
199        match rule {
200            ExclusionRule::Exact(purl) => component
201                .identifiers
202                .purl
203                .as_ref()
204                .map(|p| p == purl)
205                .unwrap_or(false),
206            ExclusionRule::Conditional {
207                pattern,
208                regex: _,
209                ecosystem,
210                name,
211                scope: _,
212                reason: _,
213            } => {
214                // Check ecosystem
215                if let Some(eco) = ecosystem {
216                    let comp_eco = component
217                        .ecosystem
218                        .as_ref()
219                        .map(|e| e.to_string().to_lowercase());
220                    if comp_eco.as_deref() != Some(&eco.to_lowercase()) {
221                        return false;
222                    }
223                }
224
225                // Check name
226                if let Some(n) = name {
227                    if !component.name.to_lowercase().contains(&n.to_lowercase()) {
228                        return false;
229                    }
230                }
231
232                // Check pre-compiled glob pattern
233                if pattern.is_some() {
234                    if let Some(purl) = &component.identifiers.purl {
235                        if let Some(Some(re)) = self.compiled_exclusion_globs.get(rule_idx) {
236                            if !re.is_match(purl) {
237                                return false;
238                            }
239                        }
240                    } else {
241                        return false;
242                    }
243                }
244
245                // Check compiled regex
246                if let Some(Some(re)) = self.compiled_exclusion_regexes.get(rule_idx) {
247                    if let Some(purl) = &component.identifiers.purl {
248                        if !re.is_match(purl) {
249                            return false;
250                        }
251                    } else {
252                        return false;
253                    }
254                }
255
256                // If we get here and at least one condition was specified, it matched
257                ecosystem.is_some()
258                    || name.is_some()
259                    || pattern.is_some()
260                    || self
261                        .compiled_exclusion_regexes
262                        .get(rule_idx)
263                        .map(|r| r.is_some())
264                        .unwrap_or(false)
265            }
266        }
267    }
268
269    /// Check if a component matches any equivalence group
270    fn check_equivalences(
271        &self,
272        id: &CanonicalId,
273        component: &Component,
274    ) -> Option<(CanonicalId, AppliedRule)> {
275        let purl = component.identifiers.purl.as_ref()?;
276
277        for (eq_idx, eq) in self.config.equivalences.iter().enumerate() {
278            // Check if the PURL matches the canonical or any alias
279            let matches_canonical = purl == &eq.canonical;
280            let matches_alias = self.alias_matches(eq_idx, eq, purl);
281
282            if matches_canonical || matches_alias {
283                let canonical_id = CanonicalId::from_purl(&eq.canonical);
284                let applied = AppliedRule {
285                    component_id: id.clone(),
286                    component_name: component.name.clone(),
287                    rule_type: AppliedRuleType::Equivalence {
288                        canonical: eq.canonical.clone(),
289                    },
290                    rule_index: eq_idx,
291                    rule_name: eq.name.clone(),
292                };
293                return Some((canonical_id, applied));
294            }
295        }
296
297        None
298    }
299
300    /// Check if a PURL matches any alias in an equivalence group
301    fn alias_matches(&self, eq_idx: usize, eq: &EquivalenceGroup, purl: &str) -> bool {
302        let alias_regexes = self.compiled_alias_regexes.get(eq_idx);
303        let alias_globs = self.compiled_alias_globs.get(eq_idx);
304
305        for (alias_idx, alias) in eq.aliases.iter().enumerate() {
306            let matches = match alias {
307                AliasPattern::Exact(exact_purl) => purl == exact_purl,
308                AliasPattern::Pattern {
309                    pattern: _,
310                    regex: _,
311                    ecosystem,
312                    name,
313                } => {
314                    let mut matched = false;
315
316                    // Check pre-compiled glob pattern
317                    if let Some(Some(re)) = alias_globs.and_then(|v| v.get(alias_idx)) {
318                        if re.is_match(purl) {
319                            matched = true;
320                        }
321                    }
322
323                    // Check regex
324                    if let Some(Some(re)) = alias_regexes.and_then(|v| v.get(alias_idx)) {
325                        if re.is_match(purl) {
326                            matched = true;
327                        }
328                    }
329
330                    // Check ecosystem match in PURL
331                    if let Some(eco) = ecosystem {
332                        let purl_lower = purl.to_lowercase();
333                        let eco_lower = eco.to_lowercase();
334                        // Check if PURL starts with pkg:<ecosystem>/
335                        if purl_lower.starts_with("pkg:") {
336                            if let Some(rest) = purl_lower.strip_prefix("pkg:") {
337                                if rest.starts_with(&eco_lower)
338                                    && rest[eco_lower.len()..].starts_with('/')
339                                {
340                                    matched = true;
341                                }
342                            }
343                        }
344                    }
345
346                    // Check name match in PURL
347                    if let Some(n) = name {
348                        if purl.to_lowercase().contains(&n.to_lowercase()) {
349                            matched = true;
350                        }
351                    }
352
353                    matched
354                }
355            };
356
357            if matches {
358                return true;
359            }
360        }
361
362        false
363    }
364
365    /// Get the configuration
366    pub fn config(&self) -> &MatchingRulesConfig {
367        &self.config
368    }
369
370    /// Check if a PURL is excluded by any rule
371    pub fn is_excluded(&self, purl: &str) -> bool {
372        for (idx, rule) in self.config.exclusions.iter().enumerate() {
373            match rule {
374                ExclusionRule::Exact(exact) => {
375                    if purl == exact {
376                        return true;
377                    }
378                }
379                ExclusionRule::Conditional { pattern, .. } => {
380                    // Check pre-compiled glob pattern
381                    if pattern.is_some() {
382                        if let Some(Some(re)) = self.compiled_exclusion_globs.get(idx) {
383                            if re.is_match(purl) {
384                                return true;
385                            }
386                        }
387                    }
388                    // Check pre-compiled regex
389                    if let Some(Some(re)) = self.compiled_exclusion_regexes.get(idx) {
390                        if re.is_match(purl) {
391                            return true;
392                        }
393                    }
394                }
395            }
396        }
397        false
398    }
399
400    /// Get the canonical PURL for a given PURL, if any equivalence applies
401    pub fn get_canonical(&self, purl: &str) -> Option<String> {
402        for (eq_idx, eq) in self.config.equivalences.iter().enumerate() {
403            if purl == eq.canonical {
404                return Some(eq.canonical.clone());
405            }
406            if self.alias_matches(eq_idx, eq, purl) {
407                return Some(eq.canonical.clone());
408            }
409        }
410        None
411    }
412}
413
414/// Compile a glob pattern to a regex at construction time.
415fn compile_glob(pattern: &str) -> Result<Regex, String> {
416    let regex_pattern = pattern
417        .replace('.', "\\.")
418        .replace('*', ".*")
419        .replace('?', ".");
420
421    Regex::new(&format!("^{}$", regex_pattern))
422        .map_err(|e| format!("Invalid glob pattern '{}': {}", pattern, e))
423}
424
/// Test-only helper: whether `text` matches the glob `pattern` (`*` and `?`
/// wildcards). A pattern that fails to compile is treated as matching nothing.
#[cfg(test)]
fn glob_matches(pattern: &str, text: &str) -> bool {
    match compile_glob(pattern) {
        Ok(compiled) => compiled.is_match(text),
        Err(_) => false,
    }
}
432
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an npm component at version 1.0.0 with the given name and
    /// optional PURL; the name doubles as the identifier when no PURL is given.
    fn create_test_component(name: &str, purl: Option<&str>) -> Component {
        use crate::model::*;
        let identifier = purl.unwrap_or(name).to_string();
        let mut component = Component::new(name.to_string(), identifier);
        component.version = Some("1.0.0".to_string());
        component.identifiers.purl = purl.map(str::to_string);
        component.ecosystem = Some(Ecosystem::Npm);
        component
    }

    /// Build an engine from `config`, panicking if any pattern is invalid.
    fn engine_for(config: MatchingRulesConfig) -> RuleEngine {
        RuleEngine::new(config).unwrap()
    }

    /// Equivalence group whose canonical PURL is `pkg:npm/lodash`.
    fn lodash_group(aliases: Vec<AliasPattern>) -> EquivalenceGroup {
        EquivalenceGroup {
            name: Some("Lodash".to_string()),
            canonical: "pkg:npm/lodash".to_string(),
            aliases,
            version_sensitive: false,
        }
    }

    #[test]
    fn test_glob_matches() {
        let cases = [
            ("pkg:npm/*", "pkg:npm/lodash", true),
            ("pkg:npm/lodash*", "pkg:npm/lodash-es", true),
            ("pkg:npm/*", "pkg:maven/test", false),
            ("*.json", "test.json", true),
        ];

        for &(pattern, text, expected) in cases.iter() {
            assert_eq!(
                glob_matches(pattern, text),
                expected,
                "pattern {:?} against {:?}",
                pattern,
                text
            );
        }
    }

    #[test]
    fn test_exact_exclusion() {
        let engine = engine_for(MatchingRulesConfig {
            exclusions: vec![ExclusionRule::exact("pkg:npm/jest")],
            ..Default::default()
        });

        assert!(engine.is_excluded("pkg:npm/jest"));
        assert!(!engine.is_excluded("pkg:npm/lodash"));
    }

    #[test]
    fn test_pattern_exclusion() {
        let engine = engine_for(MatchingRulesConfig {
            exclusions: vec![ExclusionRule::pattern("pkg:npm/test-*")],
            ..Default::default()
        });

        assert!(engine.is_excluded("pkg:npm/test-utils"));
        assert!(engine.is_excluded("pkg:npm/test-runner"));
        assert!(!engine.is_excluded("pkg:npm/lodash"));
    }

    #[test]
    fn test_equivalence_matching() {
        let engine = engine_for(MatchingRulesConfig {
            equivalences: vec![lodash_group(vec![
                AliasPattern::exact("pkg:npm/lodash-es"),
                AliasPattern::glob("pkg:npm/lodash.*"),
            ])],
            ..Default::default()
        });
        let canonical = Some("pkg:npm/lodash".to_string());

        // The canonical PURL itself, an exact alias and a glob alias all resolve.
        assert_eq!(engine.get_canonical("pkg:npm/lodash"), canonical);
        assert_eq!(engine.get_canonical("pkg:npm/lodash-es"), canonical);
        assert_eq!(engine.get_canonical("pkg:npm/lodash.min"), canonical);
        assert_eq!(engine.get_canonical("pkg:npm/underscore"), None);
    }

    #[test]
    fn test_apply_rules() {
        let engine = engine_for(MatchingRulesConfig {
            equivalences: vec![lodash_group(vec![AliasPattern::exact(
                "pkg:npm/lodash-es",
            )])],
            exclusions: vec![ExclusionRule::exact("pkg:npm/jest")],
            ..Default::default()
        });

        let fixtures = [
            ("lodash-es", "pkg:npm/lodash-es"),
            ("jest", "pkg:npm/jest"),
            ("react", "pkg:npm/react"),
        ];
        let mut components = IndexMap::new();
        for &(name, purl) in fixtures.iter() {
            components.insert(
                CanonicalId::from_purl(purl),
                create_test_component(name, Some(purl)),
            );
        }

        let result = engine.apply(&components);

        let lodash_es = CanonicalId::from_purl("pkg:npm/lodash-es");
        let jest = CanonicalId::from_purl("pkg:npm/jest");
        let react = CanonicalId::from_purl("pkg:npm/react");

        // lodash-es should be mapped to canonical lodash
        assert!(result.canonical_map.contains_key(&lodash_es));

        // jest should be excluded
        assert!(result.excluded.contains(&jest));

        // react should have no rules applied
        assert!(!result.canonical_map.contains_key(&react));
        assert!(!result.excluded.contains(&react));

        // Check applied rules
        assert_eq!(result.applied_rules.len(), 2);
    }
}