// sbom_tools/matching/rule_engine.rs

//! Rule engine for applying custom matching rules.
//!
//! This module provides the engine that applies custom matching rules
//! to components during the diff process.
5
6use indexmap::IndexMap;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
10use crate::model::{CanonicalId, Component};
11
12use super::custom_rules::{AliasPattern, EquivalenceGroup, ExclusionRule, MatchingRulesConfig};
13
14/// Result of applying matching rules to components
15#[derive(Debug, Clone, Default)]
16pub struct RuleApplicationResult {
17    /// Original ID -> Canonical ID mapping (for equivalences)
18    pub canonical_map: HashMap<CanonicalId, CanonicalId>,
19    /// IDs that should be excluded from diff
20    pub excluded: HashSet<CanonicalId>,
21    /// Log of which rules were applied
22    pub applied_rules: Vec<AppliedRule>,
23}
24
25/// Record of a rule being applied to a component
26#[derive(Debug, Clone)]
27pub struct AppliedRule {
28    /// The component that was affected
29    pub component_id: CanonicalId,
30    /// The component name
31    pub component_name: String,
32    /// The type of rule applied
33    pub rule_type: AppliedRuleType,
34    /// Index of the rule in the config
35    pub rule_index: usize,
36    /// Name of the rule (if any)
37    pub rule_name: Option<String>,
38}
39
/// The effect a matched rule had on a component.
#[derive(Debug, Clone)]
pub enum AppliedRuleType {
    /// The component was mapped onto an equivalence group; `canonical` is the
    /// group's canonical PURL.
    Equivalence { canonical: String },
    /// The component was excluded; `reason` is the rule's optional explanation.
    Exclusion { reason: Option<String> },
}
48
49/// Engine for applying custom matching rules
50pub struct RuleEngine {
51    config: MatchingRulesConfig,
52    /// Compiled regex patterns for exclusions
53    compiled_exclusion_regexes: Vec<Option<Regex>>,
54    /// Compiled glob patterns for exclusions (converted to regex)
55    compiled_exclusion_globs: Vec<Option<Regex>>,
56    /// Compiled regex patterns for equivalence aliases
57    compiled_alias_regexes: Vec<Vec<Option<Regex>>>,
58    /// Compiled glob patterns for equivalence aliases (converted to regex)
59    compiled_alias_globs: Vec<Vec<Option<Regex>>>,
60}
61
62impl RuleEngine {
63    /// Create a new rule engine from configuration
64    pub fn new(config: MatchingRulesConfig) -> Result<Self, String> {
65        // Pre-compile regex patterns for exclusions
66        let compiled_exclusion_regexes = config
67            .exclusions
68            .iter()
69            .map(|rule| match rule {
70                ExclusionRule::Exact(_) => Ok(None),
71                ExclusionRule::Conditional { regex, .. } => {
72                    regex.as_ref().map_or_else(
73                        || Ok(None),
74                        |re| Regex::new(re)
75                            .map(Some)
76                            .map_err(|e| format!("Invalid exclusion regex '{re}': {e}")),
77                    )
78                }
79            })
80            .collect::<Result<Vec<_>, _>>()?;
81
82        // Pre-compile glob patterns for exclusions
83        let compiled_exclusion_globs = config
84            .exclusions
85            .iter()
86            .map(|rule| match rule {
87                ExclusionRule::Exact(_) => Ok(None),
88                ExclusionRule::Conditional { pattern, .. } => {
89                    pattern.as_ref().map_or_else(|| Ok(None), |pat| compile_glob(pat).map(Some))
90                }
91            })
92            .collect::<Result<Vec<_>, _>>()?;
93
94        // Pre-compile regex patterns for equivalence aliases
95        let compiled_alias_regexes = config
96            .equivalences
97            .iter()
98            .map(|eq| {
99                eq.aliases
100                    .iter()
101                    .map(|alias| match alias {
102                        AliasPattern::Exact(_) => Ok(None),
103                        AliasPattern::Pattern { regex, .. } => {
104                            regex.as_ref().map_or_else(
105                                || Ok(None),
106                                |re| Regex::new(re)
107                                    .map(Some)
108                                    .map_err(|e| format!("Invalid alias regex '{re}': {e}")),
109                            )
110                        }
111                    })
112                    .collect::<Result<Vec<_>, _>>()
113            })
114            .collect::<Result<Vec<_>, _>>()?;
115
116        // Pre-compile glob patterns for equivalence aliases
117        let compiled_alias_globs = config
118            .equivalences
119            .iter()
120            .map(|eq| {
121                eq.aliases
122                    .iter()
123                    .map(|alias| match alias {
124                        AliasPattern::Exact(_) => Ok(None),
125                        AliasPattern::Pattern { pattern, .. } => {
126                            pattern.as_ref().map_or_else(|| Ok(None), |pat| compile_glob(pat).map(Some))
127                        }
128                    })
129                    .collect::<Result<Vec<_>, _>>()
130            })
131            .collect::<Result<Vec<_>, _>>()?;
132
133        Ok(Self {
134            config,
135            compiled_exclusion_regexes,
136            compiled_exclusion_globs,
137            compiled_alias_regexes,
138            compiled_alias_globs,
139        })
140    }
141
142    /// Apply rules to a set of components
143    #[must_use] 
144    pub fn apply(&self, components: &IndexMap<CanonicalId, Component>) -> RuleApplicationResult {
145        let mut result = RuleApplicationResult::default();
146
147        for (id, component) in components {
148            // Check exclusions first
149            if let Some(applied) = self.check_exclusions(id, component) {
150                result.excluded.insert(id.clone());
151                result.applied_rules.push(applied);
152                continue;
153            }
154
155            // Check equivalences
156            if let Some((canonical_id, applied)) = self.check_equivalences(id, component) {
157                result.canonical_map.insert(id.clone(), canonical_id);
158                result.applied_rules.push(applied);
159            }
160        }
161
162        result
163    }
164
165    /// Check if a component should be excluded
166    fn check_exclusions(&self, id: &CanonicalId, component: &Component) -> Option<AppliedRule> {
167        for (idx, rule) in self.config.exclusions.iter().enumerate() {
168            if self.exclusion_matches(rule, idx, component) {
169                return Some(AppliedRule {
170                    component_id: id.clone(),
171                    component_name: component.name.clone(),
172                    rule_type: AppliedRuleType::Exclusion {
173                        reason: rule.get_reason().map(std::string::ToString::to_string),
174                    },
175                    rule_index: idx,
176                    rule_name: None,
177                });
178            }
179        }
180        None
181    }
182
183    /// Check if an exclusion rule matches a component
184    fn exclusion_matches(
185        &self,
186        rule: &ExclusionRule,
187        rule_idx: usize,
188        component: &Component,
189    ) -> bool {
190        match rule {
191            ExclusionRule::Exact(purl) => component
192                .identifiers
193                .purl
194                .as_ref()
195                .is_some_and(|p| p == purl),
196            ExclusionRule::Conditional {
197                pattern,
198                regex: _,
199                ecosystem,
200                name,
201                scope: _,
202                reason: _,
203            } => {
204                // Check ecosystem
205                if let Some(eco) = ecosystem {
206                    let comp_eco = component
207                        .ecosystem
208                        .as_ref()
209                        .map(|e| e.to_string().to_lowercase());
210                    if comp_eco.as_deref() != Some(&eco.to_lowercase()) {
211                        return false;
212                    }
213                }
214
215                // Check name
216                if let Some(n) = name
217                    && !component.name.to_lowercase().contains(&n.to_lowercase()) {
218                        return false;
219                    }
220
221                // Check pre-compiled glob pattern
222                if pattern.is_some() {
223                    if let Some(purl) = &component.identifiers.purl {
224                        if let Some(Some(re)) = self.compiled_exclusion_globs.get(rule_idx)
225                            && !re.is_match(purl) {
226                                return false;
227                            }
228                    } else {
229                        return false;
230                    }
231                }
232
233                // Check compiled regex
234                if let Some(Some(re)) = self.compiled_exclusion_regexes.get(rule_idx) {
235                    if let Some(purl) = &component.identifiers.purl {
236                        if !re.is_match(purl) {
237                            return false;
238                        }
239                    } else {
240                        return false;
241                    }
242                }
243
244                // If we get here and at least one condition was specified, it matched
245                ecosystem.is_some()
246                    || name.is_some()
247                    || pattern.is_some()
248                    || self
249                        .compiled_exclusion_regexes
250                        .get(rule_idx)
251                        .is_some_and(std::option::Option::is_some)
252            }
253        }
254    }
255
256    /// Check if a component matches any equivalence group
257    fn check_equivalences(
258        &self,
259        id: &CanonicalId,
260        component: &Component,
261    ) -> Option<(CanonicalId, AppliedRule)> {
262        let purl = component.identifiers.purl.as_ref()?;
263
264        for (eq_idx, eq) in self.config.equivalences.iter().enumerate() {
265            // Check if the PURL matches the canonical or any alias
266            let matches_canonical = purl == &eq.canonical;
267            let matches_alias = self.alias_matches(eq_idx, eq, purl);
268
269            if matches_canonical || matches_alias {
270                let canonical_id = CanonicalId::from_purl(&eq.canonical);
271                let applied = AppliedRule {
272                    component_id: id.clone(),
273                    component_name: component.name.clone(),
274                    rule_type: AppliedRuleType::Equivalence {
275                        canonical: eq.canonical.clone(),
276                    },
277                    rule_index: eq_idx,
278                    rule_name: eq.name.clone(),
279                };
280                return Some((canonical_id, applied));
281            }
282        }
283
284        None
285    }
286
287    /// Check if a PURL matches any alias in an equivalence group
288    fn alias_matches(&self, eq_idx: usize, eq: &EquivalenceGroup, purl: &str) -> bool {
289        let alias_regexes = self.compiled_alias_regexes.get(eq_idx);
290        let alias_globs = self.compiled_alias_globs.get(eq_idx);
291
292        for (alias_idx, alias) in eq.aliases.iter().enumerate() {
293            let matches = match alias {
294                AliasPattern::Exact(exact_purl) => purl == exact_purl,
295                AliasPattern::Pattern {
296                    pattern: _,
297                    regex: _,
298                    ecosystem,
299                    name,
300                } => {
301                    let mut matched = false;
302
303                    // Check pre-compiled glob pattern
304                    if let Some(Some(re)) = alias_globs.and_then(|v| v.get(alias_idx))
305                        && re.is_match(purl) {
306                            matched = true;
307                        }
308
309                    // Check regex
310                    if let Some(Some(re)) = alias_regexes.and_then(|v| v.get(alias_idx))
311                        && re.is_match(purl) {
312                            matched = true;
313                        }
314
315                    // Check ecosystem match in PURL
316                    if let Some(eco) = ecosystem {
317                        let purl_lower = purl.to_lowercase();
318                        let eco_lower = eco.to_lowercase();
319                        // Check if PURL starts with pkg:<ecosystem>/
320                        if purl_lower.starts_with("pkg:")
321                            && let Some(rest) = purl_lower.strip_prefix("pkg:")
322                                && rest.starts_with(&eco_lower)
323                                    && rest[eco_lower.len()..].starts_with('/')
324                                {
325                                    matched = true;
326                                }
327                    }
328
329                    // Check name match in PURL
330                    if let Some(n) = name
331                        && purl.to_lowercase().contains(&n.to_lowercase()) {
332                            matched = true;
333                        }
334
335                    matched
336                }
337            };
338
339            if matches {
340                return true;
341            }
342        }
343
344        false
345    }
346
347    /// Get the configuration
348    #[must_use] 
349    pub const fn config(&self) -> &MatchingRulesConfig {
350        &self.config
351    }
352
353    /// Check if a PURL is excluded by any rule
354    #[must_use] 
355    pub fn is_excluded(&self, purl: &str) -> bool {
356        for (idx, rule) in self.config.exclusions.iter().enumerate() {
357            match rule {
358                ExclusionRule::Exact(exact) => {
359                    if purl == exact {
360                        return true;
361                    }
362                }
363                ExclusionRule::Conditional { pattern, .. } => {
364                    // Check pre-compiled glob pattern
365                    if pattern.is_some()
366                        && let Some(Some(re)) = self.compiled_exclusion_globs.get(idx)
367                            && re.is_match(purl) {
368                                return true;
369                            }
370                    // Check pre-compiled regex
371                    if let Some(Some(re)) = self.compiled_exclusion_regexes.get(idx)
372                        && re.is_match(purl) {
373                            return true;
374                        }
375                }
376            }
377        }
378        false
379    }
380
381    /// Get the canonical PURL for a given PURL, if any equivalence applies
382    #[must_use] 
383    pub fn get_canonical(&self, purl: &str) -> Option<String> {
384        for (eq_idx, eq) in self.config.equivalences.iter().enumerate() {
385            if purl == eq.canonical {
386                return Some(eq.canonical.clone());
387            }
388            if self.alias_matches(eq_idx, eq, purl) {
389                return Some(eq.canonical.clone());
390            }
391        }
392        None
393    }
394}
395
396/// Compile a glob pattern to a regex at construction time.
397fn compile_glob(pattern: &str) -> Result<Regex, String> {
398    let regex_pattern = pattern
399        .replace('.', "\\.")
400        .replace('*', ".*")
401        .replace('?', ".");
402
403    Regex::new(&format!("^{regex_pattern}$"))
404        .map_err(|e| format!("Invalid glob pattern '{pattern}': {e}"))
405}
406
/// Test-only convenience: does `text` match the glob `pattern` (`*` and `?`)?
///
/// A glob that fails to compile is treated as matching nothing.
#[cfg(test)]
fn glob_matches(pattern: &str, text: &str) -> bool {
    match compile_glob(pattern) {
        Ok(re) => re.is_match(text),
        Err(_) => false,
    }
}
414
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an npm component at version 1.0.0 with the given name and optional PURL.
    fn create_test_component(name: &str, purl: Option<&str>) -> Component {
        use crate::model::*;
        let identifier = purl.unwrap_or(name).to_string();
        let mut component = Component::new(name.to_string(), identifier);
        component.version = Some("1.0.0".to_string());
        component.identifiers.purl = purl.map(str::to_string);
        component.ecosystem = Some(Ecosystem::Npm);
        component
    }

    /// Equivalence group used by several tests: canonical lodash plus the given aliases.
    fn lodash_group(aliases: Vec<AliasPattern>) -> EquivalenceGroup {
        EquivalenceGroup {
            name: Some("Lodash".to_string()),
            canonical: "pkg:npm/lodash".to_string(),
            aliases,
            version_sensitive: false,
        }
    }

    #[test]
    fn test_glob_matches() {
        let cases = [
            ("pkg:npm/*", "pkg:npm/lodash", true),
            ("pkg:npm/lodash*", "pkg:npm/lodash-es", true),
            ("pkg:npm/*", "pkg:maven/test", false),
            ("*.json", "test.json", true),
        ];

        for (pattern, text, expected) in cases {
            assert_eq!(
                glob_matches(pattern, text),
                expected,
                "glob {pattern:?} against {text:?}"
            );
        }
    }

    #[test]
    fn test_exact_exclusion() {
        let engine = RuleEngine::new(MatchingRulesConfig {
            exclusions: vec![ExclusionRule::exact("pkg:npm/jest")],
            ..Default::default()
        })
        .unwrap();

        assert!(engine.is_excluded("pkg:npm/jest"));
        assert!(!engine.is_excluded("pkg:npm/lodash"));
    }

    #[test]
    fn test_pattern_exclusion() {
        let engine = RuleEngine::new(MatchingRulesConfig {
            exclusions: vec![ExclusionRule::pattern("pkg:npm/test-*")],
            ..Default::default()
        })
        .unwrap();

        for excluded in ["pkg:npm/test-utils", "pkg:npm/test-runner"] {
            assert!(engine.is_excluded(excluded));
        }
        assert!(!engine.is_excluded("pkg:npm/lodash"));
    }

    #[test]
    fn test_equivalence_matching() {
        let engine = RuleEngine::new(MatchingRulesConfig {
            equivalences: vec![lodash_group(vec![
                AliasPattern::exact("pkg:npm/lodash-es"),
                AliasPattern::glob("pkg:npm/lodash.*"),
            ])],
            ..Default::default()
        })
        .unwrap();

        let canonical = Some("pkg:npm/lodash".to_string());
        for member in ["pkg:npm/lodash", "pkg:npm/lodash-es", "pkg:npm/lodash.min"] {
            assert_eq!(engine.get_canonical(member), canonical);
        }
        assert_eq!(engine.get_canonical("pkg:npm/underscore"), None);
    }

    #[test]
    fn test_apply_rules() {
        let engine = RuleEngine::new(MatchingRulesConfig {
            equivalences: vec![lodash_group(vec![AliasPattern::exact(
                "pkg:npm/lodash-es",
            )])],
            exclusions: vec![ExclusionRule::exact("pkg:npm/jest")],
            ..Default::default()
        })
        .unwrap();

        let components: IndexMap<CanonicalId, Component> = [
            ("lodash-es", "pkg:npm/lodash-es"),
            ("jest", "pkg:npm/jest"),
            ("react", "pkg:npm/react"),
        ]
        .into_iter()
        .map(|(name, purl)| {
            (
                CanonicalId::from_purl(purl),
                create_test_component(name, Some(purl)),
            )
        })
        .collect();

        let result = engine.apply(&components);

        let lodash_es = CanonicalId::from_purl("pkg:npm/lodash-es");
        let jest = CanonicalId::from_purl("pkg:npm/jest");
        let react = CanonicalId::from_purl("pkg:npm/react");

        // lodash-es should be mapped to canonical lodash
        assert!(result.canonical_map.contains_key(&lodash_es));

        // jest should be excluded
        assert!(result.excluded.contains(&jest));

        // react should have no rules applied
        assert!(!result.canonical_map.contains_key(&react));
        assert!(!result.excluded.contains(&react));

        // Exactly one equivalence hit and one exclusion hit
        assert_eq!(result.applied_rules.len(), 2);
    }
}
543}