// sbom_tools/matching/rule_engine.rs

//! Rule engine for applying custom matching rules.
//!
//! This module provides the engine that applies custom matching rules
//! to components during the diff process.
5
6use indexmap::IndexMap;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
10use crate::model::{CanonicalId, Component};
11
12use super::custom_rules::{AliasPattern, EquivalenceGroup, ExclusionRule, MatchingRulesConfig};
13
14/// Result of applying matching rules to components
15#[derive(Debug, Clone, Default)]
16pub struct RuleApplicationResult {
17    /// Original ID -> Canonical ID mapping (for equivalences)
18    pub canonical_map: HashMap<CanonicalId, CanonicalId>,
19    /// IDs that should be excluded from diff
20    pub excluded: HashSet<CanonicalId>,
21    /// Log of which rules were applied
22    pub applied_rules: Vec<AppliedRule>,
23}
24
25/// Record of a rule being applied to a component
26#[derive(Debug, Clone)]
27pub struct AppliedRule {
28    /// The component that was affected
29    pub component_id: CanonicalId,
30    /// The component name
31    pub component_name: String,
32    /// The type of rule applied
33    pub rule_type: AppliedRuleType,
34    /// Index of the rule in the config
35    pub rule_index: usize,
36    /// Name of the rule (if any)
37    pub rule_name: Option<String>,
38}
39
/// Type of rule that was applied to a component.
#[derive(Debug, Clone)]
pub enum AppliedRuleType {
    /// Component was mapped to a canonical ID; `canonical` is the group's canonical PURL
    Equivalence { canonical: String },
    /// Component was excluded; `reason` is the rule's optional explanation
    Exclusion { reason: Option<String> },
}
48
49/// Engine for applying custom matching rules
50pub struct RuleEngine {
51    config: MatchingRulesConfig,
52    /// Compiled regex patterns for exclusions
53    compiled_exclusion_regexes: Vec<Option<Regex>>,
54    /// Compiled glob patterns for exclusions (converted to regex)
55    compiled_exclusion_globs: Vec<Option<Regex>>,
56    /// Compiled regex patterns for equivalence aliases
57    compiled_alias_regexes: Vec<Vec<Option<Regex>>>,
58    /// Compiled glob patterns for equivalence aliases (converted to regex)
59    compiled_alias_globs: Vec<Vec<Option<Regex>>>,
60}
61
62impl RuleEngine {
63    /// Create a new rule engine from configuration
64    pub fn new(config: MatchingRulesConfig) -> Result<Self, String> {
65        // Pre-compile regex patterns for exclusions
66        let compiled_exclusion_regexes = config
67            .exclusions
68            .iter()
69            .map(|rule| match rule {
70                ExclusionRule::Exact(_) => Ok(None),
71                ExclusionRule::Conditional { regex, .. } => regex.as_ref().map_or_else(
72                    || Ok(None),
73                    |re| {
74                        Regex::new(re)
75                            .map(Some)
76                            .map_err(|e| format!("Invalid exclusion regex '{re}': {e}"))
77                    },
78                ),
79            })
80            .collect::<Result<Vec<_>, _>>()?;
81
82        // Pre-compile glob patterns for exclusions
83        let compiled_exclusion_globs = config
84            .exclusions
85            .iter()
86            .map(|rule| match rule {
87                ExclusionRule::Exact(_) => Ok(None),
88                ExclusionRule::Conditional { pattern, .. } => pattern
89                    .as_ref()
90                    .map_or_else(|| Ok(None), |pat| compile_glob(pat).map(Some)),
91            })
92            .collect::<Result<Vec<_>, _>>()?;
93
94        // Pre-compile regex patterns for equivalence aliases
95        let compiled_alias_regexes = config
96            .equivalences
97            .iter()
98            .map(|eq| {
99                eq.aliases
100                    .iter()
101                    .map(|alias| match alias {
102                        AliasPattern::Exact(_) => Ok(None),
103                        AliasPattern::Pattern { regex, .. } => regex.as_ref().map_or_else(
104                            || Ok(None),
105                            |re| {
106                                Regex::new(re)
107                                    .map(Some)
108                                    .map_err(|e| format!("Invalid alias regex '{re}': {e}"))
109                            },
110                        ),
111                    })
112                    .collect::<Result<Vec<_>, _>>()
113            })
114            .collect::<Result<Vec<_>, _>>()?;
115
116        // Pre-compile glob patterns for equivalence aliases
117        let compiled_alias_globs = config
118            .equivalences
119            .iter()
120            .map(|eq| {
121                eq.aliases
122                    .iter()
123                    .map(|alias| match alias {
124                        AliasPattern::Exact(_) => Ok(None),
125                        AliasPattern::Pattern { pattern, .. } => pattern
126                            .as_ref()
127                            .map_or_else(|| Ok(None), |pat| compile_glob(pat).map(Some)),
128                    })
129                    .collect::<Result<Vec<_>, _>>()
130            })
131            .collect::<Result<Vec<_>, _>>()?;
132
133        Ok(Self {
134            config,
135            compiled_exclusion_regexes,
136            compiled_exclusion_globs,
137            compiled_alias_regexes,
138            compiled_alias_globs,
139        })
140    }
141
142    /// Apply rules to a set of components
143    #[must_use]
144    pub fn apply(&self, components: &IndexMap<CanonicalId, Component>) -> RuleApplicationResult {
145        let mut result = RuleApplicationResult::default();
146
147        for (id, component) in components {
148            // Check exclusions first
149            if let Some(applied) = self.check_exclusions(id, component) {
150                result.excluded.insert(id.clone());
151                result.applied_rules.push(applied);
152                continue;
153            }
154
155            // Check equivalences
156            if let Some((canonical_id, applied)) = self.check_equivalences(id, component) {
157                result.canonical_map.insert(id.clone(), canonical_id);
158                result.applied_rules.push(applied);
159            }
160        }
161
162        result
163    }
164
165    /// Check if a component should be excluded
166    fn check_exclusions(&self, id: &CanonicalId, component: &Component) -> Option<AppliedRule> {
167        for (idx, rule) in self.config.exclusions.iter().enumerate() {
168            if self.exclusion_matches(rule, idx, component) {
169                return Some(AppliedRule {
170                    component_id: id.clone(),
171                    component_name: component.name.clone(),
172                    rule_type: AppliedRuleType::Exclusion {
173                        reason: rule.get_reason().map(std::string::ToString::to_string),
174                    },
175                    rule_index: idx,
176                    rule_name: None,
177                });
178            }
179        }
180        None
181    }
182
183    /// Check if an exclusion rule matches a component
184    fn exclusion_matches(
185        &self,
186        rule: &ExclusionRule,
187        rule_idx: usize,
188        component: &Component,
189    ) -> bool {
190        match rule {
191            ExclusionRule::Exact(purl) => component
192                .identifiers
193                .purl
194                .as_ref()
195                .is_some_and(|p| p == purl),
196            ExclusionRule::Conditional {
197                pattern,
198                regex: _,
199                ecosystem,
200                name,
201                scope: _,
202                reason: _,
203            } => {
204                // Check ecosystem
205                if let Some(eco) = ecosystem {
206                    let comp_eco = component
207                        .ecosystem
208                        .as_ref()
209                        .map(|e| e.to_string().to_lowercase());
210                    if comp_eco.as_deref() != Some(&eco.to_lowercase()) {
211                        return false;
212                    }
213                }
214
215                // Check name
216                if let Some(n) = name
217                    && !component.name.to_lowercase().contains(&n.to_lowercase())
218                {
219                    return false;
220                }
221
222                // Check pre-compiled glob pattern
223                if pattern.is_some() {
224                    if let Some(purl) = &component.identifiers.purl {
225                        if let Some(Some(re)) = self.compiled_exclusion_globs.get(rule_idx)
226                            && !re.is_match(purl)
227                        {
228                            return false;
229                        }
230                    } else {
231                        return false;
232                    }
233                }
234
235                // Check compiled regex
236                if let Some(Some(re)) = self.compiled_exclusion_regexes.get(rule_idx) {
237                    if let Some(purl) = &component.identifiers.purl {
238                        if !re.is_match(purl) {
239                            return false;
240                        }
241                    } else {
242                        return false;
243                    }
244                }
245
246                // If we get here and at least one condition was specified, it matched
247                ecosystem.is_some()
248                    || name.is_some()
249                    || pattern.is_some()
250                    || self
251                        .compiled_exclusion_regexes
252                        .get(rule_idx)
253                        .is_some_and(std::option::Option::is_some)
254            }
255        }
256    }
257
258    /// Check if a component matches any equivalence group
259    fn check_equivalences(
260        &self,
261        id: &CanonicalId,
262        component: &Component,
263    ) -> Option<(CanonicalId, AppliedRule)> {
264        let purl = component.identifiers.purl.as_ref()?;
265
266        for (eq_idx, eq) in self.config.equivalences.iter().enumerate() {
267            // Check if the PURL matches the canonical or any alias
268            let matches_canonical = purl == &eq.canonical;
269            let matches_alias = self.alias_matches(eq_idx, eq, purl);
270
271            if matches_canonical || matches_alias {
272                let canonical_id = CanonicalId::from_purl(&eq.canonical);
273                let applied = AppliedRule {
274                    component_id: id.clone(),
275                    component_name: component.name.clone(),
276                    rule_type: AppliedRuleType::Equivalence {
277                        canonical: eq.canonical.clone(),
278                    },
279                    rule_index: eq_idx,
280                    rule_name: eq.name.clone(),
281                };
282                return Some((canonical_id, applied));
283            }
284        }
285
286        None
287    }
288
289    /// Check if a PURL matches any alias in an equivalence group
290    fn alias_matches(&self, eq_idx: usize, eq: &EquivalenceGroup, purl: &str) -> bool {
291        let alias_regexes = self.compiled_alias_regexes.get(eq_idx);
292        let alias_globs = self.compiled_alias_globs.get(eq_idx);
293
294        for (alias_idx, alias) in eq.aliases.iter().enumerate() {
295            let matches = match alias {
296                AliasPattern::Exact(exact_purl) => purl == exact_purl,
297                AliasPattern::Pattern {
298                    pattern: _,
299                    regex: _,
300                    ecosystem,
301                    name,
302                } => {
303                    let mut matched = false;
304
305                    // Check pre-compiled glob pattern
306                    if let Some(Some(re)) = alias_globs.and_then(|v| v.get(alias_idx))
307                        && re.is_match(purl)
308                    {
309                        matched = true;
310                    }
311
312                    // Check regex
313                    if let Some(Some(re)) = alias_regexes.and_then(|v| v.get(alias_idx))
314                        && re.is_match(purl)
315                    {
316                        matched = true;
317                    }
318
319                    // Check ecosystem match in PURL
320                    if let Some(eco) = ecosystem {
321                        let purl_lower = purl.to_lowercase();
322                        let eco_lower = eco.to_lowercase();
323                        // Check if PURL starts with pkg:<ecosystem>/
324                        if purl_lower.starts_with("pkg:")
325                            && let Some(rest) = purl_lower.strip_prefix("pkg:")
326                            && rest.starts_with(&eco_lower)
327                            && rest[eco_lower.len()..].starts_with('/')
328                        {
329                            matched = true;
330                        }
331                    }
332
333                    // Check name match in PURL
334                    if let Some(n) = name
335                        && purl.to_lowercase().contains(&n.to_lowercase())
336                    {
337                        matched = true;
338                    }
339
340                    matched
341                }
342            };
343
344            if matches {
345                return true;
346            }
347        }
348
349        false
350    }
351
352    /// Get the configuration
353    #[must_use]
354    pub const fn config(&self) -> &MatchingRulesConfig {
355        &self.config
356    }
357
358    /// Check if a PURL is excluded by any rule
359    #[must_use]
360    pub fn is_excluded(&self, purl: &str) -> bool {
361        for (idx, rule) in self.config.exclusions.iter().enumerate() {
362            match rule {
363                ExclusionRule::Exact(exact) => {
364                    if purl == exact {
365                        return true;
366                    }
367                }
368                ExclusionRule::Conditional { pattern, .. } => {
369                    // Check pre-compiled glob pattern
370                    if pattern.is_some()
371                        && let Some(Some(re)) = self.compiled_exclusion_globs.get(idx)
372                        && re.is_match(purl)
373                    {
374                        return true;
375                    }
376                    // Check pre-compiled regex
377                    if let Some(Some(re)) = self.compiled_exclusion_regexes.get(idx)
378                        && re.is_match(purl)
379                    {
380                        return true;
381                    }
382                }
383            }
384        }
385        false
386    }
387
388    /// Get the canonical PURL for a given PURL, if any equivalence applies
389    #[must_use]
390    pub fn get_canonical(&self, purl: &str) -> Option<String> {
391        for (eq_idx, eq) in self.config.equivalences.iter().enumerate() {
392            if purl == eq.canonical {
393                return Some(eq.canonical.clone());
394            }
395            if self.alias_matches(eq_idx, eq, purl) {
396                return Some(eq.canonical.clone());
397            }
398        }
399        None
400    }
401}
402
403/// Compile a glob pattern to a regex at construction time.
404fn compile_glob(pattern: &str) -> Result<Regex, String> {
405    let regex_pattern = pattern
406        .replace('.', "\\.")
407        .replace('*', ".*")
408        .replace('?', ".");
409
410    Regex::new(&format!("^{regex_pattern}$"))
411        .map_err(|e| format!("Invalid glob pattern '{pattern}': {e}"))
412}
413
/// Simple glob pattern matching (supports `*` and `?`) - used only in tests.
///
/// Returns `false` when the pattern cannot be compiled.
#[cfg(test)]
fn glob_matches(pattern: &str, text: &str) -> bool {
    compile_glob(pattern).is_ok_and(|re| re.is_match(text))
}
421
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an npm component at version 1.0.0 with the given name and
    /// optional PURL (the name doubles as the second constructor argument
    /// when no PURL is given).
    fn create_test_component(name: &str, purl: Option<&str>) -> Component {
        use crate::model::*;
        let mut comp = Component::new(name.to_string(), purl.unwrap_or(name).to_string());
        comp.version = Some("1.0.0".to_string());
        comp.identifiers.purl = purl.map(|s| s.to_string());
        comp.ecosystem = Some(Ecosystem::Npm);
        comp
    }

    #[test]
    fn test_glob_matches() {
        assert!(glob_matches("pkg:npm/*", "pkg:npm/lodash"));
        assert!(glob_matches("pkg:npm/lodash*", "pkg:npm/lodash-es"));
        assert!(!glob_matches("pkg:npm/*", "pkg:maven/test"));
        assert!(glob_matches("*.json", "test.json"));
    }

    #[test]
    fn test_exact_exclusion() {
        let config = MatchingRulesConfig {
            exclusions: vec![ExclusionRule::exact("pkg:npm/jest")],
            ..Default::default()
        };
        let engine = RuleEngine::new(config).unwrap();

        assert!(engine.is_excluded("pkg:npm/jest"));
        assert!(!engine.is_excluded("pkg:npm/lodash"));
    }

    #[test]
    fn test_pattern_exclusion() {
        let config = MatchingRulesConfig {
            exclusions: vec![ExclusionRule::pattern("pkg:npm/test-*")],
            ..Default::default()
        };
        let engine = RuleEngine::new(config).unwrap();

        assert!(engine.is_excluded("pkg:npm/test-utils"));
        assert!(engine.is_excluded("pkg:npm/test-runner"));
        assert!(!engine.is_excluded("pkg:npm/lodash"));
    }

    #[test]
    fn test_equivalence_matching() {
        let config = MatchingRulesConfig {
            equivalences: vec![EquivalenceGroup {
                name: Some("Lodash".to_string()),
                canonical: "pkg:npm/lodash".to_string(),
                aliases: vec![
                    AliasPattern::exact("pkg:npm/lodash-es"),
                    AliasPattern::glob("pkg:npm/lodash.*"),
                ],
                version_sensitive: false,
            }],
            ..Default::default()
        };
        let engine = RuleEngine::new(config).unwrap();

        assert_eq!(
            engine.get_canonical("pkg:npm/lodash"),
            Some("pkg:npm/lodash".to_string())
        );
        assert_eq!(
            engine.get_canonical("pkg:npm/lodash-es"),
            Some("pkg:npm/lodash".to_string())
        );
        assert_eq!(
            engine.get_canonical("pkg:npm/lodash.min"),
            Some("pkg:npm/lodash".to_string())
        );
        assert_eq!(engine.get_canonical("pkg:npm/underscore"), None);
    }

    #[test]
    fn test_apply_rules() {
        let config = MatchingRulesConfig {
            equivalences: vec![EquivalenceGroup {
                name: Some("Lodash".to_string()),
                canonical: "pkg:npm/lodash".to_string(),
                aliases: vec![AliasPattern::exact("pkg:npm/lodash-es")],
                version_sensitive: false,
            }],
            exclusions: vec![ExclusionRule::exact("pkg:npm/jest")],
            ..Default::default()
        };
        let engine = RuleEngine::new(config).unwrap();

        let mut components = IndexMap::new();
        components.insert(
            CanonicalId::from_purl("pkg:npm/lodash-es"),
            create_test_component("lodash-es", Some("pkg:npm/lodash-es")),
        );
        components.insert(
            CanonicalId::from_purl("pkg:npm/jest"),
            create_test_component("jest", Some("pkg:npm/jest")),
        );
        components.insert(
            CanonicalId::from_purl("pkg:npm/react"),
            create_test_component("react", Some("pkg:npm/react")),
        );

        let result = engine.apply(&components);

        // lodash-es should be mapped to canonical lodash
        assert!(
            result
                .canonical_map
                .contains_key(&CanonicalId::from_purl("pkg:npm/lodash-es"))
        );

        // jest should be excluded
        assert!(
            result
                .excluded
                .contains(&CanonicalId::from_purl("pkg:npm/jest"))
        );

        // react should have no rules applied
        assert!(
            !result
                .canonical_map
                .contains_key(&CanonicalId::from_purl("pkg:npm/react"))
        );
        assert!(
            !result
                .excluded
                .contains(&CanonicalId::from_purl("pkg:npm/react"))
        );

        // One equivalence (lodash-es) plus one exclusion (jest)
        assert_eq!(result.applied_rules.len(), 2);
    }
}