Skip to main content

sbom_tools/diff/
engine.rs

1//! Semantic diff engine implementation.
2
3use super::changes::{
4    ComponentChangeComputer, DependencyChangeComputer, LicenseChangeComputer,
5    VulnerabilityChangeComputer,
6};
7pub use super::engine_config::LargeSbomConfig;
8use super::engine_matching::{ComponentMatchResult, match_components};
9use super::engine_rules::{apply_rules, remap_match_result};
10use super::incremental::ChangedSections;
11use super::result::MatchMetrics;
12use super::traits::ChangeComputer;
13use super::{CostModel, DiffResult, GraphDiffConfig, MatchInfo, diff_dependency_graph};
14use crate::error::SbomDiffError;
15use crate::matching::{
16    ComponentMatcher, FuzzyMatchConfig, FuzzyMatcher, MatchingRulesConfig, RuleEngine,
17};
18use crate::model::NormalizedSbom;
19use std::borrow::Cow;
20
/// Semantic diff engine for comparing SBOMs.
///
/// Create one with [`DiffEngine::new`], adjust it through the builder-style
/// methods, then call [`DiffEngine::diff`] on a pair of normalized SBOMs.
#[must_use]
pub struct DiffEngine {
    /// Cost model handed to the component change computer and used for the semantic score.
    cost_model: CostModel,
    /// Fuzzy matching settings: seeds the default `FuzzyMatcher` and is passed to component matching.
    fuzzy_config: FuzzyMatchConfig,
    /// Flag stored by `include_unchanged`; it is not read anywhere in this file.
    include_unchanged: bool,
    /// Graph diff settings; `Some` enables dependency-graph diffing in `diff`.
    graph_diff_config: Option<GraphDiffConfig>,
    /// Optional rule engine passed to `apply_rules` before matching.
    rule_engine: Option<RuleEngine>,
    /// Optional matcher used instead of the default `FuzzyMatcher`.
    custom_matcher: Option<Box<dyn ComponentMatcher>>,
    /// Large-SBOM optimization settings passed to component matching.
    large_sbom_config: LargeSbomConfig,
}
32
33impl DiffEngine {
34    /// Create a new diff engine with default settings
35    pub fn new() -> Self {
36        Self {
37            cost_model: CostModel::default(),
38            fuzzy_config: FuzzyMatchConfig::balanced(),
39            include_unchanged: false,
40            graph_diff_config: None,
41            rule_engine: None,
42            custom_matcher: None,
43            large_sbom_config: LargeSbomConfig::default(),
44        }
45    }
46
47    /// Create a diff engine with a custom cost model
48    pub const fn with_cost_model(mut self, cost_model: CostModel) -> Self {
49        self.cost_model = cost_model;
50        self
51    }
52
53    /// Set fuzzy matching configuration
54    pub const fn with_fuzzy_config(mut self, config: FuzzyMatchConfig) -> Self {
55        self.fuzzy_config = config;
56        self
57    }
58
59    /// Include unchanged components in the result
60    pub const fn include_unchanged(mut self, include: bool) -> Self {
61        self.include_unchanged = include;
62        self
63    }
64
65    /// Enable graph-aware diffing with the given configuration
66    pub fn with_graph_diff(mut self, config: GraphDiffConfig) -> Self {
67        self.graph_diff_config = Some(config);
68        self
69    }
70
71    /// Set custom matching rules from a configuration
72    pub fn with_matching_rules(mut self, config: MatchingRulesConfig) -> Result<Self, String> {
73        self.rule_engine = Some(RuleEngine::new(config)?);
74        Ok(self)
75    }
76
77    /// Set custom matching rules engine directly
78    pub fn with_rule_engine(mut self, engine: RuleEngine) -> Self {
79        self.rule_engine = Some(engine);
80        self
81    }
82
83    /// Set a custom component matcher.
84    pub fn with_matcher(mut self, matcher: Box<dyn ComponentMatcher>) -> Self {
85        self.custom_matcher = Some(matcher);
86        self
87    }
88
89    /// Configure large SBOM optimization settings.
90    pub const fn with_large_sbom_config(mut self, config: LargeSbomConfig) -> Self {
91        self.large_sbom_config = config;
92        self
93    }
94
    /// Borrow the large SBOM configuration currently set on this engine.
    ///
    /// This is the value installed by `with_large_sbom_config`, or the default
    /// configuration when the engine came from `new`.
    #[must_use]
    pub const fn large_sbom_config(&self) -> &LargeSbomConfig {
        &self.large_sbom_config
    }
100
101    /// Check if a custom matcher is configured
102    #[must_use]
103    pub fn has_custom_matcher(&self) -> bool {
104        self.custom_matcher.is_some()
105    }
106
    /// Check whether graph diffing is enabled.
    ///
    /// Returns `true` when a graph diff configuration has been set, which is
    /// the condition `diff` uses to run the dependency-graph diff.
    #[must_use]
    pub const fn graph_diff_enabled(&self) -> bool {
        self.graph_diff_config.is_some()
    }
112
    /// Check whether custom matching rules are configured.
    ///
    /// Returns `true` when a rule engine has been installed through
    /// `with_matching_rules` or `with_rule_engine`.
    #[must_use]
    pub const fn has_matching_rules(&self) -> bool {
        self.rule_engine.is_some()
    }
118
119    /// Compare two SBOMs and return the diff result
120    #[must_use = "diff result contains all changes and should not be discarded"]
121    pub fn diff(
122        &self,
123        old: &NormalizedSbom,
124        new: &NormalizedSbom,
125    ) -> Result<DiffResult, SbomDiffError> {
126        let _span = tracing::info_span!(
127            "diff_engine::diff",
128            old_components = old.component_count(),
129            new_components = new.component_count(),
130        )
131        .entered();
132
133        let mut result = DiffResult::new();
134
135        // Quick check: if content hashes match, SBOMs are identical
136        if old.content_hash == new.content_hash && old.content_hash != 0 {
137            return Ok(result);
138        }
139
140        // Apply custom matching rules if configured
141        // Use Cow to avoid cloning SBOMs when no rules are applied
142        let (old_filtered, new_filtered, canonical_maps) =
143            if let Some(rule_result) = apply_rules(self.rule_engine.as_ref(), old, new) {
144                result.rules_applied = rule_result.rules_count;
145                (
146                    Cow::Owned(rule_result.old_filtered),
147                    Cow::Owned(rule_result.new_filtered),
148                    Some((rule_result.old_canonical, rule_result.new_canonical)),
149                )
150            } else {
151                (Cow::Borrowed(old), Cow::Borrowed(new), None)
152            };
153
154        // Build component mappings using the configured matcher
155        let default_matcher = FuzzyMatcher::new(self.fuzzy_config.clone());
156        let matcher: &dyn ComponentMatcher = self
157            .custom_matcher
158            .as_ref()
159            .map_or(&default_matcher as &dyn ComponentMatcher, |m| m.as_ref());
160
161        let mut component_matches = match_components(
162            &old_filtered,
163            &new_filtered,
164            matcher,
165            &self.fuzzy_config,
166            &self.large_sbom_config,
167        );
168
169        // Apply canonical mappings from rule engine
170        if let Some((old_canonical, new_canonical)) = &canonical_maps {
171            component_matches =
172                remap_match_result(&component_matches, old_canonical, new_canonical);
173        }
174
175        // Compute match metrics for observability
176        {
177            let scores: Vec<f64> = component_matches.pairs.values().copied().collect();
178            let exact = scores.iter().filter(|&&s| s >= 0.99).count();
179            let fuzzy = scores.len() - exact;
180            let matched_count = scores.len();
181            let unmatched_old = old_filtered.component_count().saturating_sub(matched_count);
182            let unmatched_new = new_filtered.component_count().saturating_sub(matched_count);
183            let avg = if scores.is_empty() {
184                0.0
185            } else {
186                scores.iter().sum::<f64>() / scores.len() as f64
187            };
188            let min = scores.iter().copied().fold(f64::INFINITY, f64::min);
189
190            result.match_metrics = Some(MatchMetrics {
191                exact_matches: exact,
192                fuzzy_matches: fuzzy,
193                rule_matches: result.rules_applied,
194                unmatched_old,
195                unmatched_new,
196                avg_match_score: avg,
197                min_match_score: if min.is_infinite() { 0.0 } else { min },
198            });
199        }
200
201        // Compute changes using the modular change computers
202        self.compute_all_changes(
203            &old_filtered,
204            &new_filtered,
205            &component_matches,
206            matcher,
207            &mut result,
208        );
209
210        // Perform graph-aware diffing if enabled
211        if let Some(ref graph_config) = self.graph_diff_config {
212            let (graph_changes, graph_summary) = diff_dependency_graph(
213                &old_filtered,
214                &new_filtered,
215                &component_matches.matches,
216                graph_config,
217            );
218            result.graph_changes = graph_changes;
219            result.graph_summary = Some(graph_summary);
220        }
221
222        // Calculate semantic score
223        result.semantic_score = self.compute_semantic_score(&result);
224
225        result.calculate_summary();
226        Ok(result)
227    }
228
229    /// Compute all changes using the modular change computers.
230    fn compute_all_changes(
231        &self,
232        old: &NormalizedSbom,
233        new: &NormalizedSbom,
234        match_result: &ComponentMatchResult,
235        matcher: &dyn ComponentMatcher,
236        result: &mut DiffResult,
237    ) {
238        // Component changes
239        let comp_computer = ComponentChangeComputer::new(self.cost_model.clone());
240        let comp_changes = comp_computer.compute(old, new, &match_result.matches);
241        result.components.added = comp_changes.added;
242        result.components.removed = comp_changes.removed;
243        result.components.modified = comp_changes
244            .modified
245            .into_iter()
246            .map(|mut change| {
247                // Add match explanation for modified components
248                // Use stored canonical IDs directly instead of reconstructing from name+version
249                if let (Some(old_id), Some(new_id)) =
250                    (&change.old_canonical_id, &change.canonical_id)
251                    && let (Some(old_comp), Some(new_comp)) =
252                        (old.components.get(old_id), new.components.get(new_id))
253                {
254                    let explanation = matcher.explain_match(old_comp, new_comp);
255                    let mut match_info = MatchInfo::from_explanation(&explanation);
256
257                    // Use the actual score from the matching phase if available
258                    if let Some(&score) = match_result.pairs.get(&(old_id.clone(), new_id.clone()))
259                    {
260                        match_info.score = score;
261                    }
262
263                    change = change.with_match_info(match_info);
264                }
265                change
266            })
267            .collect();
268
269        // Dependency changes
270        let dep_computer = DependencyChangeComputer::new();
271        let dep_changes = dep_computer.compute(old, new, &match_result.matches);
272        result.dependencies.added = dep_changes.added;
273        result.dependencies.removed = dep_changes.removed;
274
275        // License changes
276        let lic_computer = LicenseChangeComputer::new();
277        let lic_changes = lic_computer.compute(old, new, &match_result.matches);
278        result.licenses.new_licenses = lic_changes.new_licenses;
279        result.licenses.removed_licenses = lic_changes.removed_licenses;
280
281        // Vulnerability changes
282        let vuln_computer = VulnerabilityChangeComputer::new();
283        let vuln_changes = vuln_computer.compute(old, new, &match_result.matches);
284        result.vulnerabilities.introduced = vuln_changes.introduced;
285        result.vulnerabilities.resolved = vuln_changes.resolved;
286        result.vulnerabilities.persistent = vuln_changes.persistent;
287        result.vulnerabilities.vex_changes = vuln_changes.vex_changes;
288    }
289
290    /// Diff only the specified sections, reusing cached results for unchanged sections.
291    ///
292    /// This enables true incremental diffing: when only some SBOM sections changed,
293    /// we skip recomputing the unchanged sections and reuse them from the cached result.
294    /// Component matching is always recomputed since it's needed by all section computers.
295    ///
296    /// Falls back to a full diff if no cached result is provided.
297    pub(crate) fn diff_sections(
298        &self,
299        old: &NormalizedSbom,
300        new: &NormalizedSbom,
301        sections: &ChangedSections,
302        cached: &DiffResult,
303    ) -> Result<DiffResult, SbomDiffError> {
304        // Start with the cached result so unchanged sections are preserved
305        let mut result = cached.clone();
306
307        // Apply custom matching rules if configured
308        let (old_filtered, new_filtered, canonical_maps) =
309            if let Some(rule_result) = apply_rules(self.rule_engine.as_ref(), old, new) {
310                result.rules_applied = rule_result.rules_count;
311                (
312                    Cow::Owned(rule_result.old_filtered),
313                    Cow::Owned(rule_result.new_filtered),
314                    Some((rule_result.old_canonical, rule_result.new_canonical)),
315                )
316            } else {
317                (Cow::Borrowed(old), Cow::Borrowed(new), None)
318            };
319
320        // Always recompute matching — it's needed for any section computer
321        let default_matcher = FuzzyMatcher::new(self.fuzzy_config.clone());
322        let matcher: &dyn ComponentMatcher = self
323            .custom_matcher
324            .as_ref()
325            .map_or(&default_matcher as &dyn ComponentMatcher, |m| m.as_ref());
326
327        let mut component_matches = match_components(
328            &old_filtered,
329            &new_filtered,
330            matcher,
331            &self.fuzzy_config,
332            &self.large_sbom_config,
333        );
334
335        // Apply canonical mappings from rule engine
336        if let Some((old_canonical, new_canonical)) = &canonical_maps {
337            component_matches =
338                remap_match_result(&component_matches, old_canonical, new_canonical);
339        }
340
341        // Selectively recompute only the changed sections
342        if sections.components {
343            let comp_computer = ComponentChangeComputer::new(self.cost_model.clone());
344            let comp_changes =
345                comp_computer.compute(&old_filtered, &new_filtered, &component_matches.matches);
346            result.components.added = comp_changes.added;
347            result.components.removed = comp_changes.removed;
348            result.components.modified = comp_changes
349                .modified
350                .into_iter()
351                .map(|mut change| {
352                    if let (Some(old_id), Some(new_id)) =
353                        (&change.old_canonical_id, &change.canonical_id)
354                        && let (Some(old_comp), Some(new_comp)) = (
355                            old_filtered.components.get(old_id),
356                            new_filtered.components.get(new_id),
357                        )
358                    {
359                        let explanation = matcher.explain_match(old_comp, new_comp);
360                        let mut match_info = MatchInfo::from_explanation(&explanation);
361                        if let Some(&score) = component_matches
362                            .pairs
363                            .get(&(old_id.clone(), new_id.clone()))
364                        {
365                            match_info.score = score;
366                        }
367                        change = change.with_match_info(match_info);
368                    }
369                    change
370                })
371                .collect();
372        }
373
374        if sections.dependencies {
375            let dep_computer = DependencyChangeComputer::new();
376            let dep_changes =
377                dep_computer.compute(&old_filtered, &new_filtered, &component_matches.matches);
378            result.dependencies.added = dep_changes.added;
379            result.dependencies.removed = dep_changes.removed;
380        }
381
382        if sections.licenses {
383            let lic_computer = LicenseChangeComputer::new();
384            let lic_changes =
385                lic_computer.compute(&old_filtered, &new_filtered, &component_matches.matches);
386            result.licenses.new_licenses = lic_changes.new_licenses;
387            result.licenses.removed_licenses = lic_changes.removed_licenses;
388        }
389
390        if sections.vulnerabilities {
391            let vuln_computer = VulnerabilityChangeComputer::new();
392            let vuln_changes =
393                vuln_computer.compute(&old_filtered, &new_filtered, &component_matches.matches);
394            result.vulnerabilities.introduced = vuln_changes.introduced;
395            result.vulnerabilities.resolved = vuln_changes.resolved;
396            result.vulnerabilities.persistent = vuln_changes.persistent;
397            result.vulnerabilities.vex_changes = vuln_changes.vex_changes;
398        }
399
400        // Always recompute summary and semantic score since they depend on all sections
401        result.semantic_score = self.compute_semantic_score(&result);
402        result.calculate_summary();
403        Ok(result)
404    }
405
406    /// Compute the semantic score from a `DiffResult`.
407    fn compute_semantic_score(&self, result: &DiffResult) -> f64 {
408        self.cost_model.calculate_semantic_score(
409            result.components.added.len(),
410            result.components.removed.len(),
411            result.components.modified.len(),
412            result.licenses.component_changes.len(),
413            result.vulnerabilities.introduced.len(),
414            result.vulnerabilities.resolved.len(),
415            result.dependencies.added.len(),
416            result.dependencies.removed.len(),
417        )
418    }
419}
420
/// `Default` is equivalent to [`DiffEngine::new`].
impl Default for DiffEngine {
    /// Build an engine with the same settings as `DiffEngine::new`.
    fn default() -> Self {
        Self::new()
    }
}
426
#[cfg(test)]
mod tests {
    use super::*;

    /// Diffing a default SBOM against itself must report no changes.
    #[test]
    fn test_empty_diff() {
        let sbom = NormalizedSbom::default();
        let outcome = DiffEngine::new()
            .diff(&sbom, &sbom)
            .expect("diff should succeed");
        assert!(!outcome.has_changes());
    }
}