Skip to main content

sbom_tools/diff/
multi_engine.rs

1//! Multi-SBOM comparison engines.
2//!
3//! Uses [`IncrementalDiffEngine`] internally to cache diff results across
4//! repeated comparisons (timeline, matrix, diff-multi), avoiding redundant
5//! recomputation when the same SBOM pair is compared multiple times.
6
7use super::incremental::IncrementalDiffEngine;
8use super::multi::{
9    ComparisonResult, ComplianceScoreEntry, ComplianceSnapshot, ComponentEvolution,
10    DependencySnapshot, DivergenceType, DivergentComponent, EvolutionSummary,
11    InconsistentComponent, MatrixResult, MultiDiffResult, MultiDiffSummary, SbomCluster,
12    SbomClustering, SbomInfo, SecurityImpact, TimelineResult, VariableComponent, VersionAtPoint,
13    VersionChangeType, VersionSpread, VulnerabilityMatrix, VulnerabilitySnapshot,
14};
15use super::{DiffEngine, DiffResult};
16use crate::error::SbomDiffError;
17use crate::matching::{FuzzyMatchConfig, MatchingRulesConfig};
18use crate::model::{NormalizedSbom, VulnerabilityCounts};
19use std::collections::{HashMap, HashSet};
20
/// Engine for multi-SBOM comparisons.
///
/// Internally wraps an [`IncrementalDiffEngine`] so that repeated comparisons
/// of the same SBOM pairs (common in timeline and matrix modes) benefit from
/// result caching.
///
/// The configuration fields are only turned into a concrete engine on the
/// first diff operation. Every builder method resets `incremental` to `None`,
/// so a configuration change made after a diff is picked up by rebuilding the
/// engine on the next diff.
pub struct MultiDiffEngine {
    /// Fuzzy matching configuration (applied when building the engine).
    fuzzy_config: Option<FuzzyMatchConfig>,
    /// Whether to include unchanged components in diff results.
    include_unchanged: bool,
    /// Graph diff configuration (optional).
    graph_diff_config: Option<super::GraphDiffConfig>,
    /// Custom matching rules (applied when building the engine).
    matching_rules: Option<MatchingRulesConfig>,
    /// Caching wrapper built lazily on first diff operation.
    incremental: Option<IncrementalDiffEngine>,
}
38
39impl MultiDiffEngine {
    /// Create an engine with default settings: no fuzzy matching
    /// configuration, no graph diffing, no custom matching rules, and
    /// unchanged components excluded from diff results.
    ///
    /// The underlying diff engine is not built here; it is constructed on the
    /// first diff operation.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            fuzzy_config: None,
            include_unchanged: false,
            graph_diff_config: None,
            matching_rules: None,
            incremental: None,
        }
    }
50
51    /// Configure fuzzy matching
52    #[must_use]
53    pub fn with_fuzzy_config(mut self, config: FuzzyMatchConfig) -> Self {
54        self.fuzzy_config = Some(config);
55        self.incremental = None;
56        self
57    }
58
59    /// Include unchanged components
60    #[must_use]
61    pub fn include_unchanged(mut self, include: bool) -> Self {
62        self.include_unchanged = include;
63        self.incremental = None;
64        self
65    }
66
67    /// Enable graph-aware diffing with the given configuration
68    #[must_use]
69    pub fn with_graph_diff(mut self, config: super::GraphDiffConfig) -> Self {
70        self.graph_diff_config = Some(config);
71        self.incremental = None;
72        self
73    }
74
75    /// Apply custom matching rules to every pairwise diff.
76    #[must_use]
77    pub fn with_matching_rules(mut self, rules: MatchingRulesConfig) -> Self {
78        self.matching_rules = Some(rules);
79        self.incremental = None;
80        self
81    }
82
83    /// Build the configured `DiffEngine` and wrap it in an `IncrementalDiffEngine`.
84    fn ensure_engine(&mut self) {
85        if self.incremental.is_none() {
86            let mut engine = DiffEngine::new();
87            if let Some(config) = self.fuzzy_config.clone() {
88                engine = engine.with_fuzzy_config(config);
89            }
90            engine = engine.include_unchanged(self.include_unchanged);
91            if let Some(config) = self.graph_diff_config.clone() {
92                engine = engine.with_graph_diff(config);
93            }
94            if let Some(rules) = self.matching_rules.clone() {
95                match crate::matching::RuleEngine::new(rules) {
96                    Ok(rule_engine) => engine = engine.with_rule_engine(rule_engine),
97                    Err(err) => {
98                        tracing::warn!("Failed to initialize matching rule engine: {err}");
99                    }
100                }
101            }
102            self.incremental = Some(IncrementalDiffEngine::new(engine));
103        }
104    }
105
106    /// Perform a single diff using the cached incremental engine.
107    fn cached_diff(
108        &mut self,
109        old: &NormalizedSbom,
110        new: &NormalizedSbom,
111    ) -> Result<DiffResult, SbomDiffError> {
112        self.ensure_engine();
113        Ok(self
114            .incremental
115            .as_ref()
116            .expect("engine initialized by ensure_engine")
117            .diff(old, new)?
118            .into_result())
119    }
120
121    /// Perform 1:N diff-multi comparison (baseline vs multiple targets)
122    ///
123    /// # Errors
124    ///
125    /// Returns an error if any pairwise diff computation fails.
126    pub fn diff_multi(
127        &mut self,
128        baseline: &NormalizedSbom,
129        baseline_name: &str,
130        baseline_path: &str,
131        targets: &[(&NormalizedSbom, &str, &str)], // (sbom, name, path)
132    ) -> Result<MultiDiffResult, SbomDiffError> {
133        let baseline_info = SbomInfo::from_sbom(
134            baseline,
135            baseline_name.to_string(),
136            baseline_path.to_string(),
137        );
138
139        // Compute individual diffs
140        let mut comparisons: Vec<ComparisonResult> = Vec::new();
141        let mut all_versions: HashMap<String, HashMap<String, String>> = HashMap::new(); // component_id -> (target_name -> version)
142
143        // Collect baseline versions
144        for (id, comp) in &baseline.components {
145            let version = comp.version.clone().unwrap_or_default();
146            all_versions
147                .entry(id.value().to_string())
148                .or_default()
149                .insert(baseline_name.to_string(), version);
150        }
151
152        for (target_sbom, target_name, target_path) in targets {
153            let diff = self.cached_diff(baseline, target_sbom)?;
154            let target_info = SbomInfo::from_sbom(
155                target_sbom,
156                target_name.to_string(),
157                target_path.to_string(),
158            );
159
160            // Collect target versions
161            for (id, comp) in &target_sbom.components {
162                let version = comp.version.clone().unwrap_or_default();
163                all_versions
164                    .entry(id.value().to_string())
165                    .or_default()
166                    .insert(target_name.to_string(), version);
167            }
168
169            comparisons.push(ComparisonResult {
170                target: target_info,
171                diff,
172                unique_components: vec![],    // Computed in summary phase
173                divergent_components: vec![], // Computed in summary phase
174            });
175        }
176
177        // Compute summary
178        let summary = self.compute_multi_diff_summary(
179            &baseline_info,
180            baseline,
181            &comparisons,
182            targets,
183            &all_versions,
184        );
185
186        // Update comparisons with divergent component info
187        for (i, comp) in comparisons.iter_mut().enumerate() {
188            let (target_sbom, target_name, _) = &targets[i];
189            comp.divergent_components =
190                self.find_divergent_components(baseline, target_sbom, target_name, &all_versions);
191        }
192
193        Ok(MultiDiffResult {
194            baseline: baseline_info,
195            comparisons,
196            summary,
197        })
198    }
199
200    fn compute_multi_diff_summary(
201        &self,
202        baseline_info: &SbomInfo,
203        baseline: &NormalizedSbom,
204        comparisons: &[ComparisonResult],
205        targets: &[(&NormalizedSbom, &str, &str)],
206        all_versions: &HashMap<String, HashMap<String, String>>,
207    ) -> MultiDiffSummary {
208        let baseline_components: HashSet<_> = baseline
209            .components
210            .keys()
211            .map(|k| k.value().to_string())
212            .collect();
213        let _target_names: Vec<_> = targets
214            .iter()
215            .map(|(_, name, _)| name.to_string())
216            .collect();
217
218        // Find universal components (in baseline and ALL targets)
219        let mut universal: HashSet<String> = baseline_components.clone();
220        for (target_sbom, _, _) in targets {
221            let target_components: HashSet<_> = target_sbom
222                .components
223                .keys()
224                .map(|k| k.value().to_string())
225                .collect();
226            universal = universal
227                .intersection(&target_components)
228                .cloned()
229                .collect();
230        }
231
232        // Find variable components (different versions across targets)
233        let mut variable_components: Vec<VariableComponent> = vec![];
234        for (comp_id, versions) in all_versions {
235            let unique_versions: HashSet<_> = versions.values().collect();
236            if unique_versions.len() > 1 {
237                let name = baseline
238                    .components
239                    .iter()
240                    .find(|(id, _)| id.value() == comp_id)
241                    .map(|(_, c)| c.name.clone())
242                    .or_else(|| {
243                        targets.iter().find_map(|(sbom, _, _)| {
244                            sbom.components
245                                .iter()
246                                .find(|(id, _)| id.value() == comp_id)
247                                .map(|(_, c)| c.name.clone())
248                        })
249                    })
250                    .unwrap_or_else(|| comp_id.clone());
251
252                let baseline_version = versions.get(&baseline_info.name.clone()).cloned();
253                let all_versions_vec: Vec<_> = unique_versions.into_iter().cloned().collect();
254
255                // Calculate major version spread
256                let major_spread = calculate_major_version_spread(&all_versions_vec);
257
258                variable_components.push(VariableComponent {
259                    id: comp_id.clone(),
260                    name: name.clone(),
261                    ecosystem: None,
262                    version_spread: VersionSpread {
263                        baseline: baseline_version,
264                        min_version: all_versions_vec.iter().min().cloned(),
265                        max_version: all_versions_vec.iter().max().cloned(),
266                        unique_versions: all_versions_vec,
267                        is_consistent: false,
268                        major_version_spread: major_spread,
269                    },
270                    targets_with_component: versions.keys().cloned().collect(),
271                    security_impact: classify_security_impact(&name),
272                });
273            }
274        }
275
276        // Find inconsistent components (missing from some targets)
277        let mut inconsistent_components: Vec<InconsistentComponent> = vec![];
278        let all_component_ids: HashSet<_> = all_versions.keys().cloned().collect();
279
280        for comp_id in &all_component_ids {
281            if universal.contains(comp_id) {
282                continue; // Present everywhere, not inconsistent
283            }
284
285            let in_baseline = baseline_components.contains(comp_id);
286            let mut present_in: Vec<String> = vec![];
287            let mut missing_from: Vec<String> = vec![];
288
289            if in_baseline {
290                present_in.push(baseline_info.name.clone());
291            } else {
292                missing_from.push(baseline_info.name.clone());
293            }
294
295            for (target_sbom, target_name, _) in targets {
296                let has_component = target_sbom
297                    .components
298                    .iter()
299                    .any(|(id, _)| id.value() == comp_id);
300                if has_component {
301                    present_in.push(target_name.to_string());
302                } else {
303                    missing_from.push(target_name.to_string());
304                }
305            }
306
307            if !missing_from.is_empty() {
308                let name = all_versions
309                    .get(comp_id)
310                    .and_then(|_| {
311                        baseline
312                            .components
313                            .iter()
314                            .find(|(id, _)| id.value() == comp_id)
315                            .map(|(_, c)| c.name.clone())
316                    })
317                    .unwrap_or_else(|| comp_id.clone());
318
319                inconsistent_components.push(InconsistentComponent {
320                    id: comp_id.clone(),
321                    name,
322                    in_baseline,
323                    present_in,
324                    missing_from,
325                });
326            }
327        }
328
329        // Compute deviation scores
330        let mut deviation_scores: HashMap<String, f64> = HashMap::new();
331        let mut max_deviation = 0.0f64;
332
333        for comp in comparisons {
334            let score = 100.0 - comp.diff.semantic_score;
335            deviation_scores.insert(comp.target.name.clone(), score);
336            max_deviation = max_deviation.max(score);
337        }
338
339        // Build vulnerability matrix with unique and common vulnerabilities
340        let vulnerability_matrix =
341            compute_vulnerability_matrix(baseline, &baseline_info.name, targets);
342
343        MultiDiffSummary {
344            baseline_component_count: baseline_info.component_count,
345            universal_components: universal.into_iter().collect(),
346            variable_components,
347            inconsistent_components,
348            deviation_scores,
349            max_deviation,
350            vulnerability_matrix,
351        }
352    }
353
354    fn find_divergent_components(
355        &self,
356        baseline: &NormalizedSbom,
357        target: &NormalizedSbom,
358        _target_name: &str,
359        all_versions: &HashMap<String, HashMap<String, String>>,
360    ) -> Vec<DivergentComponent> {
361        let mut divergent = vec![];
362
363        for (id, comp) in &target.components {
364            let comp_id = id.value().to_string();
365            let target_version = comp.version.clone().unwrap_or_default();
366
367            // Check if baseline has different version
368            let baseline_version = baseline
369                .components
370                .iter()
371                .find(|(bid, _)| bid.value() == comp_id)
372                .and_then(|(_, bc)| bc.version.clone());
373
374            let divergence_type = if baseline_version.is_none() {
375                DivergenceType::Added
376            } else if baseline_version.as_ref() != Some(&target_version) {
377                DivergenceType::VersionMismatch
378            } else {
379                continue; // Same version, not divergent
380            };
381
382            divergent.push(DivergentComponent {
383                id: comp_id.clone(),
384                name: comp.name.clone(),
385                baseline_version,
386                target_version,
387                versions_across_targets: all_versions.get(&comp_id).cloned().unwrap_or_default(),
388                divergence_type,
389            });
390        }
391
392        // Check for removed components
393        for (id, comp) in &baseline.components {
394            let comp_id = id.value().to_string();
395            let in_target = target
396                .components
397                .iter()
398                .any(|(tid, _)| tid.value() == comp_id);
399
400            if !in_target {
401                divergent.push(DivergentComponent {
402                    id: comp_id.clone(),
403                    name: comp.name.clone(),
404                    baseline_version: comp.version.clone(),
405                    target_version: String::new(),
406                    versions_across_targets: all_versions
407                        .get(&comp_id)
408                        .cloned()
409                        .unwrap_or_default(),
410                    divergence_type: DivergenceType::Removed,
411                });
412            }
413        }
414
415        divergent
416    }
417
418    /// Perform timeline analysis across ordered SBOM versions
419    ///
420    /// # Errors
421    ///
422    /// Returns an error if any pairwise diff computation fails.
423    pub fn timeline(
424        &mut self,
425        sboms: &[(&NormalizedSbom, &str, &str)], // (sbom, name, path)
426    ) -> Result<TimelineResult, SbomDiffError> {
427        let sbom_infos: Vec<SbomInfo> = sboms
428            .iter()
429            .map(|(sbom, name, path)| SbomInfo::from_sbom(sbom, name.to_string(), path.to_string()))
430            .collect();
431
432        // Compute incremental diffs (adjacent pairs)
433        let mut incremental_diffs: Vec<DiffResult> = vec![];
434        for i in 0..sboms.len().saturating_sub(1) {
435            let diff = self.cached_diff(sboms[i].0, sboms[i + 1].0)?;
436            incremental_diffs.push(diff);
437        }
438
439        // Compute cumulative diffs from first
440        let mut cumulative_from_first: Vec<DiffResult> = vec![];
441        if !sboms.is_empty() {
442            for i in 1..sboms.len() {
443                let diff = self.cached_diff(sboms[0].0, sboms[i].0)?;
444                cumulative_from_first.push(diff);
445            }
446        }
447
448        // Build evolution summary
449        let evolution_summary =
450            self.build_evolution_summary(sboms, &sbom_infos, &incremental_diffs);
451
452        Ok(TimelineResult {
453            sboms: sbom_infos,
454            incremental_diffs,
455            cumulative_from_first,
456            evolution_summary,
457        })
458    }
459
460    fn build_evolution_summary(
461        &self,
462        sboms: &[(&NormalizedSbom, &str, &str)],
463        sbom_infos: &[SbomInfo],
464        _incremental_diffs: &[DiffResult],
465    ) -> EvolutionSummary {
466        // Track component versions across timeline
467        let mut version_history: HashMap<String, Vec<VersionAtPoint>> = HashMap::new();
468        let mut components_added: Vec<ComponentEvolution> = vec![];
469        let mut components_removed: Vec<ComponentEvolution> = vec![];
470        let mut all_components: HashSet<String> = HashSet::new();
471
472        // Collect all component IDs
473        for (sbom, _, _) in sboms {
474            for (id, _) in &sbom.components {
475                all_components.insert(id.value().to_string());
476            }
477        }
478
479        // Build version history for each component
480        for comp_id in &all_components {
481            let mut history: Vec<VersionAtPoint> = vec![];
482            let mut first_seen: Option<(usize, String)> = None;
483            let mut last_seen: Option<usize> = None;
484            let mut prev_version: Option<String> = None;
485            let mut version_change_count: usize = 0;
486
487            for (i, (sbom, name, _)) in sboms.iter().enumerate() {
488                let comp = sbom.components.iter().find(|(id, _)| id.value() == comp_id);
489
490                let (version, change_type) = if let Some((_, c)) = comp {
491                    let ver = c.version.clone();
492                    let change = if first_seen.is_none() {
493                        first_seen = Some((i, ver.clone().unwrap_or_default()));
494                        VersionChangeType::Initial
495                    } else {
496                        let ct = classify_version_change(prev_version.as_ref(), ver.as_ref());
497                        // Count actual version changes (not unchanged or absent)
498                        if !matches!(ct, VersionChangeType::Unchanged | VersionChangeType::Absent) {
499                            version_change_count += 1;
500                        }
501                        ct
502                    };
503                    last_seen = Some(i);
504                    prev_version.clone_from(&ver);
505                    (ver, change)
506                } else if first_seen.is_some() {
507                    (None, VersionChangeType::Removed)
508                } else {
509                    (None, VersionChangeType::Absent)
510                };
511
512                history.push(VersionAtPoint {
513                    sbom_index: i,
514                    sbom_name: name.to_string(),
515                    version,
516                    change_type,
517                });
518            }
519
520            version_history.insert(comp_id.clone(), history);
521
522            // Track added/removed
523            if let Some((first_idx, first_ver)) = first_seen {
524                let still_present = last_seen == Some(sboms.len() - 1);
525                let current_version = if still_present {
526                    sboms.last().and_then(|(sbom, _, _)| {
527                        sbom.components
528                            .iter()
529                            .find(|(id, _)| id.value() == comp_id)
530                            .and_then(|(_, c)| c.version.clone())
531                    })
532                } else {
533                    None
534                };
535
536                let name = sboms
537                    .iter()
538                    .find_map(|(sbom, _, _)| {
539                        sbom.components
540                            .iter()
541                            .find(|(id, _)| id.value() == comp_id)
542                            .map(|(_, c)| c.name.clone())
543                    })
544                    .unwrap_or_else(|| comp_id.clone());
545
546                let evolution = ComponentEvolution {
547                    id: comp_id.clone(),
548                    name,
549                    first_seen_index: first_idx,
550                    first_seen_version: first_ver,
551                    last_seen_index: if still_present { None } else { last_seen },
552                    current_version,
553                    version_change_count,
554                };
555
556                if first_idx > 0 {
557                    components_added.push(evolution.clone());
558                }
559                if !still_present {
560                    components_removed.push(evolution);
561                }
562            }
563        }
564
565        // Build vulnerability trend
566        let vulnerability_trend: Vec<VulnerabilitySnapshot> = sbom_infos
567            .iter()
568            .enumerate()
569            .map(|(i, info)| VulnerabilitySnapshot {
570                sbom_index: i,
571                sbom_name: info.name.clone(),
572                counts: info.vulnerability_counts.clone(),
573                new_vulnerabilities: vec![],
574                resolved_vulnerabilities: vec![],
575            })
576            .collect();
577
578        // Build dependency trend, computing transitive deps from edge depth data
579        let dependency_trend: Vec<DependencySnapshot> = sboms
580            .iter()
581            .enumerate()
582            .map(|(i, (sbom, _, _))| {
583                let total_edges = sbom.edges.len();
584                // Count root nodes (no incoming edges) to determine direct vs transitive
585                let targets: HashSet<_> = sbom.edges.iter().map(|e| &e.to).collect();
586                let sources: HashSet<_> = sbom.edges.iter().map(|e| &e.from).collect();
587                let roots: HashSet<_> = sources.difference(&targets).collect();
588                let direct = sbom
589                    .edges
590                    .iter()
591                    .filter(|e| roots.contains(&&e.from))
592                    .count();
593                let transitive = total_edges.saturating_sub(direct);
594
595                DependencySnapshot {
596                    sbom_index: i,
597                    sbom_name: sbom_infos[i].name.clone(),
598                    direct_dependencies: direct,
599                    transitive_dependencies: transitive,
600                    total_edges,
601                }
602            })
603            .collect();
604
605        // Build compliance trend
606        let compliance_trend: Vec<ComplianceSnapshot> = sboms
607            .iter()
608            .enumerate()
609            .map(|(i, (sbom, name, _))| {
610                use crate::quality::{ComplianceChecker, ComplianceLevel};
611                let scores = ComplianceLevel::all()
612                    .iter()
613                    .map(|level| {
614                        let result = ComplianceChecker::new(*level).check(sbom);
615                        ComplianceScoreEntry {
616                            standard: level.name().to_string(),
617                            error_count: result.error_count,
618                            warning_count: result.warning_count,
619                            info_count: result.info_count,
620                            is_compliant: result.is_compliant,
621                        }
622                    })
623                    .collect();
624                ComplianceSnapshot {
625                    sbom_index: i,
626                    sbom_name: name.to_string(),
627                    scores,
628                }
629            })
630            .collect();
631
632        EvolutionSummary {
633            components_added,
634            components_removed,
635            version_history,
636            vulnerability_trend,
637            license_changes: vec![],
638            dependency_trend,
639            compliance_trend,
640        }
641    }
642
643    /// Perform N×N matrix comparison
644    ///
645    /// # Errors
646    ///
647    /// Returns an error if any pairwise diff computation fails.
648    pub fn matrix(
649        &mut self,
650        sboms: &[(&NormalizedSbom, &str, &str)], // (sbom, name, path)
651        similarity_threshold: Option<f64>,
652    ) -> Result<MatrixResult, SbomDiffError> {
653        let sbom_infos: Vec<SbomInfo> = sboms
654            .iter()
655            .map(|(sbom, name, path)| SbomInfo::from_sbom(sbom, name.to_string(), path.to_string()))
656            .collect();
657
658        let n = sboms.len();
659        let num_pairs = n * (n - 1) / 2;
660
661        let mut diffs: Vec<Option<DiffResult>> = vec![None; num_pairs];
662        let mut similarity_scores: Vec<f64> = vec![0.0; num_pairs];
663
664        // Compute upper triangle
665        let mut idx = 0;
666        for i in 0..n {
667            for j in (i + 1)..n {
668                let diff = self.cached_diff(sboms[i].0, sboms[j].0)?;
669                let similarity = diff.semantic_score / 100.0;
670                similarity_scores[idx] = similarity;
671                diffs[idx] = Some(diff);
672                idx += 1;
673            }
674        }
675
676        // Optional clustering
677        let clustering = similarity_threshold
678            .map(|threshold| self.cluster_sboms(&sbom_infos, &similarity_scores, threshold));
679
680        Ok(MatrixResult {
681            sboms: sbom_infos,
682            diffs,
683            similarity_scores,
684            clustering,
685        })
686    }
687
688    fn cluster_sboms(
689        &self,
690        sboms: &[SbomInfo],
691        similarity_scores: &[f64],
692        threshold: f64,
693    ) -> SbomClustering {
694        let n = sboms.len();
695        let mut clusters: Vec<SbomCluster> = vec![];
696        let mut assigned: HashSet<usize> = HashSet::new();
697
698        // Simple greedy clustering
699        for i in 0..n {
700            if assigned.contains(&i) {
701                continue;
702            }
703
704            let mut cluster_members = vec![i];
705            assigned.insert(i);
706
707            for j in (i + 1)..n {
708                if assigned.contains(&j) {
709                    continue;
710                }
711
712                // Get similarity between i and j
713                let idx = i * (2 * n - i - 1) / 2 + (j - i - 1);
714                let similarity = similarity_scores.get(idx).copied().unwrap_or(0.0);
715
716                if similarity >= threshold {
717                    cluster_members.push(j);
718                    assigned.insert(j);
719                }
720            }
721
722            if cluster_members.len() > 1 {
723                // Calculate average internal similarity
724                let mut total_sim = 0.0;
725                let mut count = 0;
726                for (mi, &a) in cluster_members.iter().enumerate() {
727                    for &b in cluster_members.iter().skip(mi + 1) {
728                        let (x, y) = if a < b { (a, b) } else { (b, a) };
729                        let idx = x * (2 * n - x - 1) / 2 + (y - x - 1);
730                        total_sim += similarity_scores.get(idx).copied().unwrap_or(0.0);
731                        count += 1;
732                    }
733                }
734
735                clusters.push(SbomCluster {
736                    members: cluster_members.clone(),
737                    centroid_index: cluster_members[0],
738                    internal_similarity: if count > 0 {
739                        total_sim / f64::from(count)
740                    } else {
741                        1.0
742                    },
743                    label: None,
744                });
745            }
746        }
747
748        // Find outliers
749        let outliers: Vec<usize> = (0..n).filter(|i| !assigned.contains(i)).collect();
750
751        SbomClustering {
752            clusters,
753            outliers,
754            algorithm: "greedy".to_string(),
755            threshold,
756        }
757    }
758}
759
// The default engine is the same as the one returned by `MultiDiffEngine::new`.
impl Default for MultiDiffEngine {
    /// Delegates to [`MultiDiffEngine::new`].
    fn default() -> Self {
        Self::new()
    }
}
765
766/// Classify security impact based on component name
767fn classify_security_impact(name: &str) -> SecurityImpact {
768    let name_lower = name.to_lowercase();
769    let critical_components = [
770        "openssl",
771        "curl",
772        "libcurl",
773        "gnutls",
774        "mbedtls",
775        "wolfssl",
776        "boringssl",
777    ];
778    let high_components = [
779        "zlib", "libssh", "openssh", "gnupg", "gpg", "sqlite", "kernel", "glibc",
780    ];
781
782    if critical_components.iter().any(|c| name_lower.contains(c)) {
783        SecurityImpact::Critical
784    } else if high_components.iter().any(|c| name_lower.contains(c)) {
785        SecurityImpact::High
786    } else {
787        SecurityImpact::Low
788    }
789}
790
791/// Calculate major version spread from a list of version strings
792fn calculate_major_version_spread(versions: &[String]) -> u32 {
793    let mut major_versions: HashSet<u64> = HashSet::new();
794
795    for version in versions {
796        // Try to parse as semver first
797        if let Ok(v) = semver::Version::parse(version) {
798            major_versions.insert(v.major);
799        } else {
800            // Fallback: try to extract leading number
801            if let Some(major_str) = version.split(['.', '-', '_']).next()
802                && let Ok(major) = major_str.parse::<u64>()
803            {
804                major_versions.insert(major);
805            }
806        }
807    }
808
809    match (major_versions.iter().min(), major_versions.iter().max()) {
810        (Some(&min), Some(&max)) => (max - min) as u32,
811        _ => 0,
812    }
813}
814
815/// Compute vulnerability matrix with unique and common vulnerabilities
816fn compute_vulnerability_matrix(
817    baseline: &NormalizedSbom,
818    baseline_name: &str,
819    targets: &[(&NormalizedSbom, &str, &str)],
820) -> VulnerabilityMatrix {
821    // Collect all vulnerabilities per SBOM
822    let mut vuln_sets: HashMap<String, HashSet<String>> = HashMap::new();
823    let mut per_sbom: HashMap<String, VulnerabilityCounts> = HashMap::new();
824
825    // Baseline vulnerabilities
826    let baseline_vulns: HashSet<String> = baseline
827        .all_vulnerabilities()
828        .iter()
829        .map(|(_, v)| v.id.clone())
830        .collect();
831    vuln_sets.insert(baseline_name.to_string(), baseline_vulns);
832    per_sbom.insert(baseline_name.to_string(), baseline.vulnerability_counts());
833
834    // Target vulnerabilities
835    for (sbom, name, _) in targets {
836        let target_vulns: HashSet<String> = sbom
837            .all_vulnerabilities()
838            .iter()
839            .map(|(_, v)| v.id.clone())
840            .collect();
841        vuln_sets.insert(name.to_string(), target_vulns);
842        per_sbom.insert(name.to_string(), sbom.vulnerability_counts());
843    }
844
845    // Find common vulnerabilities (in ALL SBOMs)
846    let mut common_vulnerabilities: HashSet<String> =
847        vuln_sets.values().next().cloned().unwrap_or_default();
848
849    for vulns in vuln_sets.values() {
850        common_vulnerabilities = common_vulnerabilities
851            .intersection(vulns)
852            .cloned()
853            .collect();
854    }
855
856    // Find unique vulnerabilities per SBOM
857    let mut unique_vulnerabilities: HashMap<String, Vec<String>> = HashMap::new();
858
859    for (sbom_name, vulns) in &vuln_sets {
860        let mut unique: HashSet<String> = vulns.clone();
861
862        // Remove vulnerabilities that exist in any other SBOM
863        for (other_name, other_vulns) in &vuln_sets {
864            if other_name != sbom_name {
865                unique = unique.difference(other_vulns).cloned().collect();
866            }
867        }
868
869        if !unique.is_empty() {
870            unique_vulnerabilities.insert(sbom_name.clone(), unique.into_iter().collect());
871        }
872    }
873
874    VulnerabilityMatrix {
875        per_sbom,
876        unique_vulnerabilities,
877        common_vulnerabilities: common_vulnerabilities.into_iter().collect(),
878    }
879}
880
881/// Classify version change type
882fn classify_version_change(old: Option<&String>, new: Option<&String>) -> VersionChangeType {
883    match (old, new) {
884        (None, Some(_)) => VersionChangeType::Initial,
885        (Some(_), None) => VersionChangeType::Removed,
886        (Some(o), Some(n)) if o == n => VersionChangeType::Unchanged,
887        (Some(o), Some(n)) => {
888            // Try to parse as semver
889            if let (Ok(old_v), Ok(new_v)) = (semver::Version::parse(o), semver::Version::parse(n)) {
890                if new_v.major > old_v.major {
891                    VersionChangeType::MajorUpgrade
892                } else if new_v.major < old_v.major {
893                    VersionChangeType::Downgrade
894                } else if new_v.minor > old_v.minor {
895                    VersionChangeType::MinorUpgrade
896                } else if new_v.minor < old_v.minor {
897                    VersionChangeType::Downgrade
898                } else if new_v.patch > old_v.patch {
899                    VersionChangeType::PatchUpgrade
900                } else {
901                    VersionChangeType::Downgrade
902                }
903            } else {
904                // String comparison fallback
905                if n > o {
906                    VersionChangeType::PatchUpgrade
907                } else {
908                    VersionChangeType::Downgrade
909                }
910            }
911        }
912        (None, None) => VersionChangeType::Absent,
913    }
914}