Skip to main content

sbom_tools/model/
sbom.rs

1//! Core SBOM and Component data structures.
2
3use super::{
4    CanonicalId, ComponentExtensions, ComponentIdentifiers, ComponentType, DependencyScope,
5    DependencyType, DocumentMetadata, Ecosystem, ExternalReference, FormatExtensions, Hash,
6    LicenseInfo, Organization, VexStatus, VulnerabilityRef,
7};
8use indexmap::IndexMap;
9use serde::{Deserialize, Serialize};
10use xxhash_rust::xxh3::xxh3_64;
11
/// Normalized SBOM document - the canonical intermediate representation.
///
/// This structure represents an SBOM in a format-agnostic way, allowing
/// comparison between CycloneDX and SPDX documents.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NormalizedSbom {
    /// Document-level metadata
    pub document: DocumentMetadata,
    /// Components indexed by canonical ID (an `IndexMap`, so iteration follows insertion order)
    pub components: IndexMap<CanonicalId, Component>,
    /// Dependency edges, in the order they were added
    pub edges: Vec<DependencyEdge>,
    /// Format-specific extensions
    pub extensions: FormatExtensions,
    /// Content hash for quick equality checks.
    ///
    /// Starts at `0` and is only refreshed by `calculate_content_hash`.
    pub content_hash: u64,
    /// Primary/root product component (CycloneDX metadata.component or SPDX documentDescribes)
    /// This identifies the main product that this SBOM describes, important for CRA compliance.
    pub primary_component_id: Option<CanonicalId>,
    /// Number of canonical ID collisions encountered during parsing.
    ///
    /// Excluded from (de)serialization via `#[serde(skip)]`.
    #[serde(skip)]
    pub collision_count: usize,
}
35
36impl NormalizedSbom {
37    /// Create a new empty normalized SBOM
38    pub fn new(document: DocumentMetadata) -> Self {
39        Self {
40            document,
41            components: IndexMap::new(),
42            edges: Vec::new(),
43            extensions: FormatExtensions::default(),
44            content_hash: 0,
45            primary_component_id: None,
46            collision_count: 0,
47        }
48    }
49
50    /// Add a component to the SBOM.
51    ///
52    /// Returns `true` if a collision occurred (a component with the same canonical ID
53    /// was already present and has been overwritten). Collisions are logged as warnings.
54    pub fn add_component(&mut self, component: Component) -> bool {
55        let id = component.canonical_id.clone();
56        if let Some(existing) = self.components.get(&id) {
57            // Count genuinely different components that collide on canonical ID
58            if existing.identifiers.format_id != component.identifiers.format_id
59                || existing.name != component.name
60            {
61                self.collision_count += 1;
62            }
63            self.components.insert(id, component);
64            true
65        } else {
66            self.components.insert(id, component);
67            false
68        }
69    }
70
71    /// Log a single summary line if any canonical ID collisions occurred during parsing.
72    pub fn log_collision_summary(&self) {
73        if self.collision_count > 0 {
74            tracing::info!(
75                collision_count = self.collision_count,
76                "Canonical ID collisions: {} distinct components resolved to the same ID \
77                 and were overwritten. Consider adding PURL identifiers to disambiguate.",
78                self.collision_count
79            );
80        }
81    }
82
    /// Add a dependency edge.
    ///
    /// The edge is appended as-is: no deduplication is performed and the
    /// endpoints are not checked against `components`.
    pub fn add_edge(&mut self, edge: DependencyEdge) {
        self.edges.push(edge);
    }
87
    /// Get a component by canonical ID.
    ///
    /// Returns `None` when no component is stored under `id`.
    pub fn get_component(&self, id: &CanonicalId) -> Option<&Component> {
        self.components.get(id)
    }
92
93    /// Get dependencies of a component
94    pub fn get_dependencies(&self, id: &CanonicalId) -> Vec<&DependencyEdge> {
95        self.edges.iter().filter(|e| &e.from == id).collect()
96    }
97
98    /// Get dependents of a component
99    pub fn get_dependents(&self, id: &CanonicalId) -> Vec<&DependencyEdge> {
100        self.edges.iter().filter(|e| &e.to == id).collect()
101    }
102
103    /// Calculate and update the content hash
104    pub fn calculate_content_hash(&mut self) {
105        let mut hasher_input = Vec::new();
106
107        // Hash document metadata
108        if let Ok(meta_json) = serde_json::to_vec(&self.document) {
109            hasher_input.extend(meta_json);
110        }
111
112        // Hash all components (sorted for determinism)
113        let mut component_ids: Vec<_> = self.components.keys().collect();
114        component_ids.sort_by(|a, b| a.value().cmp(b.value()));
115
116        for id in component_ids {
117            if let Some(comp) = self.components.get(id) {
118                hasher_input.extend(comp.content_hash.to_le_bytes());
119            }
120        }
121
122        // Hash edges
123        for edge in &self.edges {
124            hasher_input.extend(edge.from.value().as_bytes());
125            hasher_input.extend(edge.to.value().as_bytes());
126        }
127
128        self.content_hash = xxh3_64(&hasher_input);
129    }
130
    /// Get total component count (the number of entries in `components`).
    pub fn component_count(&self) -> usize {
        self.components.len()
    }
135
136    /// Get the primary/root product component if set
137    pub fn primary_component(&self) -> Option<&Component> {
138        self.primary_component_id
139            .as_ref()
140            .and_then(|id| self.components.get(id))
141    }
142
    /// Set the primary component by its canonical ID.
    ///
    /// The ID is stored without checking that a matching component exists;
    /// `primary_component` returns `None` if it does not resolve.
    pub fn set_primary_component(&mut self, id: CanonicalId) {
        self.primary_component_id = Some(id);
    }
147
148    /// Get all unique ecosystems in the SBOM
149    pub fn ecosystems(&self) -> Vec<&Ecosystem> {
150        let mut ecosystems: Vec<_> = self
151            .components
152            .values()
153            .filter_map(|c| c.ecosystem.as_ref())
154            .collect();
155        ecosystems.sort_by_key(|a| a.to_string());
156        ecosystems.dedup();
157        ecosystems
158    }
159
160    /// Get all vulnerabilities across all components
161    pub fn all_vulnerabilities(&self) -> Vec<(&Component, &VulnerabilityRef)> {
162        self.components
163            .values()
164            .flat_map(|c| c.vulnerabilities.iter().map(move |v| (c, v)))
165            .collect()
166    }
167
168    /// Count vulnerabilities by severity
169    pub fn vulnerability_counts(&self) -> VulnerabilityCounts {
170        let mut counts = VulnerabilityCounts::default();
171        for (_, vuln) in self.all_vulnerabilities() {
172            match vuln.severity {
173                Some(super::Severity::Critical) => counts.critical += 1,
174                Some(super::Severity::High) => counts.high += 1,
175                Some(super::Severity::Medium) => counts.medium += 1,
176                Some(super::Severity::Low) => counts.low += 1,
177                _ => counts.unknown += 1,
178            }
179        }
180        counts
181    }
182
    /// Build an index for this SBOM.
    ///
    /// Delegates to `NormalizedSbomIndex::build`. The index is intended for
    /// fast dependency, dependent and name-based lookups; build it once and
    /// reuse it for multiple operations. It is a separate value, so it is not
    /// updated by later changes to this SBOM.
    ///
    /// # Example
    ///
    /// ```ignore
    /// let sbom = parse_sbom(&path)?;
    /// let index = sbom.build_index();
    ///
    /// // Fast dependency lookup
    /// let deps = index.dependencies_of(&component_id, &sbom.edges);
    /// ```
    pub fn build_index(&self) -> super::NormalizedSbomIndex {
        super::NormalizedSbomIndex::build(self)
    }
200
    /// Get dependencies using an index (O(k) instead of O(edges)).
    ///
    /// Use this when you have a prebuilt index for repeated lookups.
    /// Delegates to `index.dependencies_of`, passing this SBOM's `edges`.
    /// NOTE(review): presumably `index` must have been built from this same
    /// SBOM - confirm against `NormalizedSbomIndex`.
    pub fn get_dependencies_indexed<'a>(
        &'a self,
        id: &CanonicalId,
        index: &super::NormalizedSbomIndex,
    ) -> Vec<&'a DependencyEdge> {
        index.dependencies_of(id, &self.edges)
    }
211
    /// Get dependents using an index (O(k) instead of O(edges)).
    ///
    /// Use this when you have a prebuilt index for repeated lookups.
    /// Delegates to `index.dependents_of`, passing this SBOM's `edges`.
    /// NOTE(review): presumably `index` must have been built from this same
    /// SBOM - confirm against `NormalizedSbomIndex`.
    pub fn get_dependents_indexed<'a>(
        &'a self,
        id: &CanonicalId,
        index: &super::NormalizedSbomIndex,
    ) -> Vec<&'a DependencyEdge> {
        index.dependents_of(id, &self.edges)
    }
222
223    /// Find components by name (case-insensitive) using an index.
224    ///
225    /// Returns components whose lowercased name exactly matches the query.
226    pub fn find_by_name_indexed(
227        &self,
228        name: &str,
229        index: &super::NormalizedSbomIndex,
230    ) -> Vec<&Component> {
231        let name_lower = name.to_lowercase();
232        index
233            .find_by_name_lower(&name_lower)
234            .iter()
235            .filter_map(|id| self.components.get(id))
236            .collect()
237    }
238
239    /// Search components by name (case-insensitive substring) using an index.
240    ///
241    /// Returns components whose name contains the query substring.
242    pub fn search_by_name_indexed(
243        &self,
244        query: &str,
245        index: &super::NormalizedSbomIndex,
246    ) -> Vec<&Component> {
247        let query_lower = query.to_lowercase();
248        index
249            .search_by_name(&query_lower)
250            .iter()
251            .filter_map(|id| self.components.get(id))
252            .collect()
253    }
254
255    /// Apply CRA sidecar metadata to supplement SBOM fields.
256    ///
257    /// Sidecar values only override SBOM fields if the SBOM field is None/empty.
258    /// This ensures SBOM data takes precedence when available.
259    pub fn apply_cra_sidecar(&mut self, sidecar: &super::CraSidecarMetadata) {
260        // Only apply if SBOM doesn't already have the value
261        if self.document.security_contact.is_none() {
262            self.document.security_contact = sidecar.security_contact.clone();
263        }
264
265        if self.document.vulnerability_disclosure_url.is_none() {
266            self.document.vulnerability_disclosure_url =
267                sidecar.vulnerability_disclosure_url.clone();
268        }
269
270        if self.document.support_end_date.is_none() {
271            self.document.support_end_date = sidecar.support_end_date;
272        }
273
274        if self.document.name.is_none() {
275            self.document.name = sidecar.product_name.clone();
276        }
277
278        // Add manufacturer as creator if not present
279        if let Some(manufacturer) = &sidecar.manufacturer_name {
280            let has_org = self
281                .document
282                .creators
283                .iter()
284                .any(|c| c.creator_type == super::CreatorType::Organization);
285
286            if !has_org {
287                self.document.creators.push(super::Creator {
288                    creator_type: super::CreatorType::Organization,
289                    name: manufacturer.clone(),
290                    email: sidecar.manufacturer_email.clone(),
291                });
292            }
293        }
294    }
295}
296
297impl Default for NormalizedSbom {
298    fn default() -> Self {
299        Self::new(DocumentMetadata::default())
300    }
301}
302
/// Vulnerability counts by severity
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VulnerabilityCounts {
    /// Number of vulnerabilities with critical severity
    pub critical: usize,
    /// Number of vulnerabilities with high severity
    pub high: usize,
    /// Number of vulnerabilities with medium severity
    pub medium: usize,
    /// Number of vulnerabilities with low severity
    pub low: usize,
    /// Number of vulnerabilities with no severity or one outside the four buckets above
    pub unknown: usize,
}
312
313impl VulnerabilityCounts {
314    pub fn total(&self) -> usize {
315        self.critical + self.high + self.medium + self.low + self.unknown
316    }
317}
318
/// Staleness level classification for dependencies.
///
/// The first four variants are age-based and are what `from_days` produces;
/// `Deprecated` and `Archived` describe an explicit status instead of an age.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum StalenessLevel {
    /// Updated within 6 months
    Fresh,
    /// 6-12 months since last update
    Aging,
    /// 1-2 years since last update
    Stale,
    /// More than 2 years since last update
    Abandoned,
    /// Explicitly marked as deprecated
    Deprecated,
    /// Repository/package archived
    Archived,
}
335
336impl StalenessLevel {
337    /// Create from age in days
338    pub fn from_days(days: u32) -> Self {
339        match days {
340            0..=182 => StalenessLevel::Fresh,      // ~6 months
341            183..=365 => StalenessLevel::Aging,    // 6-12 months
342            366..=730 => StalenessLevel::Stale,    // 1-2 years
343            _ => StalenessLevel::Abandoned,        // >2 years
344        }
345    }
346
347    /// Get display label
348    pub fn label(&self) -> &'static str {
349        match self {
350            StalenessLevel::Fresh => "Fresh",
351            StalenessLevel::Aging => "Aging",
352            StalenessLevel::Stale => "Stale",
353            StalenessLevel::Abandoned => "Abandoned",
354            StalenessLevel::Deprecated => "Deprecated",
355            StalenessLevel::Archived => "Archived",
356        }
357    }
358
359    /// Get icon for TUI display
360    pub fn icon(&self) -> &'static str {
361        match self {
362            StalenessLevel::Fresh => "✓",
363            StalenessLevel::Aging => "⏳",
364            StalenessLevel::Stale => "⚠",
365            StalenessLevel::Abandoned => "⛔",
366            StalenessLevel::Deprecated => "⊘",
367            StalenessLevel::Archived => "📦",
368        }
369    }
370
371    /// Get severity weight (higher = worse)
372    pub fn severity(&self) -> u8 {
373        match self {
374            StalenessLevel::Fresh => 0,
375            StalenessLevel::Aging => 1,
376            StalenessLevel::Stale => 2,
377            StalenessLevel::Abandoned => 3,
378            StalenessLevel::Deprecated => 4,
379            StalenessLevel::Archived => 4,
380        }
381    }
382}
383
384impl std::fmt::Display for StalenessLevel {
385    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
386        write!(f, "{}", self.label())
387    }
388}
389
/// Staleness information for a component
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StalenessInfo {
    /// Staleness classification
    pub level: StalenessLevel,
    /// Last publish/release date (UTC), if known
    pub last_published: Option<chrono::DateTime<chrono::Utc>>,
    /// Whether explicitly deprecated by maintainer
    pub is_deprecated: bool,
    /// Whether repository/package is archived
    pub is_archived: bool,
    /// Deprecation message if available
    pub deprecation_message: Option<String>,
    /// Days since last update, if known
    pub days_since_update: Option<u32>,
    /// Latest available version (if different from current)
    pub latest_version: Option<String>,
}
408
409impl StalenessInfo {
410    /// Create new staleness info
411    pub fn new(level: StalenessLevel) -> Self {
412        Self {
413            level,
414            last_published: None,
415            is_deprecated: false,
416            is_archived: false,
417            deprecation_message: None,
418            days_since_update: None,
419            latest_version: None,
420        }
421    }
422
423    /// Create from last published date
424    pub fn from_date(last_published: chrono::DateTime<chrono::Utc>) -> Self {
425        let days = (chrono::Utc::now() - last_published).num_days() as u32;
426        let level = StalenessLevel::from_days(days);
427        Self {
428            level,
429            last_published: Some(last_published),
430            is_deprecated: false,
431            is_archived: false,
432            deprecation_message: None,
433            days_since_update: Some(days),
434            latest_version: None,
435        }
436    }
437
438    /// Check if component needs attention (stale or worse)
439    pub fn needs_attention(&self) -> bool {
440        self.level.severity() >= 2
441    }
442}
443
/// Component in the normalized SBOM
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Component {
    /// Canonical identifier, derived from `identifiers` (the key under which
    /// `NormalizedSbom::add_component` stores the component)
    pub canonical_id: CanonicalId,
    /// Various identifiers (PURL, CPE, etc.)
    pub identifiers: ComponentIdentifiers,
    /// Component name
    pub name: String,
    /// Version string as given in the source document
    pub version: Option<String>,
    /// Parsed semantic version (`None` if `version` is not valid semver)
    pub semver: Option<semver::Version>,
    /// Component type
    pub component_type: ComponentType,
    /// Package ecosystem
    pub ecosystem: Option<Ecosystem>,
    /// License information
    pub licenses: LicenseInfo,
    /// Supplier/vendor information
    pub supplier: Option<Organization>,
    /// Cryptographic hashes
    pub hashes: Vec<Hash>,
    /// External references
    pub external_refs: Vec<ExternalReference>,
    /// Known vulnerabilities
    pub vulnerabilities: Vec<VulnerabilityRef>,
    /// VEX status
    pub vex_status: Option<VexStatus>,
    /// Content hash for quick comparison; `0` until `calculate_content_hash` is called
    pub content_hash: u64,
    /// Format-specific extensions
    pub extensions: ComponentExtensions,
    /// Description
    pub description: Option<String>,
    /// Copyright text
    pub copyright: Option<String>,
    /// Author information
    pub author: Option<String>,
    /// Group/namespace (e.g., Maven groupId)
    pub group: Option<String>,
    /// Staleness information (populated by enrichment)
    pub staleness: Option<StalenessInfo>,
}
488
489impl Component {
490    /// Create a new component with minimal required fields
491    pub fn new(name: String, format_id: String) -> Self {
492        let identifiers = ComponentIdentifiers::new(format_id);
493        let canonical_id = identifiers.canonical_id();
494
495        Self {
496            canonical_id,
497            identifiers,
498            name,
499            version: None,
500            semver: None,
501            component_type: ComponentType::Library,
502            ecosystem: None,
503            licenses: LicenseInfo::default(),
504            supplier: None,
505            hashes: Vec::new(),
506            external_refs: Vec::new(),
507            vulnerabilities: Vec::new(),
508            vex_status: None,
509            content_hash: 0,
510            extensions: ComponentExtensions::default(),
511            description: None,
512            copyright: None,
513            author: None,
514            group: None,
515            staleness: None,
516        }
517    }
518
519    /// Set the PURL and update canonical ID
520    pub fn with_purl(mut self, purl: String) -> Self {
521        self.identifiers.purl = Some(purl);
522        self.canonical_id = self.identifiers.canonical_id();
523
524        // Try to extract ecosystem from PURL
525        if let Some(purl_str) = &self.identifiers.purl {
526            if let Some(purl_type) = purl_str
527                .strip_prefix("pkg:")
528                .and_then(|s| s.split('/').next())
529            {
530                self.ecosystem = Some(Ecosystem::from_purl_type(purl_type));
531            }
532        }
533
534        self
535    }
536
537    /// Set the version and try to parse as semver
538    pub fn with_version(mut self, version: String) -> Self {
539        self.semver = semver::Version::parse(&version).ok();
540        self.version = Some(version);
541        self
542    }
543
544    /// Calculate and update content hash
545    pub fn calculate_content_hash(&mut self) {
546        let mut hasher_input = Vec::new();
547
548        hasher_input.extend(self.name.as_bytes());
549        if let Some(v) = &self.version {
550            hasher_input.extend(v.as_bytes());
551        }
552        if let Some(purl) = &self.identifiers.purl {
553            hasher_input.extend(purl.as_bytes());
554        }
555        for license in &self.licenses.declared {
556            hasher_input.extend(license.expression.as_bytes());
557        }
558        if let Some(supplier) = &self.supplier {
559            hasher_input.extend(supplier.name.as_bytes());
560        }
561        for hash in &self.hashes {
562            hasher_input.extend(hash.value.as_bytes());
563        }
564        for vuln in &self.vulnerabilities {
565            hasher_input.extend(vuln.id.as_bytes());
566        }
567
568        self.content_hash = xxh3_64(&hasher_input);
569    }
570
571    /// Check if this is an OSS (open source) component
572    pub fn is_oss(&self) -> bool {
573        // Check if any declared license is OSS
574        self.licenses.declared.iter().any(|l| l.is_valid_spdx) || self.identifiers.purl.is_some()
575    }
576
577    /// Get display name with version
578    pub fn display_name(&self) -> String {
579        match &self.version {
580            Some(v) => format!("{}@{}", self.name, v),
581            None => self.name.clone(),
582        }
583    }
584}
585
/// Dependency edge between components.
///
/// Directed: `from` is the component that has the relationship to `to`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct DependencyEdge {
    /// Source component
    pub from: CanonicalId,
    /// Target component
    pub to: CanonicalId,
    /// Relationship type
    pub relationship: DependencyType,
    /// Dependency scope (`None` when not specified)
    pub scope: Option<DependencyScope>,
}
598
599impl DependencyEdge {
600    /// Create a new dependency edge
601    pub fn new(from: CanonicalId, to: CanonicalId, relationship: DependencyType) -> Self {
602        Self {
603            from,
604            to,
605            relationship,
606            scope: None,
607        }
608    }
609
610    /// Check if this is a direct dependency
611    pub fn is_direct(&self) -> bool {
612        matches!(
613            self.relationship,
614            DependencyType::DependsOn
615                | DependencyType::DevDependsOn
616                | DependencyType::BuildDependsOn
617                | DependencyType::TestDependsOn
618                | DependencyType::RuntimeDependsOn
619        )
620    }
621}