Skip to main content

sbom_tools/model/
sbom.rs

1//! Core SBOM and Component data structures.
2
3use super::{
4    CanonicalId, ComponentExtensions, ComponentIdentifiers, ComponentType, DependencyScope,
5    DependencyType, DocumentMetadata, Ecosystem, ExternalReference, FormatExtensions, Hash,
6    LicenseInfo, Organization, VexStatus, VulnerabilityRef,
7};
8use indexmap::IndexMap;
9use serde::{Deserialize, Serialize};
10use xxhash_rust::xxh3::xxh3_64;
11
/// Normalized SBOM document - the canonical intermediate representation.
///
/// This structure represents an SBOM in a format-agnostic way, allowing
/// comparison between `CycloneDX` and SPDX documents.
///
/// Components live in a map keyed by canonical ID; dependency relationships
/// are kept separately as a flat list of edges that refer to those IDs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NormalizedSbom {
    /// Document-level metadata
    pub document: DocumentMetadata,
    /// Components indexed by canonical ID
    pub components: IndexMap<CanonicalId, Component>,
    /// Dependency edges
    pub edges: Vec<DependencyEdge>,
    /// Format-specific extensions
    pub extensions: FormatExtensions,
    /// Content hash for quick equality checks.
    ///
    /// Initialised to `0` by `new`; only `calculate_content_hash` updates it.
    pub content_hash: u64,
    /// Primary/root product component (`CycloneDX` metadata.component or SPDX documentDescribes)
    /// This identifies the main product that this SBOM describes, important for CRA compliance.
    pub primary_component_id: Option<CanonicalId>,
    /// Number of canonical ID collisions encountered during parsing.
    ///
    /// Marked `#[serde(skip)]`, so it is excluded from (de)serialization.
    #[serde(skip)]
    pub collision_count: usize,
}
35
36impl NormalizedSbom {
37    /// Create a new empty normalized SBOM
38    #[must_use] 
39    pub fn new(document: DocumentMetadata) -> Self {
40        Self {
41            document,
42            components: IndexMap::new(),
43            edges: Vec::new(),
44            extensions: FormatExtensions::default(),
45            content_hash: 0,
46            primary_component_id: None,
47            collision_count: 0,
48        }
49    }
50
51    /// Add a component to the SBOM.
52    ///
53    /// Returns `true` if a collision occurred (a component with the same canonical ID
54    /// was already present and has been overwritten). Collisions are logged as warnings.
55    pub fn add_component(&mut self, component: Component) -> bool {
56        let id = component.canonical_id.clone();
57        if let Some(existing) = self.components.get(&id) {
58            // Count genuinely different components that collide on canonical ID
59            if existing.identifiers.format_id != component.identifiers.format_id
60                || existing.name != component.name
61            {
62                self.collision_count += 1;
63            }
64            self.components.insert(id, component);
65            true
66        } else {
67            self.components.insert(id, component);
68            false
69        }
70    }
71
72    /// Log a single summary line if any canonical ID collisions occurred during parsing.
73    pub fn log_collision_summary(&self) {
74        if self.collision_count > 0 {
75            tracing::info!(
76                collision_count = self.collision_count,
77                "Canonical ID collisions: {} distinct components resolved to the same ID \
78                 and were overwritten. Consider adding PURL identifiers to disambiguate.",
79                self.collision_count
80            );
81        }
82    }
83
    /// Add a dependency edge.
    ///
    /// The edge is appended as-is: no de-duplication is performed and the
    /// endpoints are not checked against `components`.
    pub fn add_edge(&mut self, edge: DependencyEdge) {
        self.edges.push(edge);
    }
88
    /// Get a component by canonical ID.
    ///
    /// Returns `None` when no component is stored under `id`.
    #[must_use]
    pub fn get_component(&self, id: &CanonicalId) -> Option<&Component> {
        self.components.get(id)
    }
94
95    /// Get dependencies of a component
96    #[must_use] 
97    pub fn get_dependencies(&self, id: &CanonicalId) -> Vec<&DependencyEdge> {
98        self.edges.iter().filter(|e| &e.from == id).collect()
99    }
100
101    /// Get dependents of a component
102    #[must_use] 
103    pub fn get_dependents(&self, id: &CanonicalId) -> Vec<&DependencyEdge> {
104        self.edges.iter().filter(|e| &e.to == id).collect()
105    }
106
107    /// Calculate and update the content hash
108    pub fn calculate_content_hash(&mut self) {
109        let mut hasher_input = Vec::new();
110
111        // Hash document metadata
112        if let Ok(meta_json) = serde_json::to_vec(&self.document) {
113            hasher_input.extend(meta_json);
114        }
115
116        // Hash all components (sorted for determinism)
117        let mut component_ids: Vec<_> = self.components.keys().collect();
118        component_ids.sort_by(|a, b| a.value().cmp(b.value()));
119
120        for id in component_ids {
121            if let Some(comp) = self.components.get(id) {
122                hasher_input.extend(comp.content_hash.to_le_bytes());
123            }
124        }
125
126        // Hash edges
127        for edge in &self.edges {
128            hasher_input.extend(edge.from.value().as_bytes());
129            hasher_input.extend(edge.to.value().as_bytes());
130        }
131
132        self.content_hash = xxh3_64(&hasher_input);
133    }
134
    /// Get total component count (number of entries in `components`).
    #[must_use]
    pub fn component_count(&self) -> usize {
        self.components.len()
    }
140
141    /// Get the primary/root product component if set
142    #[must_use] 
143    pub fn primary_component(&self) -> Option<&Component> {
144        self.primary_component_id
145            .as_ref()
146            .and_then(|id| self.components.get(id))
147    }
148
    /// Set the primary component by its canonical ID.
    ///
    /// The ID is stored without checking that a matching component exists;
    /// `primary_component` returns `None` if it does not.
    pub fn set_primary_component(&mut self, id: CanonicalId) {
        self.primary_component_id = Some(id);
    }
153
154    /// Get all unique ecosystems in the SBOM
155    pub fn ecosystems(&self) -> Vec<&Ecosystem> {
156        let mut ecosystems: Vec<_> = self
157            .components
158            .values()
159            .filter_map(|c| c.ecosystem.as_ref())
160            .collect();
161        ecosystems.sort_by_key(std::string::ToString::to_string);
162        ecosystems.dedup();
163        ecosystems
164    }
165
166    /// Get all vulnerabilities across all components
167    #[must_use] 
168    pub fn all_vulnerabilities(&self) -> Vec<(&Component, &VulnerabilityRef)> {
169        self.components
170            .values()
171            .flat_map(|c| c.vulnerabilities.iter().map(move |v| (c, v)))
172            .collect()
173    }
174
175    /// Count vulnerabilities by severity
176    #[must_use] 
177    pub fn vulnerability_counts(&self) -> VulnerabilityCounts {
178        let mut counts = VulnerabilityCounts::default();
179        for (_, vuln) in self.all_vulnerabilities() {
180            match vuln.severity {
181                Some(super::Severity::Critical) => counts.critical += 1,
182                Some(super::Severity::High) => counts.high += 1,
183                Some(super::Severity::Medium) => counts.medium += 1,
184                Some(super::Severity::Low) => counts.low += 1,
185                _ => counts.unknown += 1,
186            }
187        }
188        counts
189    }
190
    /// Build an index for this SBOM.
    ///
    /// The index provides O(1) lookups for dependencies, dependents,
    /// and name-based searches. Build once and reuse for multiple operations.
    ///
    /// The index is built from the SBOM as it is at the time of the call.
    /// NOTE(review): presumably it must be rebuilt after components or edges
    /// change — confirm against `NormalizedSbomIndex`.
    ///
    /// # Example
    ///
    /// ```ignore
    /// let sbom = parse_sbom(&path)?;
    /// let index = sbom.build_index();
    ///
    /// // Fast dependency lookup
    /// let deps = index.dependencies_of(&component_id, &sbom.edges);
    /// ```
    pub fn build_index(&self) -> super::NormalizedSbomIndex {
        super::NormalizedSbomIndex::build(self)
    }
208
    /// Get dependencies using an index (O(k) instead of O(edges)).
    ///
    /// Use this when you have a prebuilt index for repeated lookups.
    /// Delegates to `index.dependencies_of`, passing this SBOM's edge list;
    /// the returned references borrow from `self`, not from `index`.
    #[must_use]
    pub fn get_dependencies_indexed<'a>(
        &'a self,
        id: &CanonicalId,
        index: &super::NormalizedSbomIndex,
    ) -> Vec<&'a DependencyEdge> {
        index.dependencies_of(id, &self.edges)
    }
220
    /// Get dependents using an index (O(k) instead of O(edges)).
    ///
    /// Use this when you have a prebuilt index for repeated lookups.
    /// Delegates to `index.dependents_of`, passing this SBOM's edge list;
    /// the returned references borrow from `self`, not from `index`.
    #[must_use]
    pub fn get_dependents_indexed<'a>(
        &'a self,
        id: &CanonicalId,
        index: &super::NormalizedSbomIndex,
    ) -> Vec<&'a DependencyEdge> {
        index.dependents_of(id, &self.edges)
    }
232
233    /// Find components by name (case-insensitive) using an index.
234    ///
235    /// Returns components whose lowercased name exactly matches the query.
236    #[must_use] 
237    pub fn find_by_name_indexed(
238        &self,
239        name: &str,
240        index: &super::NormalizedSbomIndex,
241    ) -> Vec<&Component> {
242        let name_lower = name.to_lowercase();
243        index
244            .find_by_name_lower(&name_lower)
245            .iter()
246            .filter_map(|id| self.components.get(id))
247            .collect()
248    }
249
250    /// Search components by name (case-insensitive substring) using an index.
251    ///
252    /// Returns components whose name contains the query substring.
253    #[must_use] 
254    pub fn search_by_name_indexed(
255        &self,
256        query: &str,
257        index: &super::NormalizedSbomIndex,
258    ) -> Vec<&Component> {
259        let query_lower = query.to_lowercase();
260        index
261            .search_by_name(&query_lower)
262            .iter()
263            .filter_map(|id| self.components.get(id))
264            .collect()
265    }
266
267    /// Apply CRA sidecar metadata to supplement SBOM fields.
268    ///
269    /// Sidecar values only override SBOM fields if the SBOM field is None/empty.
270    /// This ensures SBOM data takes precedence when available.
271    pub fn apply_cra_sidecar(&mut self, sidecar: &super::CraSidecarMetadata) {
272        // Only apply if SBOM doesn't already have the value
273        if self.document.security_contact.is_none() {
274            self.document.security_contact.clone_from(&sidecar.security_contact);
275        }
276
277        if self.document.vulnerability_disclosure_url.is_none() {
278            self.document
279                .vulnerability_disclosure_url
280                .clone_from(&sidecar.vulnerability_disclosure_url);
281        }
282
283        if self.document.support_end_date.is_none() {
284            self.document.support_end_date = sidecar.support_end_date;
285        }
286
287        if self.document.name.is_none() {
288            self.document.name.clone_from(&sidecar.product_name);
289        }
290
291        // Add manufacturer as creator if not present
292        if let Some(manufacturer) = &sidecar.manufacturer_name {
293            let has_org = self
294                .document
295                .creators
296                .iter()
297                .any(|c| c.creator_type == super::CreatorType::Organization);
298
299            if !has_org {
300                self.document.creators.push(super::Creator {
301                    creator_type: super::CreatorType::Organization,
302                    name: manufacturer.clone(),
303                    email: sidecar.manufacturer_email.clone(),
304                });
305            }
306        }
307    }
308}
309
310impl Default for NormalizedSbom {
311    fn default() -> Self {
312        Self::new(DocumentMetadata::default())
313    }
314}
315
/// Vulnerability counts by severity
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VulnerabilityCounts {
    /// Vulnerabilities with critical severity
    pub critical: usize,
    /// Vulnerabilities with high severity
    pub high: usize,
    /// Vulnerabilities with medium severity
    pub medium: usize,
    /// Vulnerabilities with low severity
    pub low: usize,
    /// Vulnerabilities whose severity is missing or is none of the above
    pub unknown: usize,
}
325
326impl VulnerabilityCounts {
327    #[must_use] 
328    pub const fn total(&self) -> usize {
329        self.critical + self.high + self.medium + self.low + self.unknown
330    }
331}
332
/// Staleness level classification for dependencies.
///
/// The first four variants are age-based and are what `from_days` produces;
/// `Deprecated` and `Archived` are never derived from an age.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum StalenessLevel {
    /// Updated within 6 months (0-182 days)
    Fresh,
    /// 6-12 months since last update (183-365 days)
    Aging,
    /// 1-2 years since last update (366-730 days)
    Stale,
    /// More than 2 years since last update (over 730 days)
    Abandoned,
    /// Explicitly marked as deprecated
    Deprecated,
    /// Repository/package archived
    Archived,
}
350
351impl StalenessLevel {
352    /// Create from age in days
353    #[must_use] 
354    pub const fn from_days(days: u32) -> Self {
355        match days {
356            0..=182 => Self::Fresh,      // ~6 months
357            183..=365 => Self::Aging,    // 6-12 months
358            366..=730 => Self::Stale,    // 1-2 years
359            _ => Self::Abandoned,        // >2 years
360        }
361    }
362
363    /// Get display label
364    #[must_use] 
365    pub const fn label(&self) -> &'static str {
366        match self {
367            Self::Fresh => "Fresh",
368            Self::Aging => "Aging",
369            Self::Stale => "Stale",
370            Self::Abandoned => "Abandoned",
371            Self::Deprecated => "Deprecated",
372            Self::Archived => "Archived",
373        }
374    }
375
376    /// Get icon for TUI display
377    #[must_use] 
378    pub const fn icon(&self) -> &'static str {
379        match self {
380            Self::Fresh => "✓",
381            Self::Aging => "⏳",
382            Self::Stale => "⚠",
383            Self::Abandoned => "⛔",
384            Self::Deprecated => "⊘",
385            Self::Archived => "📦",
386        }
387    }
388
389    /// Get severity weight (higher = worse)
390    #[must_use] 
391    pub const fn severity(&self) -> u8 {
392        match self {
393            Self::Fresh => 0,
394            Self::Aging => 1,
395            Self::Stale => 2,
396            Self::Abandoned => 3,
397            Self::Deprecated | Self::Archived => 4,
398        }
399    }
400}
401
402impl std::fmt::Display for StalenessLevel {
403    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
404        write!(f, "{}", self.label())
405    }
406}
407
/// Staleness information for a component.
///
/// The fields are independent: nothing in this type keeps `level` in sync
/// with `is_deprecated`, `is_archived` or `days_since_update`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StalenessInfo {
    /// Staleness classification
    pub level: StalenessLevel,
    /// Last publish/release date
    pub last_published: Option<chrono::DateTime<chrono::Utc>>,
    /// Whether explicitly deprecated by maintainer
    pub is_deprecated: bool,
    /// Whether repository/package is archived
    pub is_archived: bool,
    /// Deprecation message if available
    pub deprecation_message: Option<String>,
    /// Days since last update
    pub days_since_update: Option<u32>,
    /// Latest available version (if different from current)
    pub latest_version: Option<String>,
}
426
427impl StalenessInfo {
428    /// Create new staleness info
429    #[must_use] 
430    pub const fn new(level: StalenessLevel) -> Self {
431        Self {
432            level,
433            last_published: None,
434            is_deprecated: false,
435            is_archived: false,
436            deprecation_message: None,
437            days_since_update: None,
438            latest_version: None,
439        }
440    }
441
442    /// Create from last published date
443    #[must_use] 
444    pub fn from_date(last_published: chrono::DateTime<chrono::Utc>) -> Self {
445        let days = (chrono::Utc::now() - last_published).num_days() as u32;
446        let level = StalenessLevel::from_days(days);
447        Self {
448            level,
449            last_published: Some(last_published),
450            is_deprecated: false,
451            is_archived: false,
452            deprecation_message: None,
453            days_since_update: Some(days),
454            latest_version: None,
455        }
456    }
457
458    /// Check if component needs attention (stale or worse)
459    #[must_use] 
460    pub const fn needs_attention(&self) -> bool {
461        self.level.severity() >= 2
462    }
463}
464
/// Component in the normalized SBOM
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Component {
    /// Canonical identifier (derived from `identifiers`; used as the map key
    /// in `NormalizedSbom::components`)
    pub canonical_id: CanonicalId,
    /// Various identifiers (PURL, CPE, etc.)
    pub identifiers: ComponentIdentifiers,
    /// Component name
    pub name: String,
    /// Version string
    pub version: Option<String>,
    /// Parsed semantic version (if valid)
    pub semver: Option<semver::Version>,
    /// Component type
    pub component_type: ComponentType,
    /// Package ecosystem
    pub ecosystem: Option<Ecosystem>,
    /// License information
    pub licenses: LicenseInfo,
    /// Supplier/vendor information
    pub supplier: Option<Organization>,
    /// Cryptographic hashes
    pub hashes: Vec<Hash>,
    /// External references
    pub external_refs: Vec<ExternalReference>,
    /// Known vulnerabilities
    pub vulnerabilities: Vec<VulnerabilityRef>,
    /// VEX status
    pub vex_status: Option<VexStatus>,
    /// Content hash for quick comparison.
    ///
    /// Initialised to `0` by `new`; only `calculate_content_hash` updates it.
    pub content_hash: u64,
    /// Format-specific extensions
    pub extensions: ComponentExtensions,
    /// Description
    pub description: Option<String>,
    /// Copyright text
    pub copyright: Option<String>,
    /// Author information
    pub author: Option<String>,
    /// Group/namespace (e.g., Maven groupId)
    pub group: Option<String>,
    /// Staleness information (populated by enrichment)
    pub staleness: Option<StalenessInfo>,
}
509
510impl Component {
511    /// Create a new component with minimal required fields
512    #[must_use] 
513    pub fn new(name: String, format_id: String) -> Self {
514        let identifiers = ComponentIdentifiers::new(format_id);
515        let canonical_id = identifiers.canonical_id();
516
517        Self {
518            canonical_id,
519            identifiers,
520            name,
521            version: None,
522            semver: None,
523            component_type: ComponentType::Library,
524            ecosystem: None,
525            licenses: LicenseInfo::default(),
526            supplier: None,
527            hashes: Vec::new(),
528            external_refs: Vec::new(),
529            vulnerabilities: Vec::new(),
530            vex_status: None,
531            content_hash: 0,
532            extensions: ComponentExtensions::default(),
533            description: None,
534            copyright: None,
535            author: None,
536            group: None,
537            staleness: None,
538        }
539    }
540
541    /// Set the PURL and update canonical ID
542    #[must_use]
543    pub fn with_purl(mut self, purl: String) -> Self {
544        self.identifiers.purl = Some(purl);
545        self.canonical_id = self.identifiers.canonical_id();
546
547        // Try to extract ecosystem from PURL
548        if let Some(purl_str) = &self.identifiers.purl {
549            if let Some(purl_type) = purl_str
550                .strip_prefix("pkg:")
551                .and_then(|s| s.split('/').next())
552            {
553                self.ecosystem = Some(Ecosystem::from_purl_type(purl_type));
554            }
555        }
556
557        self
558    }
559
560    /// Set the version and try to parse as semver
561    #[must_use]
562    pub fn with_version(mut self, version: String) -> Self {
563        self.semver = semver::Version::parse(&version).ok();
564        self.version = Some(version);
565        self
566    }
567
568    /// Calculate and update content hash
569    pub fn calculate_content_hash(&mut self) {
570        let mut hasher_input = Vec::new();
571
572        hasher_input.extend(self.name.as_bytes());
573        if let Some(v) = &self.version {
574            hasher_input.extend(v.as_bytes());
575        }
576        if let Some(purl) = &self.identifiers.purl {
577            hasher_input.extend(purl.as_bytes());
578        }
579        for license in &self.licenses.declared {
580            hasher_input.extend(license.expression.as_bytes());
581        }
582        if let Some(supplier) = &self.supplier {
583            hasher_input.extend(supplier.name.as_bytes());
584        }
585        for hash in &self.hashes {
586            hasher_input.extend(hash.value.as_bytes());
587        }
588        for vuln in &self.vulnerabilities {
589            hasher_input.extend(vuln.id.as_bytes());
590        }
591
592        self.content_hash = xxh3_64(&hasher_input);
593    }
594
595    /// Check if this is an OSS (open source) component
596    #[must_use] 
597    pub fn is_oss(&self) -> bool {
598        // Check if any declared license is OSS
599        self.licenses.declared.iter().any(|l| l.is_valid_spdx) || self.identifiers.purl.is_some()
600    }
601
602    /// Get display name with version
603    #[must_use] 
604    pub fn display_name(&self) -> String {
605        self.version.as_ref().map_or_else(|| self.name.clone(), |v| format!("{}@{}", self.name, v))
606    }
607}
608
/// Dependency edge between components.
///
/// Endpoints are canonical IDs; the edge itself does not guarantee that
/// either one resolves to a component in the SBOM.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct DependencyEdge {
    /// Source component
    pub from: CanonicalId,
    /// Target component
    pub to: CanonicalId,
    /// Relationship type
    pub relationship: DependencyType,
    /// Dependency scope (`None` unless set explicitly; `new` leaves it unset)
    pub scope: Option<DependencyScope>,
}
621
622impl DependencyEdge {
623    /// Create a new dependency edge
624    #[must_use] 
625    pub const fn new(from: CanonicalId, to: CanonicalId, relationship: DependencyType) -> Self {
626        Self {
627            from,
628            to,
629            relationship,
630            scope: None,
631        }
632    }
633
634    /// Check if this is a direct dependency
635    #[must_use] 
636    pub const fn is_direct(&self) -> bool {
637        matches!(
638            self.relationship,
639            DependencyType::DependsOn
640                | DependencyType::DevDependsOn
641                | DependencyType::BuildDependsOn
642                | DependencyType::TestDependsOn
643                | DependencyType::RuntimeDependsOn
644        )
645    }
646}