Skip to main content

sbom_tools/model/
sbom.rs

1//! Core SBOM and Component data structures.
2
3use super::{
4    CanonicalId, ComponentExtensions, ComponentIdentifiers, ComponentType, DependencyScope,
5    DependencyType, DocumentMetadata, Ecosystem, ExternalReference, FormatExtensions, Hash,
6    LicenseInfo, Organization, VexStatus, VulnerabilityRef,
7};
8use indexmap::IndexMap;
9use serde::{Deserialize, Serialize};
10use xxhash_rust::xxh3::xxh3_64;
11
12/// Normalized SBOM document - the canonical intermediate representation.
13///
14/// This structure represents an SBOM in a format-agnostic way, allowing
15/// comparison between `CycloneDX` and SPDX documents.
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct NormalizedSbom {
18    /// Document-level metadata
19    pub document: DocumentMetadata,
20    /// Components indexed by canonical ID
21    pub components: IndexMap<CanonicalId, Component>,
22    /// Dependency edges
23    pub edges: Vec<DependencyEdge>,
24    /// Format-specific extensions
25    pub extensions: FormatExtensions,
26    /// Content hash for quick equality checks
27    pub content_hash: u64,
28    /// Primary/root product component (`CycloneDX` metadata.component or SPDX documentDescribes)
29    /// This identifies the main product that this SBOM describes, important for CRA compliance.
30    pub primary_component_id: Option<CanonicalId>,
31    /// Number of canonical ID collisions encountered during parsing
32    #[serde(skip)]
33    pub collision_count: usize,
34}
35
36impl NormalizedSbom {
37    /// Create a new empty normalized SBOM
38    #[must_use]
39    pub fn new(document: DocumentMetadata) -> Self {
40        Self {
41            document,
42            components: IndexMap::new(),
43            edges: Vec::new(),
44            extensions: FormatExtensions::default(),
45            content_hash: 0,
46            primary_component_id: None,
47            collision_count: 0,
48        }
49    }
50
51    /// Add a component to the SBOM.
52    ///
53    /// Returns `true` if a collision occurred (a component with the same canonical ID
54    /// was already present and has been overwritten). Collisions are logged as warnings.
55    pub fn add_component(&mut self, component: Component) -> bool {
56        let id = component.canonical_id.clone();
57        if let Some(existing) = self.components.get(&id) {
58            // Count genuinely different components that collide on canonical ID
59            if existing.identifiers.format_id != component.identifiers.format_id
60                || existing.name != component.name
61            {
62                self.collision_count += 1;
63            }
64            self.components.insert(id, component);
65            true
66        } else {
67            self.components.insert(id, component);
68            false
69        }
70    }
71
72    /// Log a single summary line if any canonical ID collisions occurred during parsing.
73    pub fn log_collision_summary(&self) {
74        if self.collision_count > 0 {
75            tracing::info!(
76                collision_count = self.collision_count,
77                "Canonical ID collisions: {} distinct components resolved to the same ID \
78                 and were overwritten. Consider adding PURL identifiers to disambiguate.",
79                self.collision_count
80            );
81        }
82    }
83
84    /// Add a dependency edge
85    pub fn add_edge(&mut self, edge: DependencyEdge) {
86        self.edges.push(edge);
87    }
88
89    /// Get a component by canonical ID
90    #[must_use]
91    pub fn get_component(&self, id: &CanonicalId) -> Option<&Component> {
92        self.components.get(id)
93    }
94
95    /// Get dependencies of a component
96    #[must_use]
97    pub fn get_dependencies(&self, id: &CanonicalId) -> Vec<&DependencyEdge> {
98        self.edges.iter().filter(|e| &e.from == id).collect()
99    }
100
101    /// Get dependents of a component
102    #[must_use]
103    pub fn get_dependents(&self, id: &CanonicalId) -> Vec<&DependencyEdge> {
104        self.edges.iter().filter(|e| &e.to == id).collect()
105    }
106
107    /// Calculate and update the content hash
108    pub fn calculate_content_hash(&mut self) {
109        let mut hasher_input = Vec::new();
110
111        // Hash document metadata
112        if let Ok(meta_json) = serde_json::to_vec(&self.document) {
113            hasher_input.extend(meta_json);
114        }
115
116        // Hash all components (sorted for determinism)
117        let mut component_ids: Vec<_> = self.components.keys().collect();
118        component_ids.sort_by(|a, b| a.value().cmp(b.value()));
119
120        for id in component_ids {
121            if let Some(comp) = self.components.get(id) {
122                hasher_input.extend(comp.content_hash.to_le_bytes());
123            }
124        }
125
126        // Hash edges (sorted for determinism, including relationship and scope)
127        let mut edge_keys: Vec<_> = self
128            .edges
129            .iter()
130            .map(|edge| {
131                (
132                    edge.from.value(),
133                    edge.to.value(),
134                    edge.relationship.to_string(),
135                    edge.scope
136                        .as_ref()
137                        .map_or(String::new(), std::string::ToString::to_string),
138                )
139            })
140            .collect();
141        edge_keys.sort();
142        for (from, to, relationship, scope) in &edge_keys {
143            hasher_input.extend(from.as_bytes());
144            hasher_input.extend(to.as_bytes());
145            hasher_input.extend(relationship.as_bytes());
146            hasher_input.extend(scope.as_bytes());
147        }
148
149        self.content_hash = xxh3_64(&hasher_input);
150    }
151
152    /// Get total component count
153    #[must_use]
154    pub fn component_count(&self) -> usize {
155        self.components.len()
156    }
157
158    /// Get the primary/root product component if set
159    #[must_use]
160    pub fn primary_component(&self) -> Option<&Component> {
161        self.primary_component_id
162            .as_ref()
163            .and_then(|id| self.components.get(id))
164    }
165
166    /// Set the primary component by its canonical ID
167    pub fn set_primary_component(&mut self, id: CanonicalId) {
168        self.primary_component_id = Some(id);
169    }
170
171    /// Get all unique ecosystems in the SBOM
172    pub fn ecosystems(&self) -> Vec<&Ecosystem> {
173        let mut ecosystems: Vec<_> = self
174            .components
175            .values()
176            .filter_map(|c| c.ecosystem.as_ref())
177            .collect();
178        ecosystems.sort_by_key(std::string::ToString::to_string);
179        ecosystems.dedup();
180        ecosystems
181    }
182
183    /// Get all vulnerabilities across all components
184    #[must_use]
185    pub fn all_vulnerabilities(&self) -> Vec<(&Component, &VulnerabilityRef)> {
186        self.components
187            .values()
188            .flat_map(|c| c.vulnerabilities.iter().map(move |v| (c, v)))
189            .collect()
190    }
191
192    /// Count vulnerabilities by severity
193    #[must_use]
194    pub fn vulnerability_counts(&self) -> VulnerabilityCounts {
195        let mut counts = VulnerabilityCounts::default();
196        for (_, vuln) in self.all_vulnerabilities() {
197            match vuln.severity {
198                Some(super::Severity::Critical) => counts.critical += 1,
199                Some(super::Severity::High) => counts.high += 1,
200                Some(super::Severity::Medium) => counts.medium += 1,
201                Some(super::Severity::Low) => counts.low += 1,
202                _ => counts.unknown += 1,
203            }
204        }
205        counts
206    }
207
208    /// Build an index for this SBOM.
209    ///
210    /// The index provides O(1) lookups for dependencies, dependents,
211    /// and name-based searches. Build once and reuse for multiple operations.
212    ///
213    /// # Example
214    ///
215    /// ```ignore
216    /// let sbom = parse_sbom(&path)?;
217    /// let index = sbom.build_index();
218    ///
219    /// // Fast dependency lookup
220    /// let deps = index.dependencies_of(&component_id, &sbom.edges);
221    /// ```
222    pub fn build_index(&self) -> super::NormalizedSbomIndex {
223        super::NormalizedSbomIndex::build(self)
224    }
225
226    /// Get dependencies using an index (O(k) instead of O(edges)).
227    ///
228    /// Use this when you have a prebuilt index for repeated lookups.
229    #[must_use]
230    pub fn get_dependencies_indexed<'a>(
231        &'a self,
232        id: &CanonicalId,
233        index: &super::NormalizedSbomIndex,
234    ) -> Vec<&'a DependencyEdge> {
235        index.dependencies_of(id, &self.edges)
236    }
237
238    /// Get dependents using an index (O(k) instead of O(edges)).
239    ///
240    /// Use this when you have a prebuilt index for repeated lookups.
241    #[must_use]
242    pub fn get_dependents_indexed<'a>(
243        &'a self,
244        id: &CanonicalId,
245        index: &super::NormalizedSbomIndex,
246    ) -> Vec<&'a DependencyEdge> {
247        index.dependents_of(id, &self.edges)
248    }
249
250    /// Find components by name (case-insensitive) using an index.
251    ///
252    /// Returns components whose lowercased name exactly matches the query.
253    #[must_use]
254    pub fn find_by_name_indexed(
255        &self,
256        name: &str,
257        index: &super::NormalizedSbomIndex,
258    ) -> Vec<&Component> {
259        let name_lower = name.to_lowercase();
260        index
261            .find_by_name_lower(&name_lower)
262            .iter()
263            .filter_map(|id| self.components.get(id))
264            .collect()
265    }
266
267    /// Search components by name (case-insensitive substring) using an index.
268    ///
269    /// Returns components whose name contains the query substring.
270    #[must_use]
271    pub fn search_by_name_indexed(
272        &self,
273        query: &str,
274        index: &super::NormalizedSbomIndex,
275    ) -> Vec<&Component> {
276        let query_lower = query.to_lowercase();
277        index
278            .search_by_name(&query_lower)
279            .iter()
280            .filter_map(|id| self.components.get(id))
281            .collect()
282    }
283
284    /// Apply CRA sidecar metadata to supplement SBOM fields.
285    ///
286    /// Sidecar values only override SBOM fields if the SBOM field is None/empty.
287    /// This ensures SBOM data takes precedence when available.
288    pub fn apply_cra_sidecar(&mut self, sidecar: &super::CraSidecarMetadata) {
289        // Only apply if SBOM doesn't already have the value
290        if self.document.security_contact.is_none() {
291            self.document
292                .security_contact
293                .clone_from(&sidecar.security_contact);
294        }
295
296        if self.document.vulnerability_disclosure_url.is_none() {
297            self.document
298                .vulnerability_disclosure_url
299                .clone_from(&sidecar.vulnerability_disclosure_url);
300        }
301
302        if self.document.support_end_date.is_none() {
303            self.document.support_end_date = sidecar.support_end_date;
304        }
305
306        if self.document.name.is_none() {
307            self.document.name.clone_from(&sidecar.product_name);
308        }
309
310        // Add manufacturer as creator if not present
311        if let Some(manufacturer) = &sidecar.manufacturer_name {
312            let has_org = self
313                .document
314                .creators
315                .iter()
316                .any(|c| c.creator_type == super::CreatorType::Organization);
317
318            if !has_org {
319                self.document.creators.push(super::Creator {
320                    creator_type: super::CreatorType::Organization,
321                    name: manufacturer.clone(),
322                    email: sidecar.manufacturer_email.clone(),
323                });
324            }
325        }
326    }
327}
328
329impl Default for NormalizedSbom {
330    fn default() -> Self {
331        Self::new(DocumentMetadata::default())
332    }
333}
334
335/// Vulnerability counts by severity
336#[derive(Debug, Clone, Default, Serialize, Deserialize)]
337pub struct VulnerabilityCounts {
338    pub critical: usize,
339    pub high: usize,
340    pub medium: usize,
341    pub low: usize,
342    pub unknown: usize,
343}
344
345impl VulnerabilityCounts {
346    #[must_use]
347    pub const fn total(&self) -> usize {
348        self.critical + self.high + self.medium + self.low + self.unknown
349    }
350}
351
352/// Staleness level classification for dependencies
353#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
354#[non_exhaustive]
355pub enum StalenessLevel {
356    /// Updated within 6 months
357    Fresh,
358    /// 6-12 months since last update
359    Aging,
360    /// 1-2 years since last update
361    Stale,
362    /// More than 2 years since last update
363    Abandoned,
364    /// Explicitly marked as deprecated
365    Deprecated,
366    /// Repository/package archived
367    Archived,
368}
369
370impl StalenessLevel {
371    /// Create from age in days
372    #[must_use]
373    pub const fn from_days(days: u32) -> Self {
374        match days {
375            0..=182 => Self::Fresh,   // ~6 months
376            183..=365 => Self::Aging, // 6-12 months
377            366..=730 => Self::Stale, // 1-2 years
378            _ => Self::Abandoned,     // >2 years
379        }
380    }
381
382    /// Get display label
383    #[must_use]
384    pub const fn label(&self) -> &'static str {
385        match self {
386            Self::Fresh => "Fresh",
387            Self::Aging => "Aging",
388            Self::Stale => "Stale",
389            Self::Abandoned => "Abandoned",
390            Self::Deprecated => "Deprecated",
391            Self::Archived => "Archived",
392        }
393    }
394
395    /// Get icon for TUI display
396    #[must_use]
397    pub const fn icon(&self) -> &'static str {
398        match self {
399            Self::Fresh => "✓",
400            Self::Aging => "⏳",
401            Self::Stale => "⚠",
402            Self::Abandoned => "⛔",
403            Self::Deprecated => "⊘",
404            Self::Archived => "📦",
405        }
406    }
407
408    /// Get severity weight (higher = worse)
409    #[must_use]
410    pub const fn severity(&self) -> u8 {
411        match self {
412            Self::Fresh => 0,
413            Self::Aging => 1,
414            Self::Stale => 2,
415            Self::Abandoned => 3,
416            Self::Deprecated | Self::Archived => 4,
417        }
418    }
419}
420
421impl std::fmt::Display for StalenessLevel {
422    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
423        write!(f, "{}", self.label())
424    }
425}
426
427/// Staleness information for a component
428#[derive(Debug, Clone, Serialize, Deserialize)]
429pub struct StalenessInfo {
430    /// Staleness classification
431    pub level: StalenessLevel,
432    /// Last publish/release date
433    pub last_published: Option<chrono::DateTime<chrono::Utc>>,
434    /// Whether explicitly deprecated by maintainer
435    pub is_deprecated: bool,
436    /// Whether repository/package is archived
437    pub is_archived: bool,
438    /// Deprecation message if available
439    pub deprecation_message: Option<String>,
440    /// Days since last update
441    pub days_since_update: Option<u32>,
442    /// Latest available version (if different from current)
443    pub latest_version: Option<String>,
444}
445
446impl StalenessInfo {
447    /// Create new staleness info
448    #[must_use]
449    pub const fn new(level: StalenessLevel) -> Self {
450        Self {
451            level,
452            last_published: None,
453            is_deprecated: false,
454            is_archived: false,
455            deprecation_message: None,
456            days_since_update: None,
457            latest_version: None,
458        }
459    }
460
461    /// Create from last published date
462    #[must_use]
463    pub fn from_date(last_published: chrono::DateTime<chrono::Utc>) -> Self {
464        let days = (chrono::Utc::now() - last_published).num_days() as u32;
465        let level = StalenessLevel::from_days(days);
466        Self {
467            level,
468            last_published: Some(last_published),
469            is_deprecated: false,
470            is_archived: false,
471            deprecation_message: None,
472            days_since_update: Some(days),
473            latest_version: None,
474        }
475    }
476
477    /// Check if component needs attention (stale or worse)
478    #[must_use]
479    pub const fn needs_attention(&self) -> bool {
480        self.level.severity() >= 2
481    }
482}
483
484/// End-of-life status classification for components
485#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
486#[non_exhaustive]
487pub enum EolStatus {
488    /// Actively receiving updates
489    Supported,
490    /// Active support ended, security patches continue (LTS phase)
491    SecurityOnly,
492    /// Within 6 months of EOL date
493    ApproachingEol,
494    /// Past EOL, no more updates
495    EndOfLife,
496    /// Product found but cycle not matched
497    Unknown,
498}
499
500impl EolStatus {
501    /// Get display label
502    #[must_use]
503    pub const fn label(&self) -> &'static str {
504        match self {
505            Self::Supported => "Supported",
506            Self::SecurityOnly => "Security Only",
507            Self::ApproachingEol => "Approaching EOL",
508            Self::EndOfLife => "End of Life",
509            Self::Unknown => "Unknown",
510        }
511    }
512
513    /// Get icon for TUI display
514    #[must_use]
515    pub const fn icon(&self) -> &'static str {
516        match self {
517            Self::Supported => "✓",
518            Self::SecurityOnly => "🔒",
519            Self::ApproachingEol => "⚠",
520            Self::EndOfLife => "⛔",
521            Self::Unknown => "?",
522        }
523    }
524
525    /// Get severity weight (higher = worse)
526    #[must_use]
527    pub const fn severity(&self) -> u8 {
528        match self {
529            Self::Supported => 0,
530            Self::SecurityOnly => 1,
531            Self::ApproachingEol => 2,
532            Self::EndOfLife => 3,
533            Self::Unknown => 0,
534        }
535    }
536}
537
538impl std::fmt::Display for EolStatus {
539    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
540        write!(f, "{}", self.label())
541    }
542}
543
544/// End-of-life information for a component
545#[derive(Debug, Clone, Serialize, Deserialize)]
546pub struct EolInfo {
547    /// EOL status classification
548    pub status: EolStatus,
549    /// Matched endoflife.date product slug
550    pub product: String,
551    /// Matched release cycle (e.g., "3.11")
552    pub cycle: String,
553    /// EOL date if known
554    pub eol_date: Option<chrono::NaiveDate>,
555    /// Active support end date
556    pub support_end_date: Option<chrono::NaiveDate>,
557    /// Whether this is an LTS release
558    pub is_lts: bool,
559    /// Latest patch version in this cycle
560    pub latest_in_cycle: Option<String>,
561    /// Latest release date in this cycle
562    pub latest_release_date: Option<chrono::NaiveDate>,
563    /// Days until EOL (negative = past EOL)
564    pub days_until_eol: Option<i64>,
565}
566
567impl EolInfo {
568    /// Check if the component needs attention (approaching or past EOL)
569    #[must_use]
570    pub const fn needs_attention(&self) -> bool {
571        self.status.severity() >= 2
572    }
573}
574
575/// Component in the normalized SBOM
576#[derive(Debug, Clone, Serialize, Deserialize)]
577pub struct Component {
578    /// Canonical identifier
579    pub canonical_id: CanonicalId,
580    /// Various identifiers (PURL, CPE, etc.)
581    pub identifiers: ComponentIdentifiers,
582    /// Component name
583    pub name: String,
584    /// Version string
585    pub version: Option<String>,
586    /// Parsed semantic version (if valid)
587    pub semver: Option<semver::Version>,
588    /// Component type
589    pub component_type: ComponentType,
590    /// Package ecosystem
591    pub ecosystem: Option<Ecosystem>,
592    /// License information
593    pub licenses: LicenseInfo,
594    /// Supplier/vendor information
595    pub supplier: Option<Organization>,
596    /// Cryptographic hashes
597    pub hashes: Vec<Hash>,
598    /// External references
599    pub external_refs: Vec<ExternalReference>,
600    /// Known vulnerabilities
601    pub vulnerabilities: Vec<VulnerabilityRef>,
602    /// VEX status
603    pub vex_status: Option<VexStatus>,
604    /// Content hash for quick comparison
605    pub content_hash: u64,
606    /// Format-specific extensions
607    pub extensions: ComponentExtensions,
608    /// Description
609    pub description: Option<String>,
610    /// Copyright text
611    pub copyright: Option<String>,
612    /// Author information
613    pub author: Option<String>,
614    /// Group/namespace (e.g., Maven groupId)
615    pub group: Option<String>,
616    /// Staleness information (populated by enrichment)
617    pub staleness: Option<StalenessInfo>,
618    /// End-of-life information (populated by enrichment)
619    pub eol: Option<EolInfo>,
620}
621
622impl Component {
623    /// Create a new component with minimal required fields
624    #[must_use]
625    pub fn new(name: String, format_id: String) -> Self {
626        let identifiers = ComponentIdentifiers::new(format_id);
627        let canonical_id = identifiers.canonical_id();
628
629        Self {
630            canonical_id,
631            identifiers,
632            name,
633            version: None,
634            semver: None,
635            component_type: ComponentType::Library,
636            ecosystem: None,
637            licenses: LicenseInfo::default(),
638            supplier: None,
639            hashes: Vec::new(),
640            external_refs: Vec::new(),
641            vulnerabilities: Vec::new(),
642            vex_status: None,
643            content_hash: 0,
644            extensions: ComponentExtensions::default(),
645            description: None,
646            copyright: None,
647            author: None,
648            group: None,
649            staleness: None,
650            eol: None,
651        }
652    }
653
654    /// Set the PURL and update canonical ID
655    #[must_use]
656    pub fn with_purl(mut self, purl: String) -> Self {
657        self.identifiers.purl = Some(purl);
658        self.canonical_id = self.identifiers.canonical_id();
659
660        // Try to extract ecosystem from PURL
661        if let Some(purl_str) = &self.identifiers.purl
662            && let Some(purl_type) = purl_str
663                .strip_prefix("pkg:")
664                .and_then(|s| s.split('/').next())
665        {
666            self.ecosystem = Some(Ecosystem::from_purl_type(purl_type));
667        }
668
669        self
670    }
671
672    /// Set the version and try to parse as semver
673    #[must_use]
674    pub fn with_version(mut self, version: String) -> Self {
675        self.semver = semver::Version::parse(&version).ok();
676        self.version = Some(version);
677        self
678    }
679
680    /// Calculate and update content hash
681    pub fn calculate_content_hash(&mut self) {
682        let mut hasher_input = Vec::new();
683
684        hasher_input.extend(self.name.as_bytes());
685        if let Some(v) = &self.version {
686            hasher_input.extend(v.as_bytes());
687        }
688        if let Some(purl) = &self.identifiers.purl {
689            hasher_input.extend(purl.as_bytes());
690        }
691        for license in &self.licenses.declared {
692            hasher_input.extend(license.expression.as_bytes());
693        }
694        if let Some(supplier) = &self.supplier {
695            hasher_input.extend(supplier.name.as_bytes());
696        }
697        for hash in &self.hashes {
698            hasher_input.extend(hash.value.as_bytes());
699        }
700        for vuln in &self.vulnerabilities {
701            hasher_input.extend(vuln.id.as_bytes());
702        }
703
704        self.content_hash = xxh3_64(&hasher_input);
705    }
706
707    /// Check if this is an OSS (open source) component
708    #[must_use]
709    pub fn is_oss(&self) -> bool {
710        // Check if any declared license is OSS
711        self.licenses.declared.iter().any(|l| l.is_valid_spdx) || self.identifiers.purl.is_some()
712    }
713
714    /// Get display name with version
715    #[must_use]
716    pub fn display_name(&self) -> String {
717        self.version
718            .as_ref()
719            .map_or_else(|| self.name.clone(), |v| format!("{}@{}", self.name, v))
720    }
721}
722
723/// Dependency edge between components
724#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
725pub struct DependencyEdge {
726    /// Source component
727    pub from: CanonicalId,
728    /// Target component
729    pub to: CanonicalId,
730    /// Relationship type
731    pub relationship: DependencyType,
732    /// Dependency scope
733    pub scope: Option<DependencyScope>,
734}
735
736impl DependencyEdge {
737    /// Create a new dependency edge
738    #[must_use]
739    pub const fn new(from: CanonicalId, to: CanonicalId, relationship: DependencyType) -> Self {
740        Self {
741            from,
742            to,
743            relationship,
744            scope: None,
745        }
746    }
747
748    /// Set the dependency scope
749    #[must_use]
750    pub const fn with_scope(mut self, scope: DependencyScope) -> Self {
751        self.scope = Some(scope);
752        self
753    }
754
755    /// Check if this is a direct dependency
756    #[must_use]
757    pub const fn is_direct(&self) -> bool {
758        matches!(
759            self.relationship,
760            DependencyType::DependsOn
761                | DependencyType::DevDependsOn
762                | DependencyType::BuildDependsOn
763                | DependencyType::TestDependsOn
764                | DependencyType::RuntimeDependsOn
765        )
766    }
767}