// sbom_tools/model/sbom.rs

1//! Core SBOM and Component data structures.
2
3use super::{
4    CanonicalId, ComponentExtensions, ComponentIdentifiers, ComponentType, DependencyScope,
5    DependencyType, DocumentMetadata, Ecosystem, ExternalReference, FormatExtensions, Hash,
6    LicenseInfo, Organization, VexStatus, VulnerabilityRef,
7};
8use indexmap::IndexMap;
9use serde::{Deserialize, Serialize};
10use xxhash_rust::xxh3::xxh3_64;
11
12/// Normalized SBOM document - the canonical intermediate representation.
13///
14/// This structure represents an SBOM in a format-agnostic way, allowing
15/// comparison between `CycloneDX` and SPDX documents.
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct NormalizedSbom {
18    /// Document-level metadata
19    pub document: DocumentMetadata,
20    /// Components indexed by canonical ID
21    pub components: IndexMap<CanonicalId, Component>,
22    /// Dependency edges
23    pub edges: Vec<DependencyEdge>,
24    /// Format-specific extensions
25    pub extensions: FormatExtensions,
26    /// Content hash for quick equality checks
27    pub content_hash: u64,
28    /// Primary/root product component (`CycloneDX` metadata.component or SPDX documentDescribes)
29    /// This identifies the main product that this SBOM describes, important for CRA compliance.
30    pub primary_component_id: Option<CanonicalId>,
31    /// Number of canonical ID collisions encountered during parsing
32    #[serde(skip)]
33    pub collision_count: usize,
34}
35
36impl NormalizedSbom {
37    /// Create a new empty normalized SBOM
38    #[must_use] 
39    pub fn new(document: DocumentMetadata) -> Self {
40        Self {
41            document,
42            components: IndexMap::new(),
43            edges: Vec::new(),
44            extensions: FormatExtensions::default(),
45            content_hash: 0,
46            primary_component_id: None,
47            collision_count: 0,
48        }
49    }
50
51    /// Add a component to the SBOM.
52    ///
53    /// Returns `true` if a collision occurred (a component with the same canonical ID
54    /// was already present and has been overwritten). Collisions are logged as warnings.
55    pub fn add_component(&mut self, component: Component) -> bool {
56        let id = component.canonical_id.clone();
57        if let Some(existing) = self.components.get(&id) {
58            // Count genuinely different components that collide on canonical ID
59            if existing.identifiers.format_id != component.identifiers.format_id
60                || existing.name != component.name
61            {
62                self.collision_count += 1;
63            }
64            self.components.insert(id, component);
65            true
66        } else {
67            self.components.insert(id, component);
68            false
69        }
70    }
71
72    /// Log a single summary line if any canonical ID collisions occurred during parsing.
73    pub fn log_collision_summary(&self) {
74        if self.collision_count > 0 {
75            tracing::info!(
76                collision_count = self.collision_count,
77                "Canonical ID collisions: {} distinct components resolved to the same ID \
78                 and were overwritten. Consider adding PURL identifiers to disambiguate.",
79                self.collision_count
80            );
81        }
82    }
83
84    /// Add a dependency edge
85    pub fn add_edge(&mut self, edge: DependencyEdge) {
86        self.edges.push(edge);
87    }
88
89    /// Get a component by canonical ID
90    #[must_use] 
91    pub fn get_component(&self, id: &CanonicalId) -> Option<&Component> {
92        self.components.get(id)
93    }
94
95    /// Get dependencies of a component
96    #[must_use] 
97    pub fn get_dependencies(&self, id: &CanonicalId) -> Vec<&DependencyEdge> {
98        self.edges.iter().filter(|e| &e.from == id).collect()
99    }
100
101    /// Get dependents of a component
102    #[must_use] 
103    pub fn get_dependents(&self, id: &CanonicalId) -> Vec<&DependencyEdge> {
104        self.edges.iter().filter(|e| &e.to == id).collect()
105    }
106
107    /// Calculate and update the content hash
108    pub fn calculate_content_hash(&mut self) {
109        let mut hasher_input = Vec::new();
110
111        // Hash document metadata
112        if let Ok(meta_json) = serde_json::to_vec(&self.document) {
113            hasher_input.extend(meta_json);
114        }
115
116        // Hash all components (sorted for determinism)
117        let mut component_ids: Vec<_> = self.components.keys().collect();
118        component_ids.sort_by(|a, b| a.value().cmp(b.value()));
119
120        for id in component_ids {
121            if let Some(comp) = self.components.get(id) {
122                hasher_input.extend(comp.content_hash.to_le_bytes());
123            }
124        }
125
126        // Hash edges
127        for edge in &self.edges {
128            hasher_input.extend(edge.from.value().as_bytes());
129            hasher_input.extend(edge.to.value().as_bytes());
130        }
131
132        self.content_hash = xxh3_64(&hasher_input);
133    }
134
135    /// Get total component count
136    #[must_use] 
137    pub fn component_count(&self) -> usize {
138        self.components.len()
139    }
140
141    /// Get the primary/root product component if set
142    #[must_use] 
143    pub fn primary_component(&self) -> Option<&Component> {
144        self.primary_component_id
145            .as_ref()
146            .and_then(|id| self.components.get(id))
147    }
148
149    /// Set the primary component by its canonical ID
150    pub fn set_primary_component(&mut self, id: CanonicalId) {
151        self.primary_component_id = Some(id);
152    }
153
154    /// Get all unique ecosystems in the SBOM
155    pub fn ecosystems(&self) -> Vec<&Ecosystem> {
156        let mut ecosystems: Vec<_> = self
157            .components
158            .values()
159            .filter_map(|c| c.ecosystem.as_ref())
160            .collect();
161        ecosystems.sort_by_key(std::string::ToString::to_string);
162        ecosystems.dedup();
163        ecosystems
164    }
165
166    /// Get all vulnerabilities across all components
167    #[must_use] 
168    pub fn all_vulnerabilities(&self) -> Vec<(&Component, &VulnerabilityRef)> {
169        self.components
170            .values()
171            .flat_map(|c| c.vulnerabilities.iter().map(move |v| (c, v)))
172            .collect()
173    }
174
175    /// Count vulnerabilities by severity
176    #[must_use] 
177    pub fn vulnerability_counts(&self) -> VulnerabilityCounts {
178        let mut counts = VulnerabilityCounts::default();
179        for (_, vuln) in self.all_vulnerabilities() {
180            match vuln.severity {
181                Some(super::Severity::Critical) => counts.critical += 1,
182                Some(super::Severity::High) => counts.high += 1,
183                Some(super::Severity::Medium) => counts.medium += 1,
184                Some(super::Severity::Low) => counts.low += 1,
185                _ => counts.unknown += 1,
186            }
187        }
188        counts
189    }
190
191    /// Build an index for this SBOM.
192    ///
193    /// The index provides O(1) lookups for dependencies, dependents,
194    /// and name-based searches. Build once and reuse for multiple operations.
195    ///
196    /// # Example
197    ///
198    /// ```ignore
199    /// let sbom = parse_sbom(&path)?;
200    /// let index = sbom.build_index();
201    ///
202    /// // Fast dependency lookup
203    /// let deps = index.dependencies_of(&component_id, &sbom.edges);
204    /// ```
205    pub fn build_index(&self) -> super::NormalizedSbomIndex {
206        super::NormalizedSbomIndex::build(self)
207    }
208
209    /// Get dependencies using an index (O(k) instead of O(edges)).
210    ///
211    /// Use this when you have a prebuilt index for repeated lookups.
212    #[must_use] 
213    pub fn get_dependencies_indexed<'a>(
214        &'a self,
215        id: &CanonicalId,
216        index: &super::NormalizedSbomIndex,
217    ) -> Vec<&'a DependencyEdge> {
218        index.dependencies_of(id, &self.edges)
219    }
220
221    /// Get dependents using an index (O(k) instead of O(edges)).
222    ///
223    /// Use this when you have a prebuilt index for repeated lookups.
224    #[must_use] 
225    pub fn get_dependents_indexed<'a>(
226        &'a self,
227        id: &CanonicalId,
228        index: &super::NormalizedSbomIndex,
229    ) -> Vec<&'a DependencyEdge> {
230        index.dependents_of(id, &self.edges)
231    }
232
233    /// Find components by name (case-insensitive) using an index.
234    ///
235    /// Returns components whose lowercased name exactly matches the query.
236    #[must_use] 
237    pub fn find_by_name_indexed(
238        &self,
239        name: &str,
240        index: &super::NormalizedSbomIndex,
241    ) -> Vec<&Component> {
242        let name_lower = name.to_lowercase();
243        index
244            .find_by_name_lower(&name_lower)
245            .iter()
246            .filter_map(|id| self.components.get(id))
247            .collect()
248    }
249
250    /// Search components by name (case-insensitive substring) using an index.
251    ///
252    /// Returns components whose name contains the query substring.
253    #[must_use] 
254    pub fn search_by_name_indexed(
255        &self,
256        query: &str,
257        index: &super::NormalizedSbomIndex,
258    ) -> Vec<&Component> {
259        let query_lower = query.to_lowercase();
260        index
261            .search_by_name(&query_lower)
262            .iter()
263            .filter_map(|id| self.components.get(id))
264            .collect()
265    }
266
267    /// Apply CRA sidecar metadata to supplement SBOM fields.
268    ///
269    /// Sidecar values only override SBOM fields if the SBOM field is None/empty.
270    /// This ensures SBOM data takes precedence when available.
271    pub fn apply_cra_sidecar(&mut self, sidecar: &super::CraSidecarMetadata) {
272        // Only apply if SBOM doesn't already have the value
273        if self.document.security_contact.is_none() {
274            self.document.security_contact.clone_from(&sidecar.security_contact);
275        }
276
277        if self.document.vulnerability_disclosure_url.is_none() {
278            self.document
279                .vulnerability_disclosure_url
280                .clone_from(&sidecar.vulnerability_disclosure_url);
281        }
282
283        if self.document.support_end_date.is_none() {
284            self.document.support_end_date = sidecar.support_end_date;
285        }
286
287        if self.document.name.is_none() {
288            self.document.name.clone_from(&sidecar.product_name);
289        }
290
291        // Add manufacturer as creator if not present
292        if let Some(manufacturer) = &sidecar.manufacturer_name {
293            let has_org = self
294                .document
295                .creators
296                .iter()
297                .any(|c| c.creator_type == super::CreatorType::Organization);
298
299            if !has_org {
300                self.document.creators.push(super::Creator {
301                    creator_type: super::CreatorType::Organization,
302                    name: manufacturer.clone(),
303                    email: sidecar.manufacturer_email.clone(),
304                });
305            }
306        }
307    }
308}
309
310impl Default for NormalizedSbom {
311    fn default() -> Self {
312        Self::new(DocumentMetadata::default())
313    }
314}
315
316/// Vulnerability counts by severity
317#[derive(Debug, Clone, Default, Serialize, Deserialize)]
318pub struct VulnerabilityCounts {
319    pub critical: usize,
320    pub high: usize,
321    pub medium: usize,
322    pub low: usize,
323    pub unknown: usize,
324}
325
326impl VulnerabilityCounts {
327    #[must_use] 
328    pub const fn total(&self) -> usize {
329        self.critical + self.high + self.medium + self.low + self.unknown
330    }
331}
332
333/// Staleness level classification for dependencies
334#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
335#[non_exhaustive]
336pub enum StalenessLevel {
337    /// Updated within 6 months
338    Fresh,
339    /// 6-12 months since last update
340    Aging,
341    /// 1-2 years since last update
342    Stale,
343    /// More than 2 years since last update
344    Abandoned,
345    /// Explicitly marked as deprecated
346    Deprecated,
347    /// Repository/package archived
348    Archived,
349}
350
351impl StalenessLevel {
352    /// Create from age in days
353    #[must_use] 
354    pub const fn from_days(days: u32) -> Self {
355        match days {
356            0..=182 => Self::Fresh,      // ~6 months
357            183..=365 => Self::Aging,    // 6-12 months
358            366..=730 => Self::Stale,    // 1-2 years
359            _ => Self::Abandoned,        // >2 years
360        }
361    }
362
363    /// Get display label
364    #[must_use] 
365    pub const fn label(&self) -> &'static str {
366        match self {
367            Self::Fresh => "Fresh",
368            Self::Aging => "Aging",
369            Self::Stale => "Stale",
370            Self::Abandoned => "Abandoned",
371            Self::Deprecated => "Deprecated",
372            Self::Archived => "Archived",
373        }
374    }
375
376    /// Get icon for TUI display
377    #[must_use] 
378    pub const fn icon(&self) -> &'static str {
379        match self {
380            Self::Fresh => "✓",
381            Self::Aging => "⏳",
382            Self::Stale => "⚠",
383            Self::Abandoned => "⛔",
384            Self::Deprecated => "⊘",
385            Self::Archived => "📦",
386        }
387    }
388
389    /// Get severity weight (higher = worse)
390    #[must_use] 
391    pub const fn severity(&self) -> u8 {
392        match self {
393            Self::Fresh => 0,
394            Self::Aging => 1,
395            Self::Stale => 2,
396            Self::Abandoned => 3,
397            Self::Deprecated | Self::Archived => 4,
398        }
399    }
400}
401
402impl std::fmt::Display for StalenessLevel {
403    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
404        write!(f, "{}", self.label())
405    }
406}
407
408/// Staleness information for a component
409#[derive(Debug, Clone, Serialize, Deserialize)]
410pub struct StalenessInfo {
411    /// Staleness classification
412    pub level: StalenessLevel,
413    /// Last publish/release date
414    pub last_published: Option<chrono::DateTime<chrono::Utc>>,
415    /// Whether explicitly deprecated by maintainer
416    pub is_deprecated: bool,
417    /// Whether repository/package is archived
418    pub is_archived: bool,
419    /// Deprecation message if available
420    pub deprecation_message: Option<String>,
421    /// Days since last update
422    pub days_since_update: Option<u32>,
423    /// Latest available version (if different from current)
424    pub latest_version: Option<String>,
425}
426
427impl StalenessInfo {
428    /// Create new staleness info
429    #[must_use] 
430    pub const fn new(level: StalenessLevel) -> Self {
431        Self {
432            level,
433            last_published: None,
434            is_deprecated: false,
435            is_archived: false,
436            deprecation_message: None,
437            days_since_update: None,
438            latest_version: None,
439        }
440    }
441
442    /// Create from last published date
443    #[must_use] 
444    pub fn from_date(last_published: chrono::DateTime<chrono::Utc>) -> Self {
445        let days = (chrono::Utc::now() - last_published).num_days() as u32;
446        let level = StalenessLevel::from_days(days);
447        Self {
448            level,
449            last_published: Some(last_published),
450            is_deprecated: false,
451            is_archived: false,
452            deprecation_message: None,
453            days_since_update: Some(days),
454            latest_version: None,
455        }
456    }
457
458    /// Check if component needs attention (stale or worse)
459    #[must_use]
460    pub const fn needs_attention(&self) -> bool {
461        self.level.severity() >= 2
462    }
463}
464
465/// End-of-life status classification for components
466#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
467#[non_exhaustive]
468pub enum EolStatus {
469    /// Actively receiving updates
470    Supported,
471    /// Active support ended, security patches continue (LTS phase)
472    SecurityOnly,
473    /// Within 6 months of EOL date
474    ApproachingEol,
475    /// Past EOL, no more updates
476    EndOfLife,
477    /// Product found but cycle not matched
478    Unknown,
479}
480
481impl EolStatus {
482    /// Get display label
483    #[must_use]
484    pub const fn label(&self) -> &'static str {
485        match self {
486            Self::Supported => "Supported",
487            Self::SecurityOnly => "Security Only",
488            Self::ApproachingEol => "Approaching EOL",
489            Self::EndOfLife => "End of Life",
490            Self::Unknown => "Unknown",
491        }
492    }
493
494    /// Get icon for TUI display
495    #[must_use]
496    pub const fn icon(&self) -> &'static str {
497        match self {
498            Self::Supported => "✓",
499            Self::SecurityOnly => "🔒",
500            Self::ApproachingEol => "⚠",
501            Self::EndOfLife => "⛔",
502            Self::Unknown => "?",
503        }
504    }
505
506    /// Get severity weight (higher = worse)
507    #[must_use]
508    pub const fn severity(&self) -> u8 {
509        match self {
510            Self::Supported => 0,
511            Self::SecurityOnly => 1,
512            Self::ApproachingEol => 2,
513            Self::EndOfLife => 3,
514            Self::Unknown => 0,
515        }
516    }
517}
518
519impl std::fmt::Display for EolStatus {
520    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
521        write!(f, "{}", self.label())
522    }
523}
524
525/// End-of-life information for a component
526#[derive(Debug, Clone, Serialize, Deserialize)]
527pub struct EolInfo {
528    /// EOL status classification
529    pub status: EolStatus,
530    /// Matched endoflife.date product slug
531    pub product: String,
532    /// Matched release cycle (e.g., "3.11")
533    pub cycle: String,
534    /// EOL date if known
535    pub eol_date: Option<chrono::NaiveDate>,
536    /// Active support end date
537    pub support_end_date: Option<chrono::NaiveDate>,
538    /// Whether this is an LTS release
539    pub is_lts: bool,
540    /// Latest patch version in this cycle
541    pub latest_in_cycle: Option<String>,
542    /// Latest release date in this cycle
543    pub latest_release_date: Option<chrono::NaiveDate>,
544    /// Days until EOL (negative = past EOL)
545    pub days_until_eol: Option<i64>,
546}
547
548impl EolInfo {
549    /// Check if the component needs attention (approaching or past EOL)
550    #[must_use]
551    pub const fn needs_attention(&self) -> bool {
552        self.status.severity() >= 2
553    }
554}
555
556/// Component in the normalized SBOM
557#[derive(Debug, Clone, Serialize, Deserialize)]
558pub struct Component {
559    /// Canonical identifier
560    pub canonical_id: CanonicalId,
561    /// Various identifiers (PURL, CPE, etc.)
562    pub identifiers: ComponentIdentifiers,
563    /// Component name
564    pub name: String,
565    /// Version string
566    pub version: Option<String>,
567    /// Parsed semantic version (if valid)
568    pub semver: Option<semver::Version>,
569    /// Component type
570    pub component_type: ComponentType,
571    /// Package ecosystem
572    pub ecosystem: Option<Ecosystem>,
573    /// License information
574    pub licenses: LicenseInfo,
575    /// Supplier/vendor information
576    pub supplier: Option<Organization>,
577    /// Cryptographic hashes
578    pub hashes: Vec<Hash>,
579    /// External references
580    pub external_refs: Vec<ExternalReference>,
581    /// Known vulnerabilities
582    pub vulnerabilities: Vec<VulnerabilityRef>,
583    /// VEX status
584    pub vex_status: Option<VexStatus>,
585    /// Content hash for quick comparison
586    pub content_hash: u64,
587    /// Format-specific extensions
588    pub extensions: ComponentExtensions,
589    /// Description
590    pub description: Option<String>,
591    /// Copyright text
592    pub copyright: Option<String>,
593    /// Author information
594    pub author: Option<String>,
595    /// Group/namespace (e.g., Maven groupId)
596    pub group: Option<String>,
597    /// Staleness information (populated by enrichment)
598    pub staleness: Option<StalenessInfo>,
599    /// End-of-life information (populated by enrichment)
600    pub eol: Option<EolInfo>,
601}
602
603impl Component {
604    /// Create a new component with minimal required fields
605    #[must_use] 
606    pub fn new(name: String, format_id: String) -> Self {
607        let identifiers = ComponentIdentifiers::new(format_id);
608        let canonical_id = identifiers.canonical_id();
609
610        Self {
611            canonical_id,
612            identifiers,
613            name,
614            version: None,
615            semver: None,
616            component_type: ComponentType::Library,
617            ecosystem: None,
618            licenses: LicenseInfo::default(),
619            supplier: None,
620            hashes: Vec::new(),
621            external_refs: Vec::new(),
622            vulnerabilities: Vec::new(),
623            vex_status: None,
624            content_hash: 0,
625            extensions: ComponentExtensions::default(),
626            description: None,
627            copyright: None,
628            author: None,
629            group: None,
630            staleness: None,
631            eol: None,
632        }
633    }
634
635    /// Set the PURL and update canonical ID
636    #[must_use]
637    pub fn with_purl(mut self, purl: String) -> Self {
638        self.identifiers.purl = Some(purl);
639        self.canonical_id = self.identifiers.canonical_id();
640
641        // Try to extract ecosystem from PURL
642        if let Some(purl_str) = &self.identifiers.purl {
643            if let Some(purl_type) = purl_str
644                .strip_prefix("pkg:")
645                .and_then(|s| s.split('/').next())
646            {
647                self.ecosystem = Some(Ecosystem::from_purl_type(purl_type));
648            }
649        }
650
651        self
652    }
653
654    /// Set the version and try to parse as semver
655    #[must_use]
656    pub fn with_version(mut self, version: String) -> Self {
657        self.semver = semver::Version::parse(&version).ok();
658        self.version = Some(version);
659        self
660    }
661
662    /// Calculate and update content hash
663    pub fn calculate_content_hash(&mut self) {
664        let mut hasher_input = Vec::new();
665
666        hasher_input.extend(self.name.as_bytes());
667        if let Some(v) = &self.version {
668            hasher_input.extend(v.as_bytes());
669        }
670        if let Some(purl) = &self.identifiers.purl {
671            hasher_input.extend(purl.as_bytes());
672        }
673        for license in &self.licenses.declared {
674            hasher_input.extend(license.expression.as_bytes());
675        }
676        if let Some(supplier) = &self.supplier {
677            hasher_input.extend(supplier.name.as_bytes());
678        }
679        for hash in &self.hashes {
680            hasher_input.extend(hash.value.as_bytes());
681        }
682        for vuln in &self.vulnerabilities {
683            hasher_input.extend(vuln.id.as_bytes());
684        }
685
686        self.content_hash = xxh3_64(&hasher_input);
687    }
688
689    /// Check if this is an OSS (open source) component
690    #[must_use] 
691    pub fn is_oss(&self) -> bool {
692        // Check if any declared license is OSS
693        self.licenses.declared.iter().any(|l| l.is_valid_spdx) || self.identifiers.purl.is_some()
694    }
695
696    /// Get display name with version
697    #[must_use] 
698    pub fn display_name(&self) -> String {
699        self.version.as_ref().map_or_else(|| self.name.clone(), |v| format!("{}@{}", self.name, v))
700    }
701}
702
703/// Dependency edge between components
704#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
705pub struct DependencyEdge {
706    /// Source component
707    pub from: CanonicalId,
708    /// Target component
709    pub to: CanonicalId,
710    /// Relationship type
711    pub relationship: DependencyType,
712    /// Dependency scope
713    pub scope: Option<DependencyScope>,
714}
715
716impl DependencyEdge {
717    /// Create a new dependency edge
718    #[must_use] 
719    pub const fn new(from: CanonicalId, to: CanonicalId, relationship: DependencyType) -> Self {
720        Self {
721            from,
722            to,
723            relationship,
724            scope: None,
725        }
726    }
727
728    /// Check if this is a direct dependency
729    #[must_use] 
730    pub const fn is_direct(&self) -> bool {
731        matches!(
732            self.relationship,
733            DependencyType::DependsOn
734                | DependencyType::DevDependsOn
735                | DependencyType::BuildDependsOn
736                | DependencyType::TestDependsOn
737                | DependencyType::RuntimeDependsOn
738        )
739    }
740}