use super::{
CanonicalId, ComponentExtensions, ComponentIdentifiers, ComponentType, CryptoProperties,
DependencyScope, DependencyType, DocumentMetadata, Ecosystem, ExternalReference,
FormatExtensions, Hash, LicenseInfo, Organization, VexStatus, VulnerabilityRef,
};
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
use xxhash_rust::xxh3::xxh3_64;
/// In-memory SBOM model: document metadata, components keyed by canonical ID,
/// and the dependency edges between those components.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NormalizedSbom {
/// Document-level metadata.
pub document: DocumentMetadata,
/// Components keyed by their canonical ID (`IndexMap` keeps insertion order).
pub components: IndexMap<CanonicalId, Component>,
/// Dependency edges, stored in the order they were added.
pub edges: Vec<DependencyEdge>,
/// Format extension data (see `FormatExtensions`).
pub extensions: FormatExtensions,
/// Hash written by `calculate_content_hash`; `0` on a freshly created SBOM.
pub content_hash: u64,
/// Canonical ID of the primary component, if one has been set.
pub primary_component_id: Option<CanonicalId>,
/// Number of `add_component` calls that overwrote an entry whose format ID
/// or name differed from the new one. Not serialized.
#[serde(skip)]
pub collision_count: usize,
}
impl NormalizedSbom {
#[must_use]
pub fn new(document: DocumentMetadata) -> Self {
Self {
document,
components: IndexMap::new(),
edges: Vec::new(),
extensions: FormatExtensions::default(),
content_hash: 0,
primary_component_id: None,
collision_count: 0,
}
}
pub fn add_component(&mut self, component: Component) -> bool {
let id = component.canonical_id.clone();
if let Some(existing) = self.components.get(&id) {
if existing.identifiers.format_id != component.identifiers.format_id
|| existing.name != component.name
{
self.collision_count += 1;
}
self.components.insert(id, component);
true
} else {
self.components.insert(id, component);
false
}
}
pub fn log_collision_summary(&self) {
if self.collision_count > 0 {
tracing::info!(
collision_count = self.collision_count,
"Canonical ID collisions: {} distinct components resolved to the same ID \
and were overwritten. Consider adding PURL identifiers to disambiguate.",
self.collision_count
);
}
}
pub fn add_edge(&mut self, edge: DependencyEdge) {
self.edges.push(edge);
}
#[must_use]
pub fn get_component(&self, id: &CanonicalId) -> Option<&Component> {
self.components.get(id)
}
#[must_use]
pub fn get_dependencies(&self, id: &CanonicalId) -> Vec<&DependencyEdge> {
self.edges.iter().filter(|e| &e.from == id).collect()
}
#[must_use]
pub fn get_dependents(&self, id: &CanonicalId) -> Vec<&DependencyEdge> {
self.edges.iter().filter(|e| &e.to == id).collect()
}
pub fn calculate_content_hash(&mut self) {
let mut hasher_input = Vec::new();
if let Ok(meta_json) = serde_json::to_vec(&self.document) {
hasher_input.extend(meta_json);
}
let mut component_ids: Vec<_> = self.components.keys().collect();
component_ids.sort_by(|a, b| a.value().cmp(b.value()));
for id in component_ids {
if let Some(comp) = self.components.get(id) {
hasher_input.extend(comp.content_hash.to_le_bytes());
}
}
let mut edge_keys: Vec<_> = self
.edges
.iter()
.map(|edge| {
(
edge.from.value(),
edge.to.value(),
edge.relationship.to_string(),
edge.scope
.as_ref()
.map_or(String::new(), std::string::ToString::to_string),
)
})
.collect();
edge_keys.sort();
for (from, to, relationship, scope) in &edge_keys {
hasher_input.extend(from.as_bytes());
hasher_input.extend(to.as_bytes());
hasher_input.extend(relationship.as_bytes());
hasher_input.extend(scope.as_bytes());
}
self.content_hash = xxh3_64(&hasher_input);
}
#[must_use]
pub fn component_count(&self) -> usize {
self.components.len()
}
#[must_use]
pub fn primary_component(&self) -> Option<&Component> {
self.primary_component_id
.as_ref()
.and_then(|id| self.components.get(id))
}
pub fn set_primary_component(&mut self, id: CanonicalId) {
self.primary_component_id = Some(id);
}
pub fn ecosystems(&self) -> Vec<&Ecosystem> {
let mut ecosystems: Vec<_> = self
.components
.values()
.filter_map(|c| c.ecosystem.as_ref())
.collect();
ecosystems.sort_by_key(std::string::ToString::to_string);
ecosystems.dedup();
ecosystems
}
#[must_use]
pub fn all_vulnerabilities(&self) -> Vec<(&Component, &VulnerabilityRef)> {
self.components
.values()
.flat_map(|c| c.vulnerabilities.iter().map(move |v| (c, v)))
.collect()
}
#[must_use]
pub fn vulnerability_counts(&self) -> VulnerabilityCounts {
let mut counts = VulnerabilityCounts::default();
for (_, vuln) in self.all_vulnerabilities() {
match vuln.severity {
Some(super::Severity::Critical) => counts.critical += 1,
Some(super::Severity::High) => counts.high += 1,
Some(super::Severity::Medium) => counts.medium += 1,
Some(super::Severity::Low) => counts.low += 1,
_ => counts.unknown += 1,
}
}
counts
}
pub fn build_index(&self) -> super::NormalizedSbomIndex {
super::NormalizedSbomIndex::build(self)
}
#[must_use]
pub fn get_dependencies_indexed<'a>(
&'a self,
id: &CanonicalId,
index: &super::NormalizedSbomIndex,
) -> Vec<&'a DependencyEdge> {
index.dependencies_of(id, &self.edges)
}
#[must_use]
pub fn get_dependents_indexed<'a>(
&'a self,
id: &CanonicalId,
index: &super::NormalizedSbomIndex,
) -> Vec<&'a DependencyEdge> {
index.dependents_of(id, &self.edges)
}
#[must_use]
pub fn find_by_name_indexed(
&self,
name: &str,
index: &super::NormalizedSbomIndex,
) -> Vec<&Component> {
let name_lower = name.to_lowercase();
index
.find_by_name_lower(&name_lower)
.iter()
.filter_map(|id| self.components.get(id))
.collect()
}
#[must_use]
pub fn search_by_name_indexed(
&self,
query: &str,
index: &super::NormalizedSbomIndex,
) -> Vec<&Component> {
let query_lower = query.to_lowercase();
index
.search_by_name(&query_lower)
.iter()
.filter_map(|id| self.components.get(id))
.collect()
}
pub fn apply_cra_sidecar(&mut self, sidecar: &super::CraSidecarMetadata) {
if self.document.security_contact.is_none() {
self.document
.security_contact
.clone_from(&sidecar.security_contact);
}
if self.document.vulnerability_disclosure_url.is_none() {
self.document
.vulnerability_disclosure_url
.clone_from(&sidecar.vulnerability_disclosure_url);
}
if self.document.support_end_date.is_none() {
self.document.support_end_date = sidecar.support_end_date;
}
if self.document.name.is_none() {
self.document.name.clone_from(&sidecar.product_name);
}
if let Some(manufacturer) = &sidecar.manufacturer_name {
let has_org = self
.document
.creators
.iter()
.any(|c| c.creator_type == super::CreatorType::Organization);
if !has_org {
self.document.creators.push(super::Creator {
creator_type: super::CreatorType::Organization,
name: manufacturer.clone(),
email: sidecar.manufacturer_email.clone(),
});
}
}
}
}
/// The default SBOM is an empty one built from default document metadata.
impl Default for NormalizedSbom {
    fn default() -> Self {
        let empty_metadata = DocumentMetadata::default();
        Self::new(empty_metadata)
    }
}
/// Vulnerability tallies, one counter per severity bucket.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VulnerabilityCounts {
/// Number of critical-severity vulnerabilities.
pub critical: usize,
/// Number of high-severity vulnerabilities.
pub high: usize,
/// Number of medium-severity vulnerabilities.
pub medium: usize,
/// Number of low-severity vulnerabilities.
pub low: usize,
/// Number of vulnerabilities with no severity or a severity outside the four buckets above.
pub unknown: usize,
}
impl VulnerabilityCounts {
    /// Returns the sum of all five severity buckets.
    #[must_use]
    pub const fn total(&self) -> usize {
        let rated = self.critical + self.high + self.medium + self.low;
        rated + self.unknown
    }
}
/// How stale a component is.
///
/// The first four variants are produced by `StalenessLevel::from_days`;
/// `Deprecated` and `Archived` are never derived from an age in days.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum StalenessLevel {
/// 0 to 182 days since the last update.
Fresh,
/// 183 to 365 days since the last update.
Aging,
/// 366 to 730 days since the last update.
Stale,
/// More than 730 days since the last update.
Abandoned,
/// Marked as deprecated (severity 4).
Deprecated,
/// Marked as archived (severity 4).
Archived,
}
impl StalenessLevel {
    /// Classifies an age in days.
    ///
    /// 0-182 days is `Fresh`, 183-365 `Aging`, 366-730 `Stale`, and anything
    /// older is `Abandoned`.
    #[must_use]
    pub const fn from_days(days: u32) -> Self {
        const FRESH_MAX_DAYS: u32 = 182;
        const AGING_MAX_DAYS: u32 = 365;
        const STALE_MAX_DAYS: u32 = 730;

        if days <= FRESH_MAX_DAYS {
            Self::Fresh
        } else if days <= AGING_MAX_DAYS {
            Self::Aging
        } else if days <= STALE_MAX_DAYS {
            Self::Stale
        } else {
            Self::Abandoned
        }
    }

    /// Returns the human-readable name of the level.
    #[must_use]
    pub const fn label(&self) -> &'static str {
        match *self {
            Self::Fresh => "Fresh",
            Self::Aging => "Aging",
            Self::Stale => "Stale",
            Self::Abandoned => "Abandoned",
            Self::Deprecated => "Deprecated",
            Self::Archived => "Archived",
        }
    }

    /// Returns a short symbol representing the level.
    #[must_use]
    pub const fn icon(&self) -> &'static str {
        match *self {
            Self::Fresh => "✓",
            Self::Aging => "⏳",
            Self::Stale => "⚠",
            Self::Abandoned => "⛔",
            Self::Deprecated => "⊘",
            Self::Archived => "📦",
        }
    }

    /// Returns a numeric severity from 0 (`Fresh`) to 4 (`Deprecated` or `Archived`).
    #[must_use]
    pub const fn severity(&self) -> u8 {
        match *self {
            Self::Fresh => 0,
            Self::Aging => 1,
            Self::Stale => 2,
            Self::Abandoned => 3,
            Self::Archived | Self::Deprecated => 4,
        }
    }
}
/// Displays the level as its `label()` text; width and fill flags are not applied.
impl std::fmt::Display for StalenessLevel {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = self.label();
        f.write_str(text)
    }
}
/// Staleness details attached to a component.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StalenessInfo {
/// Overall staleness classification.
pub level: StalenessLevel,
/// Timestamp of the last publication, when known.
pub last_published: Option<chrono::DateTime<chrono::Utc>>,
/// Deprecation flag; `false` unless set explicitly.
pub is_deprecated: bool,
/// Archived flag; `false` unless set explicitly.
pub is_archived: bool,
/// Optional deprecation message.
pub deprecation_message: Option<String>,
/// Whole days since the last update; filled in by `from_date`.
pub days_since_update: Option<u32>,
/// Latest known version, when available.
pub latest_version: Option<String>,
}
impl StalenessInfo {
    /// Creates staleness info carrying only a level.
    ///
    /// Both flags are `false` and every optional field is `None`.
    #[must_use]
    pub const fn new(level: StalenessLevel) -> Self {
        Self {
            level,
            last_published: None,
            is_deprecated: false,
            is_archived: false,
            deprecation_message: None,
            days_since_update: None,
            latest_version: None,
        }
    }

    /// Derives staleness info from a last-published timestamp.
    ///
    /// The age is the number of whole days between `last_published` and the
    /// current UTC time. Timestamps in the future count as 0 days. The level
    /// comes from [`StalenessLevel::from_days`]; all other fields keep the
    /// defaults of [`StalenessInfo::new`].
    #[must_use]
    pub fn from_date(last_published: chrono::DateTime<chrono::Utc>) -> Self {
        let elapsed_days = (chrono::Utc::now() - last_published).num_days();
        // Clamp negative ages to 0 and saturate at `u32::MAX`, so the narrowing
        // from `i64` can never wrap around.
        let days = u32::try_from(elapsed_days.max(0)).unwrap_or(u32::MAX);
        Self {
            last_published: Some(last_published),
            days_since_update: Some(days),
            ..Self::new(StalenessLevel::from_days(days))
        }
    }

    /// Returns `true` when the level's severity is 2 or higher
    /// (`Stale`, `Abandoned`, `Deprecated` or `Archived`).
    #[must_use]
    pub const fn needs_attention(&self) -> bool {
        self.level.severity() >= 2
    }
}
/// End-of-life status of a component.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum EolStatus {
/// Labelled "Supported"; severity 0.
Supported,
/// Labelled "Security Only"; severity 1.
SecurityOnly,
/// Labelled "Approaching EOL"; severity 2.
ApproachingEol,
/// Labelled "End of Life"; severity 3.
EndOfLife,
/// Labelled "Unknown"; severity 0, the same as `Supported`.
Unknown,
}
impl EolStatus {
    /// Returns the human-readable name of the status.
    #[must_use]
    pub const fn label(&self) -> &'static str {
        match *self {
            Self::Supported => "Supported",
            Self::SecurityOnly => "Security Only",
            Self::ApproachingEol => "Approaching EOL",
            Self::EndOfLife => "End of Life",
            Self::Unknown => "Unknown",
        }
    }

    /// Returns a short symbol representing the status.
    #[must_use]
    pub const fn icon(&self) -> &'static str {
        match *self {
            Self::Supported => "✓",
            Self::SecurityOnly => "🔒",
            Self::ApproachingEol => "⚠",
            Self::EndOfLife => "⛔",
            Self::Unknown => "?",
        }
    }

    /// Returns a numeric severity from 0 to 3.
    ///
    /// `Unknown` ranks the same as `Supported` (0).
    #[must_use]
    pub const fn severity(&self) -> u8 {
        match *self {
            Self::Supported | Self::Unknown => 0,
            Self::SecurityOnly => 1,
            Self::ApproachingEol => 2,
            Self::EndOfLife => 3,
        }
    }
}
/// Displays the status as its `label()` text; width and fill flags are not applied.
impl std::fmt::Display for EolStatus {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = self.label();
        f.write_str(text)
    }
}
/// End-of-life details attached to a component.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EolInfo {
/// Overall end-of-life classification.
pub status: EolStatus,
/// Product name. NOTE(review): presumably the identifier used by the EOL data source; confirm.
pub product: String,
/// Release cycle the component belongs to.
pub cycle: String,
/// End-of-life date, when known.
pub eol_date: Option<chrono::NaiveDate>,
/// Date support ends, when known.
pub support_end_date: Option<chrono::NaiveDate>,
/// Whether this cycle is flagged as LTS.
pub is_lts: bool,
/// Latest version within the cycle, when known.
pub latest_in_cycle: Option<String>,
/// Release date of the latest version, when known.
pub latest_release_date: Option<chrono::NaiveDate>,
/// Days until end of life. NOTE(review): signed, so presumably negative once EOL has passed; confirm.
pub days_until_eol: Option<i64>,
}
impl EolInfo {
    /// Returns `true` when the status severity is 2 or higher
    /// (`ApproachingEol` or `EndOfLife`).
    #[must_use]
    pub const fn needs_attention(&self) -> bool {
        let severity = self.status.severity();
        severity >= 2
    }
}
/// A single component (package, library, ...) in the SBOM.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Component {
/// Canonical ID derived from `identifiers`; the key used in `NormalizedSbom::components`.
pub canonical_id: CanonicalId,
/// Identifiers for this component (format ID, optional PURL, ...).
pub identifiers: ComponentIdentifiers,
/// Component name.
pub name: String,
/// Version string as given, if any.
pub version: Option<String>,
/// `version` parsed as semver; `None` when absent or not valid semver.
pub semver: Option<semver::Version>,
/// Component type; `Component::new` defaults this to `Library`.
pub component_type: ComponentType,
/// Ecosystem; `with_purl` sets it from the PURL type.
pub ecosystem: Option<Ecosystem>,
/// License information.
pub licenses: LicenseInfo,
/// Supplier organization, if known.
pub supplier: Option<Organization>,
/// Hashes recorded for the component.
pub hashes: Vec<Hash>,
/// External references.
pub external_refs: Vec<ExternalReference>,
/// Vulnerabilities attached to this component.
pub vulnerabilities: Vec<VulnerabilityRef>,
/// VEX status, if any.
pub vex_status: Option<VexStatus>,
/// Hash written by `calculate_content_hash`; `0` on a freshly created component.
pub content_hash: u64,
/// Component extension data (see `ComponentExtensions`).
pub extensions: ComponentExtensions,
/// Free-text description.
pub description: Option<String>,
/// Copyright text.
pub copyright: Option<String>,
/// Author.
pub author: Option<String>,
/// Group. NOTE(review): presumably a namespace such as a Maven groupId; confirm.
pub group: Option<String>,
/// External flag; contributes a marker byte to the content hash when `true`.
pub is_external: bool,
/// Version range, if any; included in the content hash.
pub version_range: Option<String>,
/// Staleness details, if any.
pub staleness: Option<StalenessInfo>,
/// End-of-life details, if any.
pub eol: Option<EolInfo>,
/// Cryptographic asset properties; omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub crypto_properties: Option<CryptoProperties>,
}
impl Component {
#[must_use]
pub fn new(name: String, format_id: String) -> Self {
let identifiers = ComponentIdentifiers::new(format_id);
let canonical_id = identifiers.canonical_id();
Self {
canonical_id,
identifiers,
name,
version: None,
semver: None,
component_type: ComponentType::Library,
ecosystem: None,
licenses: LicenseInfo::default(),
supplier: None,
hashes: Vec::new(),
external_refs: Vec::new(),
vulnerabilities: Vec::new(),
vex_status: None,
content_hash: 0,
extensions: ComponentExtensions::default(),
description: None,
copyright: None,
author: None,
group: None,
is_external: false,
version_range: None,
staleness: None,
eol: None,
crypto_properties: None,
}
}
#[must_use]
pub fn with_purl(mut self, purl: String) -> Self {
self.identifiers.purl = Some(purl);
self.canonical_id = self.identifiers.canonical_id();
if let Some(purl_str) = &self.identifiers.purl
&& let Some(purl_type) = purl_str
.strip_prefix("pkg:")
.and_then(|s| s.split('/').next())
{
self.ecosystem = Some(Ecosystem::from_purl_type(purl_type));
}
self
}
#[must_use]
pub fn with_version(mut self, version: String) -> Self {
self.semver = semver::Version::parse(&version).ok();
self.version = Some(version);
self
}
/// Recomputes `content_hash` from selected fields of this component.
///
/// The hash input is the concatenation, in this order, of: name, version,
/// PURL, declared license expressions, supplier name, hash values,
/// vulnerability IDs, an `E` marker when `is_external` is set, the version
/// range, and selected crypto properties. Absent optional values add nothing.
///
/// NOTE(review): fields are concatenated without delimiters, so different
/// field splits can yield the same input bytes. Changing this would change
/// every stored hash value.
pub fn calculate_content_hash(&mut self) {
let mut hasher_input = Vec::new();
hasher_input.extend(self.name.as_bytes());
if let Some(v) = &self.version {
hasher_input.extend(v.as_bytes());
}
if let Some(purl) = &self.identifiers.purl {
hasher_input.extend(purl.as_bytes());
}
// Only declared licenses are hashed, in stored order.
for license in &self.licenses.declared {
hasher_input.extend(license.expression.as_bytes());
}
if let Some(supplier) = &self.supplier {
hasher_input.extend(supplier.name.as_bytes());
}
for hash in &self.hashes {
hasher_input.extend(hash.value.as_bytes());
}
for vuln in &self.vulnerabilities {
hasher_input.extend(vuln.id.as_bytes());
}
// A single marker byte distinguishes external components.
if self.is_external {
hasher_input.push(b'E');
}
if let Some(vr) = &self.version_range {
hasher_input.extend(vr.as_bytes());
}
// Crypto properties: asset type, OID, algorithm family and quantum security
// level, key-material state, and certificate expiry.
if let Some(cp) = &self.crypto_properties {
hasher_input.extend(cp.asset_type.to_string().as_bytes());
if let Some(oid) = &cp.oid {
hasher_input.extend(oid.as_bytes());
}
if let Some(algo) = &cp.algorithm_properties {
if let Some(family) = &algo.algorithm_family {
hasher_input.extend(family.as_bytes());
}
if let Some(level) = algo.nist_quantum_security_level {
hasher_input.push(level);
}
}
if let Some(mat) = &cp.related_crypto_material_properties
&& let Some(state) = &mat.state
{
hasher_input.extend(state.to_string().as_bytes());
}
if let Some(cert) = &cp.certificate_properties
&& let Some(expiry) = &cert.not_valid_after
{
// The expiry timestamp is hashed in its RFC 3339 text form.
hasher_input.extend(expiry.to_rfc3339().as_bytes());
}
}
self.content_hash = xxh3_64(&hasher_input);
}
#[must_use]
pub fn is_oss(&self) -> bool {
self.licenses.declared.iter().any(|l| l.is_valid_spdx) || self.identifiers.purl.is_some()
}
#[must_use]
pub fn display_name(&self) -> String {
self.version
.as_ref()
.map_or_else(|| self.name.clone(), |v| format!("{}@{}", self.name, v))
}
}
/// A directed relationship between two components.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct DependencyEdge {
/// Canonical ID of the source component.
pub from: CanonicalId,
/// Canonical ID of the target component.
pub to: CanonicalId,
/// Kind of relationship between `from` and `to`.
pub relationship: DependencyType,
/// Optional dependency scope; `None` unless set via `with_scope`.
pub scope: Option<DependencyScope>,
}
impl DependencyEdge {
    /// Creates an edge from `from` to `to` with the given relationship and no scope.
    #[must_use]
    pub const fn new(from: CanonicalId, to: CanonicalId, relationship: DependencyType) -> Self {
        Self {
            from,
            to,
            relationship,
            scope: None,
        }
    }

    /// Returns the edge with its scope set to `scope`.
    #[must_use]
    pub const fn with_scope(mut self, scope: DependencyScope) -> Self {
        self.scope = Some(scope);
        self
    }

    /// Returns `true` when the relationship is one of the `*DependsOn`
    /// variants: plain, dev, build, test or runtime.
    #[must_use]
    pub const fn is_direct(&self) -> bool {
        let kind = &self.relationship;
        matches!(
            kind,
            DependencyType::DependsOn
                | DependencyType::DevDependsOn
                | DependencyType::BuildDependsOn
                | DependencyType::TestDependsOn
                | DependencyType::RuntimeDependsOn
        )
    }
}