1use super::{
4 CanonicalId, ComponentExtensions, ComponentIdentifiers, ComponentType, DependencyScope,
5 DependencyType, DocumentMetadata, Ecosystem, ExternalReference, FormatExtensions, Hash,
6 LicenseInfo, Organization, VexStatus, VulnerabilityRef,
7};
8use indexmap::IndexMap;
9use serde::{Deserialize, Serialize};
10use xxhash_rust::xxh3::xxh3_64;
11
/// Format-independent, in-memory representation of an SBOM document.
///
/// Holds the document metadata, the components keyed by canonical ID, and the
/// dependency edges between those components.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NormalizedSbom {
    /// Document-level metadata.
    pub document: DocumentMetadata,
    /// Components keyed by their canonical ID.
    pub components: IndexMap<CanonicalId, Component>,
    /// Dependency edges between components, in insertion order.
    pub edges: Vec<DependencyEdge>,
    /// Format-specific extension data.
    // NOTE(review): presumably data from the source format that has no
    // normalized field — confirm against `FormatExtensions`.
    pub extensions: FormatExtensions,
    /// Content hash of the document; `new` initializes it to `0` and
    /// `calculate_content_hash` overwrites it.
    pub content_hash: u64,
    /// Canonical ID of the primary component, if one has been set.
    pub primary_component_id: Option<CanonicalId>,
    /// Number of times `add_component` replaced an entry whose `format_id` or
    /// name differed from the incoming component. Skipped by serde.
    #[serde(skip)]
    pub collision_count: usize,
}
35
36impl NormalizedSbom {
    /// Creates an empty SBOM carrying the given document metadata.
    ///
    /// The result has no components or edges, default extensions, a content
    /// hash of `0`, no primary component, and a collision count of `0`.
    pub fn new(document: DocumentMetadata) -> Self {
        Self {
            document,
            components: IndexMap::new(),
            edges: Vec::new(),
            extensions: FormatExtensions::default(),
            content_hash: 0,
            primary_component_id: None,
            collision_count: 0,
        }
    }
49
50 pub fn add_component(&mut self, component: Component) -> bool {
55 let id = component.canonical_id.clone();
56 if let Some(existing) = self.components.get(&id) {
57 if existing.identifiers.format_id != component.identifiers.format_id
59 || existing.name != component.name
60 {
61 self.collision_count += 1;
62 }
63 self.components.insert(id, component);
64 true
65 } else {
66 self.components.insert(id, component);
67 false
68 }
69 }
70
71 pub fn log_collision_summary(&self) {
73 if self.collision_count > 0 {
74 tracing::info!(
75 collision_count = self.collision_count,
76 "Canonical ID collisions: {} distinct components resolved to the same ID \
77 and were overwritten. Consider adding PURL identifiers to disambiguate.",
78 self.collision_count
79 );
80 }
81 }
82
    /// Appends a dependency edge to the edge list.
    ///
    /// No de-duplication or endpoint validation is performed here.
    pub fn add_edge(&mut self, edge: DependencyEdge) {
        self.edges.push(edge);
    }
87
    /// Looks up a component by canonical ID, returning `None` if it is absent.
    pub fn get_component(&self, id: &CanonicalId) -> Option<&Component> {
        self.components.get(id)
    }
92
93 pub fn get_dependencies(&self, id: &CanonicalId) -> Vec<&DependencyEdge> {
95 self.edges.iter().filter(|e| &e.from == id).collect()
96 }
97
98 pub fn get_dependents(&self, id: &CanonicalId) -> Vec<&DependencyEdge> {
100 self.edges.iter().filter(|e| &e.to == id).collect()
101 }
102
103 pub fn calculate_content_hash(&mut self) {
105 let mut hasher_input = Vec::new();
106
107 if let Ok(meta_json) = serde_json::to_vec(&self.document) {
109 hasher_input.extend(meta_json);
110 }
111
112 let mut component_ids: Vec<_> = self.components.keys().collect();
114 component_ids.sort_by(|a, b| a.value().cmp(b.value()));
115
116 for id in component_ids {
117 if let Some(comp) = self.components.get(id) {
118 hasher_input.extend(comp.content_hash.to_le_bytes());
119 }
120 }
121
122 for edge in &self.edges {
124 hasher_input.extend(edge.from.value().as_bytes());
125 hasher_input.extend(edge.to.value().as_bytes());
126 }
127
128 self.content_hash = xxh3_64(&hasher_input);
129 }
130
    /// Returns the number of components stored in this SBOM.
    pub fn component_count(&self) -> usize {
        self.components.len()
    }
135
136 pub fn primary_component(&self) -> Option<&Component> {
138 self.primary_component_id
139 .as_ref()
140 .and_then(|id| self.components.get(id))
141 }
142
    /// Records `id` as the primary component's canonical ID.
    ///
    /// The ID is stored without checking that a matching component exists.
    pub fn set_primary_component(&mut self, id: CanonicalId) {
        self.primary_component_id = Some(id);
    }
147
148 pub fn ecosystems(&self) -> Vec<&Ecosystem> {
150 let mut ecosystems: Vec<_> = self
151 .components
152 .values()
153 .filter_map(|c| c.ecosystem.as_ref())
154 .collect();
155 ecosystems.sort_by_key(|a| a.to_string());
156 ecosystems.dedup();
157 ecosystems
158 }
159
160 pub fn all_vulnerabilities(&self) -> Vec<(&Component, &VulnerabilityRef)> {
162 self.components
163 .values()
164 .flat_map(|c| c.vulnerabilities.iter().map(move |v| (c, v)))
165 .collect()
166 }
167
168 pub fn vulnerability_counts(&self) -> VulnerabilityCounts {
170 let mut counts = VulnerabilityCounts::default();
171 for (_, vuln) in self.all_vulnerabilities() {
172 match vuln.severity {
173 Some(super::Severity::Critical) => counts.critical += 1,
174 Some(super::Severity::High) => counts.high += 1,
175 Some(super::Severity::Medium) => counts.medium += 1,
176 Some(super::Severity::Low) => counts.low += 1,
177 _ => counts.unknown += 1,
178 }
179 }
180 counts
181 }
182
    /// Builds a `NormalizedSbomIndex` for this SBOM via
    /// `NormalizedSbomIndex::build`.
    ///
    /// The returned index is what the `*_indexed` methods expect to receive.
    pub fn build_index(&self) -> super::NormalizedSbomIndex {
        super::NormalizedSbomIndex::build(self)
    }
200
    /// Index-backed lookup of the dependency edges for `id`.
    ///
    /// Delegates to `NormalizedSbomIndex::dependencies_of`, passing this
    /// SBOM's edge list; the returned references borrow from `self`.
    // NOTE(review): presumably equivalent to `get_dependencies` without the
    // linear scan, and `index` is expected to have been built from this same
    // SBOM — confirm against the index implementation.
    pub fn get_dependencies_indexed<'a>(
        &'a self,
        id: &CanonicalId,
        index: &super::NormalizedSbomIndex,
    ) -> Vec<&'a DependencyEdge> {
        index.dependencies_of(id, &self.edges)
    }
211
    /// Index-backed lookup of the edges that point at `id`.
    ///
    /// Delegates to `NormalizedSbomIndex::dependents_of`, passing this SBOM's
    /// edge list; the returned references borrow from `self`.
    // NOTE(review): presumably equivalent to `get_dependents` without the
    // linear scan, and `index` is expected to have been built from this same
    // SBOM — confirm against the index implementation.
    pub fn get_dependents_indexed<'a>(
        &'a self,
        id: &CanonicalId,
        index: &super::NormalizedSbomIndex,
    ) -> Vec<&'a DependencyEdge> {
        index.dependents_of(id, &self.edges)
    }
222
223 pub fn find_by_name_indexed(
227 &self,
228 name: &str,
229 index: &super::NormalizedSbomIndex,
230 ) -> Vec<&Component> {
231 let name_lower = name.to_lowercase();
232 index
233 .find_by_name_lower(&name_lower)
234 .iter()
235 .filter_map(|id| self.components.get(id))
236 .collect()
237 }
238
239 pub fn search_by_name_indexed(
243 &self,
244 query: &str,
245 index: &super::NormalizedSbomIndex,
246 ) -> Vec<&Component> {
247 let query_lower = query.to_lowercase();
248 index
249 .search_by_name(&query_lower)
250 .iter()
251 .filter_map(|id| self.components.get(id))
252 .collect()
253 }
254
255 pub fn apply_cra_sidecar(&mut self, sidecar: &super::CraSidecarMetadata) {
260 if self.document.security_contact.is_none() {
262 self.document.security_contact = sidecar.security_contact.clone();
263 }
264
265 if self.document.vulnerability_disclosure_url.is_none() {
266 self.document.vulnerability_disclosure_url =
267 sidecar.vulnerability_disclosure_url.clone();
268 }
269
270 if self.document.support_end_date.is_none() {
271 self.document.support_end_date = sidecar.support_end_date;
272 }
273
274 if self.document.name.is_none() {
275 self.document.name = sidecar.product_name.clone();
276 }
277
278 if let Some(manufacturer) = &sidecar.manufacturer_name {
280 let has_org = self
281 .document
282 .creators
283 .iter()
284 .any(|c| c.creator_type == super::CreatorType::Organization);
285
286 if !has_org {
287 self.document.creators.push(super::Creator {
288 creator_type: super::CreatorType::Organization,
289 name: manufacturer.clone(),
290 email: sidecar.manufacturer_email.clone(),
291 });
292 }
293 }
294 }
295}
296
/// The default SBOM is an empty one built from default document metadata.
impl Default for NormalizedSbom {
    fn default() -> Self {
        Self::new(DocumentMetadata::default())
    }
}
302
/// Per-severity vulnerability tallies; all counters default to zero.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VulnerabilityCounts {
    /// Number of vulnerabilities with critical severity.
    pub critical: usize,
    /// Number of vulnerabilities with high severity.
    pub high: usize,
    /// Number of vulnerabilities with medium severity.
    pub medium: usize,
    /// Number of vulnerabilities with low severity.
    pub low: usize,
    /// Number of vulnerabilities with any other or no severity.
    pub unknown: usize,
}
312
313impl VulnerabilityCounts {
314 pub fn total(&self) -> usize {
315 self.critical + self.high + self.medium + self.low + self.unknown
316 }
317}
318
/// How stale a component is.
///
/// The first four variants are produced by `StalenessLevel::from_days`;
/// `Deprecated` and `Archived` are never derived from an age.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum StalenessLevel {
    /// Updated within the last 182 days.
    Fresh,
    /// Last updated 183 to 365 days ago.
    Aging,
    /// Last updated 366 to 730 days ago.
    Stale,
    /// Last updated more than 730 days ago.
    Abandoned,
    /// Marked as deprecated.
    // NOTE(review): presumably set by whatever populates `StalenessInfo::is_deprecated` — confirm.
    Deprecated,
    /// Marked as archived.
    // NOTE(review): presumably set by whatever populates `StalenessInfo::is_archived` — confirm.
    Archived,
}
335
336impl StalenessLevel {
337 pub fn from_days(days: u32) -> Self {
339 match days {
340 0..=182 => StalenessLevel::Fresh, 183..=365 => StalenessLevel::Aging, 366..=730 => StalenessLevel::Stale, _ => StalenessLevel::Abandoned, }
345 }
346
347 pub fn label(&self) -> &'static str {
349 match self {
350 StalenessLevel::Fresh => "Fresh",
351 StalenessLevel::Aging => "Aging",
352 StalenessLevel::Stale => "Stale",
353 StalenessLevel::Abandoned => "Abandoned",
354 StalenessLevel::Deprecated => "Deprecated",
355 StalenessLevel::Archived => "Archived",
356 }
357 }
358
359 pub fn icon(&self) -> &'static str {
361 match self {
362 StalenessLevel::Fresh => "✓",
363 StalenessLevel::Aging => "⏳",
364 StalenessLevel::Stale => "⚠",
365 StalenessLevel::Abandoned => "⛔",
366 StalenessLevel::Deprecated => "⊘",
367 StalenessLevel::Archived => "📦",
368 }
369 }
370
371 pub fn severity(&self) -> u8 {
373 match self {
374 StalenessLevel::Fresh => 0,
375 StalenessLevel::Aging => 1,
376 StalenessLevel::Stale => 2,
377 StalenessLevel::Abandoned => 3,
378 StalenessLevel::Deprecated => 4,
379 StalenessLevel::Archived => 4,
380 }
381 }
382}
383
384impl std::fmt::Display for StalenessLevel {
385 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
386 write!(f, "{}", self.label())
387 }
388}
389
/// Staleness details for a component.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StalenessInfo {
    /// Overall staleness classification.
    pub level: StalenessLevel,
    /// When the component was last published, if known.
    pub last_published: Option<chrono::DateTime<chrono::Utc>>,
    /// Whether the component is flagged as deprecated.
    pub is_deprecated: bool,
    /// Whether the component is flagged as archived.
    pub is_archived: bool,
    /// Message accompanying a deprecation, if any.
    pub deprecation_message: Option<String>,
    /// Whole days since the last update, if known.
    pub days_since_update: Option<u32>,
    /// Latest available version, if known.
    pub latest_version: Option<String>,
}
408
409impl StalenessInfo {
410 pub fn new(level: StalenessLevel) -> Self {
412 Self {
413 level,
414 last_published: None,
415 is_deprecated: false,
416 is_archived: false,
417 deprecation_message: None,
418 days_since_update: None,
419 latest_version: None,
420 }
421 }
422
423 pub fn from_date(last_published: chrono::DateTime<chrono::Utc>) -> Self {
425 let days = (chrono::Utc::now() - last_published).num_days() as u32;
426 let level = StalenessLevel::from_days(days);
427 Self {
428 level,
429 last_published: Some(last_published),
430 is_deprecated: false,
431 is_archived: false,
432 deprecation_message: None,
433 days_since_update: Some(days),
434 latest_version: None,
435 }
436 }
437
438 pub fn needs_attention(&self) -> bool {
440 self.level.severity() >= 2
441 }
442}
443
/// A single normalized SBOM component.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Component {
    /// Canonical ID; the constructors in this file derive it from `identifiers`.
    pub canonical_id: CanonicalId,
    /// Identifiers attached to the component (format-specific ID, PURL, ...).
    pub identifiers: ComponentIdentifiers,
    /// Component name.
    pub name: String,
    /// Version string as given, if any.
    pub version: Option<String>,
    /// Parsed semantic version; `with_version` sets it when the version
    /// string parses as semver.
    pub semver: Option<semver::Version>,
    /// Kind of component; `new` defaults it to `ComponentType::Library`.
    pub component_type: ComponentType,
    /// Package ecosystem; `with_purl` derives it from the PURL type.
    pub ecosystem: Option<Ecosystem>,
    /// License information.
    pub licenses: LicenseInfo,
    /// Supplier organization, if known.
    pub supplier: Option<Organization>,
    /// Hashes recorded for the component.
    pub hashes: Vec<Hash>,
    /// External references.
    pub external_refs: Vec<ExternalReference>,
    /// Vulnerabilities associated with the component.
    pub vulnerabilities: Vec<VulnerabilityRef>,
    /// VEX status, if any.
    pub vex_status: Option<VexStatus>,
    /// Content hash; `new` initializes it to `0` and `calculate_content_hash`
    /// overwrites it.
    pub content_hash: u64,
    /// Format-specific extension data.
    pub extensions: ComponentExtensions,
    /// Free-text description, if any.
    pub description: Option<String>,
    /// Copyright text, if any.
    pub copyright: Option<String>,
    /// Author, if any.
    pub author: Option<String>,
    /// Group, if any.
    // NOTE(review): presumably a namespace/group qualifier such as a Maven
    // groupId — confirm against the parsers.
    pub group: Option<String>,
    /// Staleness information, if it has been determined.
    pub staleness: Option<StalenessInfo>,
}
488
489impl Component {
    /// Creates a component from a name and a format-specific ID.
    ///
    /// The identifiers are built from `format_id` and the canonical ID is
    /// derived from them. The component type defaults to
    /// `ComponentType::Library`, the content hash to `0`, and every other
    /// field to empty, `None` or its `Default`.
    pub fn new(name: String, format_id: String) -> Self {
        let identifiers = ComponentIdentifiers::new(format_id);
        let canonical_id = identifiers.canonical_id();

        Self {
            canonical_id,
            identifiers,
            name,
            version: None,
            semver: None,
            component_type: ComponentType::Library,
            ecosystem: None,
            licenses: LicenseInfo::default(),
            supplier: None,
            hashes: Vec::new(),
            external_refs: Vec::new(),
            vulnerabilities: Vec::new(),
            vex_status: None,
            content_hash: 0,
            extensions: ComponentExtensions::default(),
            description: None,
            copyright: None,
            author: None,
            group: None,
            staleness: None,
        }
    }
518
519 pub fn with_purl(mut self, purl: String) -> Self {
521 self.identifiers.purl = Some(purl);
522 self.canonical_id = self.identifiers.canonical_id();
523
524 if let Some(purl_str) = &self.identifiers.purl {
526 if let Some(purl_type) = purl_str
527 .strip_prefix("pkg:")
528 .and_then(|s| s.split('/').next())
529 {
530 self.ecosystem = Some(Ecosystem::from_purl_type(purl_type));
531 }
532 }
533
534 self
535 }
536
    /// Sets the version string and the parsed semver.
    ///
    /// `semver` becomes `Some` only when `version` parses as a semantic
    /// version; otherwise it is reset to `None`. The raw string is always kept.
    pub fn with_version(mut self, version: String) -> Self {
        self.semver = semver::Version::parse(&version).ok();
        self.version = Some(version);
        self
    }
543
544 pub fn calculate_content_hash(&mut self) {
546 let mut hasher_input = Vec::new();
547
548 hasher_input.extend(self.name.as_bytes());
549 if let Some(v) = &self.version {
550 hasher_input.extend(v.as_bytes());
551 }
552 if let Some(purl) = &self.identifiers.purl {
553 hasher_input.extend(purl.as_bytes());
554 }
555 for license in &self.licenses.declared {
556 hasher_input.extend(license.expression.as_bytes());
557 }
558 if let Some(supplier) = &self.supplier {
559 hasher_input.extend(supplier.name.as_bytes());
560 }
561 for hash in &self.hashes {
562 hasher_input.extend(hash.value.as_bytes());
563 }
564 for vuln in &self.vulnerabilities {
565 hasher_input.extend(vuln.id.as_bytes());
566 }
567
568 self.content_hash = xxh3_64(&hasher_input);
569 }
570
    /// Heuristic check for open-source components.
    ///
    /// Returns `true` when any declared license is flagged `is_valid_spdx`,
    /// or when the component has a PURL.
    pub fn is_oss(&self) -> bool {
        self.licenses.declared.iter().any(|l| l.is_valid_spdx) || self.identifiers.purl.is_some()
    }
576
577 pub fn display_name(&self) -> String {
579 match &self.version {
580 Some(v) => format!("{}@{}", self.name, v),
581 None => self.name.clone(),
582 }
583 }
584}
585
/// A directed relationship between two components.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct DependencyEdge {
    /// Canonical ID of the component the edge starts at.
    pub from: CanonicalId,
    /// Canonical ID of the component the edge points to.
    pub to: CanonicalId,
    /// Kind of relationship between the two components.
    pub relationship: DependencyType,
    /// Scope of the dependency, if known; `new` leaves it as `None`.
    pub scope: Option<DependencyScope>,
}
598
599impl DependencyEdge {
600 pub fn new(from: CanonicalId, to: CanonicalId, relationship: DependencyType) -> Self {
602 Self {
603 from,
604 to,
605 relationship,
606 scope: None,
607 }
608 }
609
610 pub fn is_direct(&self) -> bool {
612 matches!(
613 self.relationship,
614 DependencyType::DependsOn
615 | DependencyType::DevDependsOn
616 | DependencyType::BuildDependsOn
617 | DependencyType::TestDependsOn
618 | DependencyType::RuntimeDependsOn
619 )
620 }
621}