1#![doc = include_str!("../readme.md")]
2
3use indexmap::IndexMap;
4use packageurl::PackageUrl;
5use serde::{Deserialize, Serialize};
6use sha2::{Digest, Sha256};
7use std::collections::{BTreeMap, BTreeSet};
8use std::fmt;
9use std::str::FromStr;
10
11#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
26pub struct Sbom {
27 pub metadata: Metadata,
29 pub components: IndexMap<ComponentId, Component>,
31 pub dependencies: BTreeMap<ComponentId, BTreeMap<ComponentId, DependencyKind>>,
33 #[serde(default, skip_serializing_if = "Vec::is_empty")]
35 pub warnings: Vec<String>,
36}
37
38impl Default for Sbom {
39 fn default() -> Self {
40 Self {
41 metadata: Metadata::default(),
42 components: IndexMap::new(),
43 dependencies: BTreeMap::new(),
44 warnings: Vec::new(),
45 }
46 }
47}
48
49#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
55pub struct Metadata {
56 pub timestamp: Option<String>,
58 pub tools: Vec<String>,
60 pub authors: Vec<String>,
62}
63
64#[derive(
73 Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize,
74)]
75#[serde(rename_all = "lowercase")]
76pub enum DependencyKind {
77 #[default]
79 Runtime,
80 Dev,
82 Build,
84 Test,
86 Optional,
88 Provided,
90}
91
92impl fmt::Display for DependencyKind {
93 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
94 match self {
95 Self::Runtime => write!(f, "runtime"),
96 Self::Dev => write!(f, "dev"),
97 Self::Build => write!(f, "build"),
98 Self::Test => write!(f, "test"),
99 Self::Optional => write!(f, "optional"),
100 Self::Provided => write!(f, "provided"),
101 }
102 }
103}
104
105#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
125pub struct ComponentId(String);
126
127impl ComponentId {
128 pub fn new(purl: Option<&str>, properties: &[(&str, &str)]) -> Self {
133 if let Some(purl) = purl {
134 if let Ok(parsed) = PackageUrl::from_str(purl) {
136 return ComponentId(parsed.to_string());
137 }
138 return ComponentId(purl.to_string());
139 }
140
141 let mut hasher = Sha256::new();
143 for (k, v) in properties {
144 hasher.update(k.as_bytes());
145 hasher.update(b":");
146 hasher.update(v.as_bytes());
147 hasher.update(b"|");
148 }
149 let hash = hex::encode(hasher.finalize());
150 ComponentId(format!("h:{}", hash))
151 }
152
153 pub fn as_str(&self) -> &str {
155 &self.0
156 }
157}
158
159impl std::fmt::Display for ComponentId {
160 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
161 write!(f, "{}", self.0)
162 }
163}
164
165#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
171pub struct Component {
172 pub id: ComponentId,
174 pub name: String,
176 pub version: Option<String>,
178 pub ecosystem: Option<String>,
180 pub supplier: Option<String>,
182 pub description: Option<String>,
184 pub purl: Option<String>,
186 pub licenses: BTreeSet<String>,
188 pub hashes: BTreeMap<String, String>,
190 pub source_ids: Vec<String>,
192}
193
194impl Component {
195 pub fn new(name: String, version: Option<String>) -> Self {
200 let mut props = vec![("name", name.as_str())];
201 if let Some(v) = &version {
202 props.push(("version", v));
203 }
204 let id = ComponentId::new(None, &props);
205
206 Self {
207 id,
208 name,
209 version,
210 ecosystem: None,
211 supplier: None,
212 description: None,
213 purl: None,
214 licenses: BTreeSet::new(),
215 hashes: BTreeMap::new(),
216 source_ids: Vec::new(),
217 }
218 }
219}
220
221impl Sbom {
222 pub fn normalize(&mut self) {
232 self.components.sort_keys();
234
235 for component in self.components.values_mut() {
237 component.normalize();
238 }
239
240 self.metadata.timestamp = None;
242 self.metadata.tools.clear();
243 self.metadata.authors.clear(); }
245
246 pub fn roots(&self) -> Vec<ComponentId> {
250 let targets: BTreeSet<_> = self
251 .dependencies
252 .values()
253 .flat_map(|children| children.keys())
254 .collect();
255 self.components
256 .keys()
257 .filter(|id| !targets.contains(id))
258 .cloned()
259 .collect()
260 }
261
262 pub fn deps(&self, id: &ComponentId) -> Vec<ComponentId> {
264 self.dependencies
265 .get(id)
266 .map(|d| d.keys().cloned().collect())
267 .unwrap_or_default()
268 }
269
270 pub fn rdeps(&self, id: &ComponentId) -> Vec<ComponentId> {
272 self.dependencies
273 .iter()
274 .filter(|(_, children)| children.contains_key(id))
275 .map(|(parent, _)| parent.clone())
276 .collect()
277 }
278
279 pub fn transitive_deps(&self, id: &ComponentId) -> BTreeSet<ComponentId> {
283 let mut visited = BTreeSet::new();
284 let mut stack = vec![id.clone()];
285 while let Some(current) = stack.pop() {
286 if let Some(children) = self.dependencies.get(¤t) {
287 for child in children.keys() {
288 if visited.insert(child.clone()) {
289 stack.push(child.clone());
290 }
291 }
292 }
293 }
294 visited
295 }
296
297 pub fn ecosystems(&self) -> BTreeSet<String> {
299 self.components
300 .values()
301 .filter_map(|c| c.ecosystem.clone())
302 .collect()
303 }
304
305 pub fn licenses(&self) -> BTreeSet<String> {
307 self.components
308 .values()
309 .flat_map(|c| c.licenses.iter().cloned())
310 .collect()
311 }
312
313 pub fn missing_hashes(&self) -> Vec<ComponentId> {
317 self.components
318 .iter()
319 .filter(|(_, c)| c.hashes.is_empty())
320 .map(|(id, _)| id.clone())
321 .collect()
322 }
323
324 pub fn by_purl(&self, purl: &str) -> Option<&Component> {
326 let id = ComponentId::new(Some(purl), &[]);
327 self.components.get(&id)
328 }
329}
330
331impl Component {
332 pub fn normalize(&mut self) {
337 let normalized_hashes: BTreeMap<String, String> = self
338 .hashes
339 .iter()
340 .map(|(k, v)| (k.to_lowercase(), v.to_lowercase()))
341 .collect();
342 self.hashes = normalized_hashes;
343 }
344}
345
346pub fn ecosystem_from_purl(purl: &str) -> Option<String> {
360 PackageUrl::from_str(purl).ok().map(|p| p.ty().to_string())
361}
362
363pub fn parse_license_expression(license: &str) -> BTreeSet<String> {
378 match spdx::Expression::parse(license) {
379 Ok(expr) => {
380 let ids: BTreeSet<String> = expr
381 .requirements()
382 .filter_map(|r| r.req.license.id())
383 .map(|id| id.name.to_string())
384 .collect();
385 if ids.is_empty() {
386 BTreeSet::from([license.to_string()])
388 } else {
389 ids
390 }
391 }
392 Err(_) => {
393 BTreeSet::from([license.to_string()])
395 }
396 }
397}
398
/// Maps a hash algorithm name onto its canonical spelling.
///
/// Matching ignores ASCII case as well as `-` and `_` separators, so
/// `sha256`, `SHA-256` and `SHA_256` all become `SHA-256`, and the
/// underscore spellings used by SPDX JSON (`SHA3_256`, `BLAKE2b_512`) are
/// recognised too.
///
/// Names that are not in the table are returned unchanged.
pub fn canonical_algorithm_name(name: &str) -> String {
    // Lookup key: separators removed, upper-cased.
    let key = name
        .replace(|c: char| matches!(c, '-' | '_'), "")
        .to_uppercase();

    let canonical = match key.as_str() {
        "MD2" => "MD2",
        "MD4" => "MD4",
        "MD5" => "MD5",
        "MD6" => "MD6",
        "SHA1" => "SHA-1",
        "SHA224" => "SHA-224",
        "SHA256" => "SHA-256",
        "SHA384" => "SHA-384",
        "SHA512" => "SHA-512",
        "SHA3256" => "SHA3-256",
        "SHA3384" => "SHA3-384",
        "SHA3512" => "SHA3-512",
        "BLAKE2B256" => "BLAKE2b-256",
        "BLAKE2B384" => "BLAKE2b-384",
        "BLAKE2B512" => "BLAKE2b-512",
        "BLAKE3" => "BLAKE3",
        "ADLER32" => "ADLER-32",
        // Unknown algorithm: hand the caller's spelling back untouched.
        _ => name,
    };
    canonical.to_string()
}
436
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a component that has both a name and a version.
    fn versioned(name: &str, version: &str) -> Component {
        Component::new(name.to_string(), Some(version.to_string()))
    }

    /// Like [`versioned`] at version `1.0`, with the given licenses attached.
    fn licensed(name: &str, licenses: &[&str]) -> Component {
        let mut component = versioned(name, "1.0");
        component
            .licenses
            .extend(licenses.iter().map(|l| l.to_string()));
        component
    }

    /// Stores `component` under its own id and hands that id back.
    fn add(sbom: &mut Sbom, component: Component) -> ComponentId {
        let id = component.id.clone();
        sbom.components.insert(id.clone(), component);
        id
    }

    /// Records a runtime edge `parent -> child`.
    fn link(sbom: &mut Sbom, parent: &ComponentId, child: &ComponentId) {
        sbom.dependencies
            .entry(parent.clone())
            .or_default()
            .insert(child.clone(), DependencyKind::Runtime);
    }

    /// Builds an owned string set from string slices.
    fn string_set(items: &[&str]) -> BTreeSet<String> {
        items.iter().map(|s| s.to_string()).collect()
    }

    #[test]
    fn test_component_id_purl() {
        let raw = "pkg:npm/left-pad@1.3.0";
        assert_eq!(ComponentId::new(Some(raw), &[]).as_str(), raw);
    }

    #[test]
    fn test_component_id_hash_stability() {
        let props = [("name", "foo"), ("version", "1.0")];
        let first = ComponentId::new(None, &props);
        let second = ComponentId::new(None, &props);
        assert_eq!(first, second);
        assert!(first.as_str().starts_with("h:"));
    }

    #[test]
    fn test_normalization() {
        let mut comp = licensed("test", &["MIT", "Apache-2.0"]);
        comp.hashes.insert("SHA-256".to_string(), "ABC".to_string());

        comp.normalize();

        // The license set is ordered regardless of insertion order.
        assert_eq!(comp.licenses, string_set(&["Apache-2.0", "MIT"]));
        // Both the algorithm name and the digest are lowercased.
        assert_eq!(comp.hashes.get("sha-256").unwrap(), "abc");
    }

    #[test]
    fn test_parse_license_expression() {
        // OR expression: both ids are reported.
        let either = parse_license_expression("MIT OR Apache-2.0");
        for id in ["MIT", "Apache-2.0"] {
            assert!(either.contains(id));
        }
        assert_eq!(either.len(), 2);

        // Single id.
        assert_eq!(parse_license_expression("MIT"), string_set(&["MIT"]));

        // AND expression: both ids are reported.
        let both = parse_license_expression("MIT AND Apache-2.0");
        for id in ["MIT", "Apache-2.0"] {
            assert!(both.contains(id));
        }

        // Unparseable text is kept verbatim.
        assert_eq!(
            parse_license_expression("Custom License"),
            string_set(&["Custom License"])
        );

        // A bare LicenseRef is kept verbatim as well.
        assert_eq!(
            parse_license_expression("LicenseRef-proprietary"),
            string_set(&["LicenseRef-proprietary"])
        );
    }

    #[test]
    fn test_license_set_equality() {
        let mut forward = Component::new("test".into(), None);
        for license in ["MIT", "Apache-2.0"] {
            forward.licenses.insert(license.into());
        }

        let mut backward = Component::new("test".into(), None);
        for license in ["Apache-2.0", "MIT"] {
            backward.licenses.insert(license.into());
        }

        assert_eq!(forward.licenses, backward.licenses);
    }

    #[test]
    fn test_query_api() {
        let mut sbom = Sbom::default();
        let a = add(&mut sbom, versioned("a", "1"));
        let b = add(&mut sbom, versioned("b", "1"));
        let c = add(&mut sbom, versioned("c", "1"));

        // a -> b -> c
        link(&mut sbom, &a, &b);
        link(&mut sbom, &b, &c);

        assert_eq!(sbom.roots(), vec![a.clone()]);
        assert_eq!(sbom.deps(&a), vec![b.clone()]);
        assert_eq!(sbom.rdeps(&b), vec![a.clone()]);

        let reachable = sbom.transitive_deps(&a);
        assert!(reachable.contains(&b));
        assert!(reachable.contains(&c));
        assert_eq!(reachable.len(), 2);

        assert_eq!(sbom.missing_hashes().len(), 3);
    }

    #[test]
    fn test_ecosystems_query() {
        let mut sbom = Sbom::default();

        let fixtures = [
            ("lodash", Some("npm")),
            ("serde", Some("cargo")),
            ("other-npm", Some("npm")),
            ("no-ecosystem", None),
        ];
        for (name, ecosystem) in fixtures {
            let mut component = versioned(name, "1.0");
            component.ecosystem = ecosystem.map(String::from);
            add(&mut sbom, component);
        }

        let found = sbom.ecosystems();
        assert_eq!(found.len(), 2);
        assert!(found.contains("npm"));
        assert!(found.contains("cargo"));
    }

    #[test]
    fn test_licenses_query() {
        let mut sbom = Sbom::default();
        add(&mut sbom, licensed("a", &["MIT", "Apache-2.0"]));
        add(&mut sbom, licensed("b", &["MIT", "GPL-3.0-only"]));
        add(&mut sbom, licensed("c", &[]));

        let found = sbom.licenses();
        assert_eq!(found.len(), 3);
        for license in ["MIT", "Apache-2.0", "GPL-3.0-only"] {
            assert!(found.contains(license));
        }
    }

    #[test]
    fn test_by_purl() {
        let purl = "pkg:npm/lodash@4.17.21";
        let mut sbom = Sbom::default();

        let mut with_purl = versioned("lodash", "4.17.21");
        with_purl.purl = Some(purl.to_string());
        with_purl.id = ComponentId::new(with_purl.purl.as_deref(), &[]);
        add(&mut sbom, with_purl);
        add(&mut sbom, versioned("no-purl", "1.0"));

        assert_eq!(
            sbom.by_purl(purl).map(|c| c.name.as_str()),
            Some("lodash")
        );
        assert!(sbom.by_purl("pkg:npm/nonexistent@1.0").is_none());
    }

    #[test]
    fn test_component_id_unparseable_purl() {
        let raw = "not-a-valid-purl-but-still-a-string";
        assert_eq!(ComponentId::new(Some(raw), &[]).as_str(), raw);
    }

    #[test]
    fn test_component_id_display() {
        let id = ComponentId::new(Some("pkg:npm/foo@1.0"), &[]);
        assert_eq!(id.to_string(), "pkg:npm/foo@1.0");
    }

    #[test]
    fn test_sbom_normalize_clears_metadata() {
        let mut sbom = Sbom {
            metadata: Metadata {
                timestamp: Some("2024-01-01T00:00:00Z".to_string()),
                tools: vec!["syft".to_string()],
                authors: vec!["alice".to_string()],
            },
            ..Sbom::default()
        };
        add(&mut sbom, versioned("a", "1"));

        sbom.normalize();

        assert!(sbom.metadata.timestamp.is_none());
        assert!(sbom.metadata.tools.is_empty());
        assert!(sbom.metadata.authors.is_empty());
    }

    #[test]
    fn test_missing_hashes_mixed() {
        let mut sbom = Sbom::default();

        add(&mut sbom, versioned("no-hash", "1.0"));
        let mut hashed = versioned("has-hash", "1.0");
        hashed.hashes.insert("sha256".into(), "abc".into());
        add(&mut sbom, hashed);

        assert_eq!(sbom.missing_hashes().len(), 1);
    }

    #[test]
    fn test_ecosystem_from_purl() {
        let cases = [
            ("pkg:npm/lodash@4.17.21", Some("npm")),
            ("pkg:cargo/serde@1.0.0", Some("cargo")),
            ("pkg:pypi/requests@2.28.0", Some("pypi")),
            ("pkg:maven/org.apache/commons@1.0", Some("maven")),
            ("invalid-purl", None),
            ("", None),
        ];
        for (purl, expected) in cases {
            assert_eq!(
                ecosystem_from_purl(purl),
                expected.map(String::from),
                "purl: {:?}",
                purl
            );
        }
    }

    #[test]
    fn test_canonical_algorithm_name() {
        let cases = [
            // Compact spellings.
            ("SHA256", "SHA-256"),
            ("SHA1", "SHA-1"),
            ("SHA384", "SHA-384"),
            ("SHA512", "SHA-512"),
            ("SHA224", "SHA-224"),
            // Already canonical.
            ("SHA-256", "SHA-256"),
            ("SHA-1", "SHA-1"),
            ("SHA-384", "SHA-384"),
            // Lowercase input.
            ("sha256", "SHA-256"),
            ("sha-256", "SHA-256"),
            // SHA-3 family.
            ("SHA3-256", "SHA3-256"),
            ("SHA3256", "SHA3-256"),
            // MD5.
            ("MD5", "MD5"),
            ("md5", "MD5"),
            // BLAKE family.
            ("BLAKE2b-256", "BLAKE2b-256"),
            ("BLAKE2B256", "BLAKE2b-256"),
            ("BLAKE3", "BLAKE3"),
            // Adler.
            ("ADLER32", "ADLER-32"),
            ("ADLER-32", "ADLER-32"),
            // Unknown names pass through.
            ("TIGER", "TIGER"),
        ];
        for (input, expected) in cases {
            assert_eq!(canonical_algorithm_name(input), expected, "input: {:?}", input);
        }
    }
}