1#![doc = include_str!("../readme.md")]
2
3use indexmap::IndexMap;
4use packageurl::PackageUrl;
5use serde::{Deserialize, Serialize};
6use sha2::{Digest, Sha256};
7use std::collections::{BTreeMap, BTreeSet};
8use std::str::FromStr;
9
/// In-memory software bill of materials: document metadata, the listed
/// components, and the dependency edges between them.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Sbom {
    /// Document-level metadata (timestamp, tools, authors).
    pub metadata: Metadata,
    /// Components keyed by their [`ComponentId`]. The map keeps its own
    /// ordering; [`Sbom::normalize`] re-sorts it by key.
    pub components: IndexMap<ComponentId, Component>,
    /// Dependency edges: each key maps to the set of ids it directly depends on.
    pub dependencies: BTreeMap<ComponentId, BTreeSet<ComponentId>>,
    /// Free-form warning messages. Defaults to empty when the field is absent
    /// on deserialization and is omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub warnings: Vec<String>,
}
36
37impl Default for Sbom {
38 fn default() -> Self {
39 Self {
40 metadata: Metadata::default(),
41 components: IndexMap::new(),
42 dependencies: BTreeMap::new(),
43 warnings: Vec::new(),
44 }
45 }
46}
47
/// Document-level metadata of an [`Sbom`].
///
/// All three fields are reset by [`Sbom::normalize`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct Metadata {
    /// Timestamp kept as an opaque string; this type enforces no format.
    pub timestamp: Option<String>,
    /// Tool names recorded for the document.
    pub tools: Vec<String>,
    /// Author names recorded for the document.
    pub authors: Vec<String>,
}
62
/// Identifier of a component inside an [`Sbom`].
///
/// A newtype over `String` whose inner value is private; build one with
/// [`ComponentId::new`] and read it back with [`ComponentId::as_str`].
/// Ordering, equality and hashing are derived from the string.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct ComponentId(String);
84
85impl ComponentId {
86 pub fn new(purl: Option<&str>, properties: &[(&str, &str)]) -> Self {
91 if let Some(purl) = purl {
92 if let Ok(parsed) = PackageUrl::from_str(purl) {
94 return ComponentId(parsed.to_string());
95 }
96 return ComponentId(purl.to_string());
97 }
98
99 let mut hasher = Sha256::new();
101 for (k, v) in properties {
102 hasher.update(k.as_bytes());
103 hasher.update(b":");
104 hasher.update(v.as_bytes());
105 hasher.update(b"|");
106 }
107 let hash = hex::encode(hasher.finalize());
108 ComponentId(format!("h:{}", hash))
109 }
110
111 pub fn as_str(&self) -> &str {
113 &self.0
114 }
115}
116
117impl std::fmt::Display for ComponentId {
118 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
119 write!(f, "{}", self.0)
120 }
121}
122
/// A single entry of an [`Sbom`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Component {
    /// Identifier of this component; [`Component::new`] derives it from the
    /// name and version.
    pub id: ComponentId,
    /// Component name.
    pub name: String,
    /// Component version, if known.
    pub version: Option<String>,
    /// Package ecosystem, if known (the tests in this file use values such as
    /// `"npm"` and `"cargo"`).
    pub ecosystem: Option<String>,
    /// Supplier, if known.
    pub supplier: Option<String>,
    /// Free-text description, if any.
    pub description: Option<String>,
    /// Package URL string, if any.
    pub purl: Option<String>,
    /// License names attached to the component, kept as a sorted set.
    pub licenses: BTreeSet<String>,
    /// Hashes keyed by algorithm name, with the digest as value.
    /// [`Component::normalize`] lower-cases both keys and values.
    pub hashes: BTreeMap<String, String>,
    /// NOTE(review): meaning not evident from this file — presumably
    /// identifiers this component had in the source document(s); confirm.
    pub source_ids: Vec<String>,
}
151
152impl Component {
153 pub fn new(name: String, version: Option<String>) -> Self {
158 let mut props = vec![("name", name.as_str())];
159 if let Some(v) = &version {
160 props.push(("version", v));
161 }
162 let id = ComponentId::new(None, &props);
163
164 Self {
165 id,
166 name,
167 version,
168 ecosystem: None,
169 supplier: None,
170 description: None,
171 purl: None,
172 licenses: BTreeSet::new(),
173 hashes: BTreeMap::new(),
174 source_ids: Vec::new(),
175 }
176 }
177}
178
179impl Sbom {
180 pub fn normalize(&mut self) {
190 self.components.sort_keys();
192
193 for component in self.components.values_mut() {
195 component.normalize();
196 }
197
198 self.metadata.timestamp = None;
200 self.metadata.tools.clear();
201 self.metadata.authors.clear(); }
203
204 pub fn roots(&self) -> Vec<ComponentId> {
208 let targets: BTreeSet<_> = self.dependencies.values().flatten().collect();
209 self.components
210 .keys()
211 .filter(|id| !targets.contains(id))
212 .cloned()
213 .collect()
214 }
215
216 pub fn deps(&self, id: &ComponentId) -> Vec<ComponentId> {
218 self.dependencies
219 .get(id)
220 .map(|d| d.iter().cloned().collect())
221 .unwrap_or_default()
222 }
223
224 pub fn rdeps(&self, id: &ComponentId) -> Vec<ComponentId> {
226 self.dependencies
227 .iter()
228 .filter(|(_, children)| children.contains(id))
229 .map(|(parent, _)| parent.clone())
230 .collect()
231 }
232
233 pub fn transitive_deps(&self, id: &ComponentId) -> BTreeSet<ComponentId> {
237 let mut visited = BTreeSet::new();
238 let mut stack = vec![id.clone()];
239 while let Some(current) = stack.pop() {
240 if let Some(children) = self.dependencies.get(¤t) {
241 for child in children {
242 if visited.insert(child.clone()) {
243 stack.push(child.clone());
244 }
245 }
246 }
247 }
248 visited
249 }
250
251 pub fn ecosystems(&self) -> BTreeSet<String> {
253 self.components
254 .values()
255 .filter_map(|c| c.ecosystem.clone())
256 .collect()
257 }
258
259 pub fn licenses(&self) -> BTreeSet<String> {
261 self.components
262 .values()
263 .flat_map(|c| c.licenses.iter().cloned())
264 .collect()
265 }
266
267 pub fn missing_hashes(&self) -> Vec<ComponentId> {
271 self.components
272 .iter()
273 .filter(|(_, c)| c.hashes.is_empty())
274 .map(|(id, _)| id.clone())
275 .collect()
276 }
277
278 pub fn by_purl(&self, purl: &str) -> Option<&Component> {
280 let id = ComponentId::new(Some(purl), &[]);
281 self.components.get(&id)
282 }
283}
284
285impl Component {
286 pub fn normalize(&mut self) {
291 let normalized_hashes: BTreeMap<String, String> = self
292 .hashes
293 .iter()
294 .map(|(k, v)| (k.to_lowercase(), v.to_lowercase()))
295 .collect();
296 self.hashes = normalized_hashes;
297 }
298}
299
300pub fn ecosystem_from_purl(purl: &str) -> Option<String> {
314 PackageUrl::from_str(purl).ok().map(|p| p.ty().to_string())
315}
316
317pub fn parse_license_expression(license: &str) -> BTreeSet<String> {
332 match spdx::Expression::parse(license) {
333 Ok(expr) => {
334 let ids: BTreeSet<String> = expr
335 .requirements()
336 .filter_map(|r| r.req.license.id())
337 .map(|id| id.name.to_string())
338 .collect();
339 if ids.is_empty() {
340 BTreeSet::from([license.to_string()])
342 } else {
343 ids
344 }
345 }
346 Err(_) => {
347 BTreeSet::from([license.to_string()])
349 }
350 }
351}
352
/// Maps a hash-algorithm name onto one canonical spelling.
///
/// Matching ignores ASCII case as well as `-` and `_` separators, so
/// `sha256`, `SHA-256` and `sha_256` all become `SHA-256`, and `SHA3_256`
/// becomes `SHA3-256`. Names that are not in the table are returned exactly
/// as given.
pub fn canonical_algorithm_name(name: &str) -> String {
    // Look up on a separator-free, upper-cased key so every spelling of the
    // same algorithm lands on one table entry. Underscores are stripped too,
    // because some formats write e.g. `SHA3_256` / `BLAKE2b_256`.
    let key = name.replace(['-', '_'], "").to_uppercase();

    let canonical = match key.as_str() {
        "MD2" => "MD2",
        "MD4" => "MD4",
        "MD5" => "MD5",
        "MD6" => "MD6",
        "SHA1" => "SHA-1",
        "SHA224" => "SHA-224",
        "SHA256" => "SHA-256",
        "SHA384" => "SHA-384",
        "SHA512" => "SHA-512",
        "SHA3256" => "SHA3-256",
        "SHA3384" => "SHA3-384",
        "SHA3512" => "SHA3-512",
        "BLAKE2B256" => "BLAKE2b-256",
        "BLAKE2B384" => "BLAKE2b-384",
        "BLAKE2B512" => "BLAKE2b-512",
        "BLAKE3" => "BLAKE3",
        "ADLER32" => "ADLER-32",
        // Unknown algorithm: hand the caller's spelling back untouched.
        _ => return name.to_string(),
    };
    canonical.to_string()
}
390
// Unit tests for the SBOM model: id derivation, normalization, the query
// helpers on `Sbom`, and the free-standing parsing/canonicalization functions.
#[cfg(test)]
mod tests {
    use super::*;

    // A parseable purl becomes the id text.
    #[test]
    fn test_component_id_purl() {
        let purl = "pkg:npm/left-pad@1.3.0";
        let id = ComponentId::new(Some(purl), &[]);
        assert_eq!(id.as_str(), purl);
    }

    // Without a purl the id is a hash of the properties: same input, same id.
    #[test]
    fn test_component_id_hash_stability() {
        let props = [("name", "foo"), ("version", "1.0")];
        let id1 = ComponentId::new(None, &props);
        let id2 = ComponentId::new(None, &props);
        assert_eq!(id1, id2);
        assert!(id1.as_str().starts_with("h:"));
    }

    // `Component::normalize` lower-cases hash keys and values and leaves the
    // license set as it was.
    #[test]
    fn test_normalization() {
        let mut comp = Component::new("test".to_string(), Some("1.0".to_string()));
        comp.licenses.insert("MIT".to_string());
        comp.licenses.insert("Apache-2.0".to_string());
        comp.hashes.insert("SHA-256".to_string(), "ABC".to_string());

        comp.normalize();

        assert_eq!(
            comp.licenses,
            BTreeSet::from(["Apache-2.0".to_string(), "MIT".to_string()])
        );
        assert_eq!(comp.hashes.get("sha-256").unwrap(), "abc");
    }

    // Covers compound expressions, a single id, an unparseable string and a
    // lone `LicenseRef-` term.
    #[test]
    fn test_parse_license_expression() {
        // OR expression: both operands are reported.
        let ids = parse_license_expression("MIT OR Apache-2.0");
        assert!(ids.contains("MIT"));
        assert!(ids.contains("Apache-2.0"));
        assert_eq!(ids.len(), 2);

        // Single SPDX id.
        let ids = parse_license_expression("MIT");
        assert_eq!(ids, BTreeSet::from(["MIT".to_string()]));

        // AND expression: both operands are reported.
        let ids = parse_license_expression("MIT AND Apache-2.0");
        assert!(ids.contains("MIT"));
        assert!(ids.contains("Apache-2.0"));

        // Not an SPDX expression: the input is kept verbatim.
        let ids = parse_license_expression("Custom License");
        assert_eq!(ids, BTreeSet::from(["Custom License".to_string()]));

        // A lone license reference is kept verbatim.
        let ids = parse_license_expression("LicenseRef-proprietary");
        assert_eq!(ids, BTreeSet::from(["LicenseRef-proprietary".to_string()]));
    }

    // License sets compare equal regardless of insertion order.
    #[test]
    fn test_license_set_equality() {
        let mut c1 = Component::new("test".into(), None);
        c1.licenses.insert("MIT".into());
        c1.licenses.insert("Apache-2.0".into());

        let mut c2 = Component::new("test".into(), None);
        c2.licenses.insert("Apache-2.0".into());
        c2.licenses.insert("MIT".into());

        assert_eq!(c1.licenses, c2.licenses);
    }

    // Exercises roots/deps/rdeps/transitive_deps/missing_hashes on the chain
    // a -> b -> c.
    #[test]
    fn test_query_api() {
        let mut sbom = Sbom::default();
        let c1 = Component::new("a".into(), Some("1".into()));
        let c2 = Component::new("b".into(), Some("1".into()));
        let c3 = Component::new("c".into(), Some("1".into()));

        let id1 = c1.id.clone();
        let id2 = c2.id.clone();
        let id3 = c3.id.clone();

        sbom.components.insert(id1.clone(), c1);
        sbom.components.insert(id2.clone(), c2);
        sbom.components.insert(id3.clone(), c3);

        // Edges: a depends on b, b depends on c.
        sbom.dependencies
            .entry(id1.clone())
            .or_default()
            .insert(id2.clone());
        sbom.dependencies
            .entry(id2.clone())
            .or_default()
            .insert(id3.clone());

        assert_eq!(sbom.roots(), vec![id1.clone()]);
        assert_eq!(sbom.deps(&id1), vec![id2.clone()]);
        assert_eq!(sbom.rdeps(&id2), vec![id1.clone()]);

        let transitive = sbom.transitive_deps(&id1);
        assert!(transitive.contains(&id2));
        assert!(transitive.contains(&id3));
        assert_eq!(transitive.len(), 2);

        // None of the three components has a hash.
        assert_eq!(sbom.missing_hashes().len(), 3);
    }

    // `ecosystems` de-duplicates values and skips components without one.
    #[test]
    fn test_ecosystems_query() {
        let mut sbom = Sbom::default();

        let mut c1 = Component::new("lodash".into(), Some("1.0".into()));
        c1.ecosystem = Some("npm".into());
        let mut c2 = Component::new("serde".into(), Some("1.0".into()));
        c2.ecosystem = Some("cargo".into());
        let mut c3 = Component::new("other-npm".into(), Some("1.0".into()));
        c3.ecosystem = Some("npm".into());
        let c4 = Component::new("no-ecosystem".into(), Some("1.0".into()));

        sbom.components.insert(c1.id.clone(), c1);
        sbom.components.insert(c2.id.clone(), c2);
        sbom.components.insert(c3.id.clone(), c3);
        sbom.components.insert(c4.id.clone(), c4);

        let ecosystems = sbom.ecosystems();
        assert_eq!(ecosystems.len(), 2);
        assert!(ecosystems.contains("npm"));
        assert!(ecosystems.contains("cargo"));
    }

    // `licenses` is the union over all components; "MIT" appears twice in the
    // input but once in the result.
    #[test]
    fn test_licenses_query() {
        let mut sbom = Sbom::default();

        let mut c1 = Component::new("a".into(), Some("1.0".into()));
        c1.licenses.insert("MIT".into());
        c1.licenses.insert("Apache-2.0".into());
        let mut c2 = Component::new("b".into(), Some("1.0".into()));
        c2.licenses.insert("MIT".into());
        c2.licenses.insert("GPL-3.0-only".into());
        let c3 = Component::new("c".into(), Some("1.0".into()));

        sbom.components.insert(c1.id.clone(), c1);
        sbom.components.insert(c2.id.clone(), c2);
        sbom.components.insert(c3.id.clone(), c3);

        let licenses = sbom.licenses();
        assert_eq!(licenses.len(), 3);
        assert!(licenses.contains("MIT"));
        assert!(licenses.contains("Apache-2.0"));
        assert!(licenses.contains("GPL-3.0-only"));
    }

    // `by_purl` finds a component stored under its purl-derived id and
    // returns `None` for an unknown purl.
    #[test]
    fn test_by_purl() {
        let mut sbom = Sbom::default();

        let mut c1 = Component::new("lodash".into(), Some("4.17.21".into()));
        c1.purl = Some("pkg:npm/lodash@4.17.21".into());
        // Re-key the component by its purl so the lookup can find it.
        c1.id = ComponentId::new(c1.purl.as_deref(), &[]);
        let c2 = Component::new("no-purl".into(), Some("1.0".into()));

        sbom.components.insert(c1.id.clone(), c1);
        sbom.components.insert(c2.id.clone(), c2);

        let found = sbom.by_purl("pkg:npm/lodash@4.17.21");
        assert!(found.is_some());
        assert_eq!(found.unwrap().name, "lodash");

        assert!(sbom.by_purl("pkg:npm/nonexistent@1.0").is_none());
    }

    // A purl that does not parse is used verbatim as the id.
    #[test]
    fn test_component_id_unparseable_purl() {
        let id = ComponentId::new(Some("not-a-valid-purl-but-still-a-string"), &[]);
        assert_eq!(id.as_str(), "not-a-valid-purl-but-still-a-string");
    }

    // `Display` prints the id text.
    #[test]
    fn test_component_id_display() {
        let id = ComponentId::new(Some("pkg:npm/foo@1.0"), &[]);
        assert_eq!(format!("{}", id), "pkg:npm/foo@1.0");
    }

    // `Sbom::normalize` resets timestamp, tools and authors.
    #[test]
    fn test_sbom_normalize_clears_metadata() {
        let mut sbom = Sbom::default();
        sbom.metadata.timestamp = Some("2024-01-01T00:00:00Z".into());
        sbom.metadata.tools.push("syft".into());
        sbom.metadata.authors.push("alice".into());

        let c = Component::new("a".into(), Some("1".into()));
        sbom.components.insert(c.id.clone(), c);

        sbom.normalize();

        assert!(sbom.metadata.timestamp.is_none());
        assert!(sbom.metadata.tools.is_empty());
        assert!(sbom.metadata.authors.is_empty());
    }

    // Only the component without any hash is reported as missing hashes.
    #[test]
    fn test_missing_hashes_mixed() {
        let mut sbom = Sbom::default();

        let c1 = Component::new("no-hash".into(), Some("1.0".into()));
        let mut c2 = Component::new("has-hash".into(), Some("1.0".into()));
        c2.hashes.insert("sha256".into(), "abc".into());

        sbom.components.insert(c1.id.clone(), c1);
        sbom.components.insert(c2.id.clone(), c2);

        let missing = sbom.missing_hashes();
        assert_eq!(missing.len(), 1);
    }

    // The purl type is returned for valid purls; invalid or empty input
    // gives `None`.
    #[test]
    fn test_ecosystem_from_purl() {
        use super::ecosystem_from_purl;

        assert_eq!(
            ecosystem_from_purl("pkg:npm/lodash@4.17.21"),
            Some("npm".to_string())
        );
        assert_eq!(
            ecosystem_from_purl("pkg:cargo/serde@1.0.0"),
            Some("cargo".to_string())
        );
        assert_eq!(
            ecosystem_from_purl("pkg:pypi/requests@2.28.0"),
            Some("pypi".to_string())
        );
        assert_eq!(
            ecosystem_from_purl("pkg:maven/org.apache/commons@1.0"),
            Some("maven".to_string())
        );
        assert_eq!(ecosystem_from_purl("invalid-purl"), None);
        assert_eq!(ecosystem_from_purl(""), None);
    }

    // Pins the canonical spelling for each supported family of inputs.
    #[test]
    fn test_canonical_algorithm_name() {
        // Hyphen-less SHA names gain their hyphen.
        assert_eq!(canonical_algorithm_name("SHA256"), "SHA-256");
        assert_eq!(canonical_algorithm_name("SHA1"), "SHA-1");
        assert_eq!(canonical_algorithm_name("SHA384"), "SHA-384");
        assert_eq!(canonical_algorithm_name("SHA512"), "SHA-512");
        assert_eq!(canonical_algorithm_name("SHA224"), "SHA-224");

        // Already-canonical names are unchanged.
        assert_eq!(canonical_algorithm_name("SHA-256"), "SHA-256");
        assert_eq!(canonical_algorithm_name("SHA-1"), "SHA-1");
        assert_eq!(canonical_algorithm_name("SHA-384"), "SHA-384");

        // Matching is case-insensitive.
        assert_eq!(canonical_algorithm_name("sha256"), "SHA-256");
        assert_eq!(canonical_algorithm_name("sha-256"), "SHA-256");

        // SHA-3 family.
        assert_eq!(canonical_algorithm_name("SHA3-256"), "SHA3-256");
        assert_eq!(canonical_algorithm_name("SHA3256"), "SHA3-256");

        // MD family.
        assert_eq!(canonical_algorithm_name("MD5"), "MD5");
        assert_eq!(canonical_algorithm_name("md5"), "MD5");

        // BLAKE family keeps its lower-case `b`.
        assert_eq!(canonical_algorithm_name("BLAKE2b-256"), "BLAKE2b-256");
        assert_eq!(canonical_algorithm_name("BLAKE2B256"), "BLAKE2b-256");
        assert_eq!(canonical_algorithm_name("BLAKE3"), "BLAKE3");

        // Adler.
        assert_eq!(canonical_algorithm_name("ADLER32"), "ADLER-32");
        assert_eq!(canonical_algorithm_name("ADLER-32"), "ADLER-32");

        // Unknown names pass through unchanged.
        assert_eq!(canonical_algorithm_name("TIGER"), "TIGER");
    }
}