1#![doc = include_str!("../readme.md")]
2
3use indexmap::IndexMap;
4use packageurl::PackageUrl;
5use serde::{Deserialize, Serialize};
6use sha2::{Digest, Sha256};
7use std::collections::{BTreeMap, BTreeSet};
8use std::str::FromStr;
9
10#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
25pub struct Sbom {
26 pub metadata: Metadata,
28 pub components: IndexMap<ComponentId, Component>,
30 pub dependencies: BTreeMap<ComponentId, BTreeSet<ComponentId>>,
32}
33
34impl Default for Sbom {
35 fn default() -> Self {
36 Self {
37 metadata: Metadata::default(),
38 components: IndexMap::new(),
39 dependencies: BTreeMap::new(),
40 }
41 }
42}
43
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
50pub struct Metadata {
51 pub timestamp: Option<String>,
53 pub tools: Vec<String>,
55 pub authors: Vec<String>,
57}
58
59#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
79pub struct ComponentId(String);
80
81impl ComponentId {
82 pub fn new(purl: Option<&str>, properties: &[(&str, &str)]) -> Self {
87 if let Some(purl) = purl {
88 if let Ok(parsed) = PackageUrl::from_str(purl) {
90 return ComponentId(parsed.to_string());
91 }
92 return ComponentId(purl.to_string());
93 }
94
95 let mut hasher = Sha256::new();
97 for (k, v) in properties {
98 hasher.update(k.as_bytes());
99 hasher.update(b":");
100 hasher.update(v.as_bytes());
101 hasher.update(b"|");
102 }
103 let hash = hex::encode(hasher.finalize());
104 ComponentId(format!("h:{}", hash))
105 }
106
107 pub fn as_str(&self) -> &str {
109 &self.0
110 }
111}
112
113impl std::fmt::Display for ComponentId {
114 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
115 write!(f, "{}", self.0)
116 }
117}
118
119#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
125pub struct Component {
126 pub id: ComponentId,
128 pub name: String,
130 pub version: Option<String>,
132 pub ecosystem: Option<String>,
134 pub supplier: Option<String>,
136 pub description: Option<String>,
138 pub purl: Option<String>,
140 pub licenses: BTreeSet<String>,
142 pub hashes: BTreeMap<String, String>,
144 pub source_ids: Vec<String>,
146}
147
148impl Component {
149 pub fn new(name: String, version: Option<String>) -> Self {
154 let mut props = vec![("name", name.as_str())];
155 if let Some(v) = &version {
156 props.push(("version", v));
157 }
158 let id = ComponentId::new(None, &props);
159
160 Self {
161 id,
162 name,
163 version,
164 ecosystem: None,
165 supplier: None,
166 description: None,
167 purl: None,
168 licenses: BTreeSet::new(),
169 hashes: BTreeMap::new(),
170 source_ids: Vec::new(),
171 }
172 }
173}
174
175impl Sbom {
176 pub fn normalize(&mut self) {
186 self.components.sort_keys();
188
189 for deps in self.dependencies.values_mut() {
191 let _ = deps;
194 }
195
196 for component in self.components.values_mut() {
198 component.normalize();
199 }
200
201 self.metadata.timestamp = None;
203 self.metadata.tools.clear();
204 self.metadata.authors.clear(); }
206
207 pub fn roots(&self) -> Vec<ComponentId> {
211 let targets: BTreeSet<_> = self.dependencies.values().flatten().collect();
212 self.components
213 .keys()
214 .filter(|id| !targets.contains(id))
215 .cloned()
216 .collect()
217 }
218
219 pub fn deps(&self, id: &ComponentId) -> Vec<ComponentId> {
221 self.dependencies
222 .get(id)
223 .map(|d| d.iter().cloned().collect())
224 .unwrap_or_default()
225 }
226
227 pub fn rdeps(&self, id: &ComponentId) -> Vec<ComponentId> {
229 self.dependencies
230 .iter()
231 .filter(|(_, children)| children.contains(id))
232 .map(|(parent, _)| parent.clone())
233 .collect()
234 }
235
236 pub fn transitive_deps(&self, id: &ComponentId) -> BTreeSet<ComponentId> {
240 let mut visited = BTreeSet::new();
241 let mut stack = vec![id.clone()];
242 while let Some(current) = stack.pop() {
243 if let Some(children) = self.dependencies.get(¤t) {
244 for child in children {
245 if visited.insert(child.clone()) {
246 stack.push(child.clone());
247 }
248 }
249 }
250 }
251 visited
252 }
253
254 pub fn ecosystems(&self) -> BTreeSet<String> {
256 self.components
257 .values()
258 .filter_map(|c| c.ecosystem.clone())
259 .collect()
260 }
261
262 pub fn licenses(&self) -> BTreeSet<String> {
264 self.components
265 .values()
266 .flat_map(|c| c.licenses.iter().cloned())
267 .collect()
268 }
269
270 pub fn missing_hashes(&self) -> Vec<ComponentId> {
274 self.components
275 .iter()
276 .filter(|(_, c)| c.hashes.is_empty())
277 .map(|(id, _)| id.clone())
278 .collect()
279 }
280
281 pub fn by_purl(&self, purl: &str) -> Option<&Component> {
283 self.components
284 .values()
285 .find(|c| c.purl.as_deref() == Some(purl))
286 }
287}
288
289impl Component {
290 pub fn normalize(&mut self) {
295 let normalized_hashes: BTreeMap<String, String> = self
296 .hashes
297 .iter()
298 .map(|(k, v)| (k.to_lowercase(), v.to_lowercase()))
299 .collect();
300 self.hashes = normalized_hashes;
301 }
302}
303
304pub fn ecosystem_from_purl(purl: &str) -> Option<String> {
318 PackageUrl::from_str(purl).ok().map(|p| p.ty().to_string())
319}
320
321pub fn parse_license_expression(license: &str) -> BTreeSet<String> {
336 match spdx::Expression::parse(license) {
337 Ok(expr) => {
338 let ids: BTreeSet<String> = expr
339 .requirements()
340 .filter_map(|r| r.req.license.id())
341 .map(|id| id.name.to_string())
342 .collect();
343 if ids.is_empty() {
344 BTreeSet::from([license.to_string()])
346 } else {
347 ids
348 }
349 }
350 Err(_) => {
351 BTreeSet::from([license.to_string()])
353 }
354 }
355}
356
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a component that has both a name and a version.
    fn versioned(name: &str, version: &str) -> Component {
        Component::new(name.to_string(), Some(version.to_string()))
    }

    /// Stores `component` in `sbom` under its own id and hands the id back.
    fn register(sbom: &mut Sbom, component: Component) -> ComponentId {
        let id = component.id.clone();
        sbom.components.insert(id.clone(), component);
        id
    }

    /// Records a `parent -> child` dependency edge.
    fn link(sbom: &mut Sbom, parent: &ComponentId, child: &ComponentId) {
        sbom.dependencies
            .entry(parent.clone())
            .or_default()
            .insert(child.clone());
    }

    /// Turns a slice of string literals into an owned, sorted set.
    fn set_of(items: &[&str]) -> BTreeSet<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    #[test]
    fn test_component_id_purl() {
        let raw = "pkg:npm/left-pad@1.3.0";
        assert_eq!(ComponentId::new(Some(raw), &[]).as_str(), raw);
    }

    #[test]
    fn test_component_id_hash_stability() {
        let props = [("name", "foo"), ("version", "1.0")];
        let first = ComponentId::new(None, &props);
        let second = ComponentId::new(None, &props);

        assert_eq!(first, second);
        assert!(first.as_str().starts_with("h:"));
    }

    #[test]
    fn test_normalization() {
        let mut component = versioned("test", "1.0");
        component.licenses.insert("MIT".to_string());
        component.licenses.insert("Apache-2.0".to_string());
        component
            .hashes
            .insert("SHA-256".to_string(), "ABC".to_string());

        component.normalize();

        // Licenses are untouched; hash key and value are lowercased.
        assert_eq!(component.licenses, set_of(&["Apache-2.0", "MIT"]));
        assert_eq!(
            component.hashes.get("sha-256").map(String::as_str),
            Some("abc")
        );
    }

    #[test]
    fn test_parse_license_expression() {
        let either = parse_license_expression("MIT OR Apache-2.0");
        assert!(either.contains("MIT"));
        assert!(either.contains("Apache-2.0"));
        assert_eq!(either.len(), 2);

        assert_eq!(parse_license_expression("MIT"), set_of(&["MIT"]));

        let both = parse_license_expression("MIT AND Apache-2.0");
        assert!(both.contains("MIT"));
        assert!(both.contains("Apache-2.0"));

        // Strings that yield no known license id come back verbatim.
        assert_eq!(
            parse_license_expression("Custom License"),
            set_of(&["Custom License"])
        );
        assert_eq!(
            parse_license_expression("LicenseRef-proprietary"),
            set_of(&["LicenseRef-proprietary"])
        );
    }

    #[test]
    fn test_license_set_equality() {
        let mut forward = Component::new("test".into(), None);
        forward.licenses.insert("MIT".into());
        forward.licenses.insert("Apache-2.0".into());

        let mut reversed = Component::new("test".into(), None);
        reversed.licenses.insert("Apache-2.0".into());
        reversed.licenses.insert("MIT".into());

        assert_eq!(forward.licenses, reversed.licenses);
    }

    #[test]
    fn test_query_api() {
        let mut sbom = Sbom::default();
        let a = register(&mut sbom, versioned("a", "1"));
        let b = register(&mut sbom, versioned("b", "1"));
        let c = register(&mut sbom, versioned("c", "1"));

        // a -> b -> c
        link(&mut sbom, &a, &b);
        link(&mut sbom, &b, &c);

        assert_eq!(sbom.roots(), vec![a.clone()]);
        assert_eq!(sbom.deps(&a), vec![b.clone()]);
        assert_eq!(sbom.rdeps(&b), vec![a.clone()]);

        let reachable = sbom.transitive_deps(&a);
        assert!(reachable.contains(&b));
        assert!(reachable.contains(&c));
        assert_eq!(reachable.len(), 2);

        assert_eq!(sbom.missing_hashes().len(), 3);
    }

    #[test]
    fn test_ecosystems_query() {
        let mut sbom = Sbom::default();
        let fixtures = [
            ("lodash", Some("npm")),
            ("serde", Some("cargo")),
            ("other-npm", Some("npm")),
            ("no-ecosystem", None),
        ];
        for (name, ecosystem) in fixtures {
            let mut component = versioned(name, "1.0");
            component.ecosystem = ecosystem.map(String::from);
            register(&mut sbom, component);
        }

        let found = sbom.ecosystems();
        assert_eq!(found.len(), 2);
        assert!(found.contains("npm"));
        assert!(found.contains("cargo"));
    }

    #[test]
    fn test_licenses_query() {
        let mut sbom = Sbom::default();

        let mut first = versioned("a", "1.0");
        first.licenses.insert("MIT".into());
        first.licenses.insert("Apache-2.0".into());
        register(&mut sbom, first);

        let mut second = versioned("b", "1.0");
        second.licenses.insert("MIT".into());
        second.licenses.insert("GPL-3.0-only".into());
        register(&mut sbom, second);

        // A component without licenses must not affect the union.
        register(&mut sbom, versioned("c", "1.0"));

        let found = sbom.licenses();
        assert_eq!(found.len(), 3);
        assert!(found.contains("MIT"));
        assert!(found.contains("Apache-2.0"));
        assert!(found.contains("GPL-3.0-only"));
    }

    #[test]
    fn test_by_purl() {
        let mut sbom = Sbom::default();

        let mut with_purl = versioned("lodash", "4.17.21");
        with_purl.purl = Some("pkg:npm/lodash@4.17.21".into());
        with_purl.id = ComponentId::new(with_purl.purl.as_deref(), &[]);
        register(&mut sbom, with_purl);
        register(&mut sbom, versioned("no-purl", "1.0"));

        let hit = sbom.by_purl("pkg:npm/lodash@4.17.21");
        assert_eq!(hit.map(|c| c.name.as_str()), Some("lodash"));

        assert!(sbom.by_purl("pkg:npm/nonexistent@1.0").is_none());
    }

    #[test]
    fn test_component_id_unparseable_purl() {
        let raw = "not-a-valid-purl-but-still-a-string";
        assert_eq!(ComponentId::new(Some(raw), &[]).as_str(), raw);
    }

    #[test]
    fn test_component_id_display() {
        let id = ComponentId::new(Some("pkg:npm/foo@1.0"), &[]);
        assert_eq!(id.to_string(), "pkg:npm/foo@1.0");
    }

    #[test]
    fn test_sbom_normalize_clears_metadata() {
        let mut sbom = Sbom::default();
        sbom.metadata.timestamp = Some("2024-01-01T00:00:00Z".into());
        sbom.metadata.tools.push("syft".into());
        sbom.metadata.authors.push("alice".into());
        register(&mut sbom, versioned("a", "1"));

        sbom.normalize();

        assert!(sbom.metadata.timestamp.is_none());
        assert!(sbom.metadata.tools.is_empty());
        assert!(sbom.metadata.authors.is_empty());
    }

    #[test]
    fn test_missing_hashes_mixed() {
        let mut sbom = Sbom::default();
        register(&mut sbom, versioned("no-hash", "1.0"));

        let mut hashed = versioned("has-hash", "1.0");
        hashed.hashes.insert("sha256".into(), "abc".into());
        register(&mut sbom, hashed);

        assert_eq!(sbom.missing_hashes().len(), 1);
    }

    #[test]
    fn test_ecosystem_from_purl() {
        let cases = [
            ("pkg:npm/lodash@4.17.21", Some("npm")),
            ("pkg:cargo/serde@1.0.0", Some("cargo")),
            ("pkg:pypi/requests@2.28.0", Some("pypi")),
            ("pkg:maven/org.apache/commons@1.0", Some("maven")),
            ("invalid-purl", None),
            ("", None),
        ];
        for (purl, expected) in cases {
            assert_eq!(ecosystem_from_purl(purl), expected.map(String::from));
        }
    }
}