#![doc = include_str!("../readme.md")]
use indexmap::IndexMap;
use packageurl::PackageUrl;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::{BTreeMap, BTreeSet};
use std::str::FromStr;
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Sbom {
pub metadata: Metadata,
pub components: IndexMap<ComponentId, Component>,
pub dependencies: BTreeMap<ComponentId, BTreeSet<ComponentId>>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub warnings: Vec<String>,
}
impl Default for Sbom {
fn default() -> Self {
Self {
metadata: Metadata::default(),
components: IndexMap::new(),
dependencies: BTreeMap::new(),
warnings: Vec::new(),
}
}
}
/// Document-level metadata attached to an [`Sbom`].
///
/// All three fields are reset (timestamp to `None`, lists emptied) by
/// [`Sbom::normalize`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct Metadata {
/// Timestamp recorded for the document, if any. Kept as a plain string;
/// nothing in this file parses or validates it.
pub timestamp: Option<String>,
/// Tool entries recorded for the document.
/// NOTE(review): presumably the generators that produced the SBOM — confirm against the parsers.
pub tools: Vec<String>,
/// Author entries recorded for the document.
pub authors: Vec<String>,
}
/// Identifier of a [`Component`]; wraps a single string.
///
/// [`ComponentId::new`] produces either a package URL string or an
/// `h:`-prefixed hex SHA-256 digest of the supplied properties. The `Hash` and
/// `Ord` derives let the id serve as a key in the `IndexMap`, `BTreeMap` and
/// `BTreeSet` collections held by [`Sbom`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct ComponentId(String);
impl ComponentId {
    /// Builds an identifier from a package URL when one is supplied, and from
    /// a digest of `properties` otherwise.
    ///
    /// * `Some(purl)`: the id is the re-serialized form of the parsed purl.
    ///   If the string does not parse as a purl it is used verbatim.
    ///   `properties` is ignored in this case.
    /// * `None`: every `(key, value)` pair is fed to SHA-256 as `key:value|`
    ///   in slice order, and the id is `h:` followed by the hex digest.
    ///   Because the pairs are hashed in order, callers must supply them in a
    ///   consistent order to obtain a stable id.
    pub fn new(purl: Option<&str>, properties: &[(&str, &str)]) -> Self {
        match purl {
            Some(raw) => {
                // Prefer the parser's canonical spelling; fall back to the raw text.
                let text = match PackageUrl::from_str(raw) {
                    Ok(parsed) => parsed.to_string(),
                    Err(_) => raw.to_string(),
                };
                ComponentId(text)
            }
            None => {
                let mut digest = Sha256::new();
                for &(key, value) in properties {
                    digest.update(key.as_bytes());
                    digest.update(b":");
                    digest.update(value.as_bytes());
                    digest.update(b"|");
                }
                let hex_digest = hex::encode(digest.finalize());
                ComponentId(format!("h:{}", hex_digest))
            }
        }
    }

    /// Borrows the identifier as a string slice.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
impl std::fmt::Display for ComponentId {
    /// Writes the raw identifier string with no decoration.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}
/// A single entry in an [`Sbom`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Component {
/// Identifier of this component; [`Component::new`] derives it from the
/// name and version.
pub id: ComponentId,
/// Component name.
pub name: String,
/// Component version, when known.
pub version: Option<String>,
/// Ecosystem label (the tests in this file use values such as `npm` and `cargo`).
pub ecosystem: Option<String>,
/// Supplier of the component, when known.
pub supplier: Option<String>,
/// Free-text description, when available.
pub description: Option<String>,
/// Package URL string, when available. Not validated by this type.
pub purl: Option<String>,
/// License strings attached to the component; the set keeps them unique and ordered.
pub licenses: BTreeSet<String>,
/// Hash entries. [`Component::normalize`] lowercases both keys and values.
/// NOTE(review): keys look like algorithm names and values like hex digests — confirm against the producers.
pub hashes: BTreeMap<String, String>,
/// NOTE(review): meaning is not visible in this file — presumably identifiers
/// the component carried in the source document(s); confirm with the parsers.
pub source_ids: Vec<String>,
}
impl Component {
pub fn new(name: String, version: Option<String>) -> Self {
let mut props = vec![("name", name.as_str())];
if let Some(v) = &version {
props.push(("version", v));
}
let id = ComponentId::new(None, &props);
Self {
id,
name,
version,
ecosystem: None,
supplier: None,
description: None,
purl: None,
licenses: BTreeSet::new(),
hashes: BTreeMap::new(),
source_ids: Vec::new(),
}
}
}
impl Sbom {
    /// Rewrites the SBOM into a canonical form.
    ///
    /// Components are sorted by id, each component is normalized via
    /// [`Component::normalize`], and the document metadata (timestamp, tools,
    /// authors) is cleared.
    pub fn normalize(&mut self) {
        self.components.sort_keys();
        for component in self.components.values_mut() {
            component.normalize();
        }
        self.metadata.timestamp = None;
        self.metadata.tools.clear();
        self.metadata.authors.clear();
    }

    /// Returns the ids of all components that no other entry depends on.
    ///
    /// Only ids present in `components` are considered, and they are returned
    /// in the map's current iteration order.
    pub fn roots(&self) -> Vec<ComponentId> {
        // Every id that appears on the right-hand side of some edge.
        let targets: BTreeSet<_> = self.dependencies.values().flatten().collect();
        self.components
            .keys()
            .filter(|id| !targets.contains(id))
            .cloned()
            .collect()
    }

    /// Returns the direct dependencies of `id` in sorted order, or an empty
    /// list when `id` has no recorded edges.
    pub fn deps(&self, id: &ComponentId) -> Vec<ComponentId> {
        self.dependencies
            .get(id)
            .map(|d| d.iter().cloned().collect())
            .unwrap_or_default()
    }

    /// Returns every id that lists `id` as a direct dependency, in sorted order.
    pub fn rdeps(&self, id: &ComponentId) -> Vec<ComponentId> {
        self.dependencies
            .iter()
            .filter(|(_, children)| children.contains(id))
            .map(|(parent, _)| parent.clone())
            .collect()
    }

    /// Returns every id reachable from `id` by following dependency edges.
    ///
    /// The starting id itself is included only when it is reachable through a
    /// cycle. Cycles are handled: each id is expanded at most once.
    pub fn transitive_deps(&self, id: &ComponentId) -> BTreeSet<ComponentId> {
        let mut visited = BTreeSet::new();
        // The work stack borrows ids, so each id is cloned once (into `visited`).
        let mut stack = vec![id];
        while let Some(current) = stack.pop() {
            if let Some(children) = self.dependencies.get(current) {
                for child in children {
                    // `insert` returns false for ids already seen, which stops re-expansion.
                    if visited.insert(child.clone()) {
                        stack.push(child);
                    }
                }
            }
        }
        visited
    }

    /// Returns the distinct ecosystem labels used by the components;
    /// components without an ecosystem are skipped.
    pub fn ecosystems(&self) -> BTreeSet<String> {
        self.components
            .values()
            .filter_map(|c| c.ecosystem.clone())
            .collect()
    }

    /// Returns the union of the license strings of all components.
    pub fn licenses(&self) -> BTreeSet<String> {
        self.components
            .values()
            .flat_map(|c| c.licenses.iter().cloned())
            .collect()
    }

    /// Returns the ids of components whose `hashes` map is empty.
    pub fn missing_hashes(&self) -> Vec<ComponentId> {
        self.components
            .iter()
            .filter(|(_, c)| c.hashes.is_empty())
            .map(|(id, _)| id.clone())
            .collect()
    }

    /// Looks up a component by package URL.
    ///
    /// The purl is converted to an id exactly as [`ComponentId::new`] does, so
    /// the lookup succeeds only for components stored under a purl-derived id.
    pub fn by_purl(&self, purl: &str) -> Option<&Component> {
        let id = ComponentId::new(Some(purl), &[]);
        self.components.get(&id)
    }
}
impl Component {
    /// Lowercases every key and every value of `hashes`.
    ///
    /// Entries whose keys differ only by case end up under one key; no other
    /// field is touched.
    pub fn normalize(&mut self) {
        let mut lowered: BTreeMap<String, String> = BTreeMap::new();
        for (algorithm, digest) in &self.hashes {
            lowered.insert(algorithm.to_lowercase(), digest.to_lowercase());
        }
        self.hashes = lowered;
    }
}
/// Extracts the package type (for example `npm` or `cargo`) from a package
/// URL, or returns `None` when the string does not parse as one.
pub fn ecosystem_from_purl(purl: &str) -> Option<String> {
    match PackageUrl::from_str(purl) {
        Ok(parsed) => Some(parsed.ty().to_string()),
        Err(_) => None,
    }
}
/// Splits a license string into the individual license identifiers it names.
///
/// The input is parsed as an SPDX expression. Each term that is a listed SPDX
/// license contributes its canonical id name. Terms that are not listed
/// licenses (`LicenseRef-…` / `DocumentRef-…` references) contribute their
/// textual form, so a mixed expression such as `MIT OR LicenseRef-x` no longer
/// loses the reference.
///
/// When the input cannot be parsed, or yields no terms, the whole input string
/// is returned as the only element.
pub fn parse_license_expression(license: &str) -> BTreeSet<String> {
    let verbatim = || BTreeSet::from([license.to_string()]);

    let expr = match spdx::Expression::parse(license) {
        Ok(expr) => expr,
        Err(_) => return verbatim(),
    };

    let ids: BTreeSet<String> = expr
        .requirements()
        .map(|r| match r.req.license.id() {
            Some(id) => id.name.to_string(),
            // Not a listed SPDX license: keep the reference instead of dropping it.
            None => r.req.license.to_string(),
        })
        .collect();

    if ids.is_empty() {
        verbatim()
    } else {
        ids
    }
}
/// Maps a hash-algorithm name to its canonical hyphenated spelling.
///
/// Matching ignores case and the separators `-` and `_`, so `sha256`,
/// `SHA-256` and `SHA_256` all become `SHA-256`, and underscore spellings such
/// as `SHA3_256` or `BLAKE2b_512` become `SHA3-256` and `BLAKE2b-512`.
///
/// Names that are not in the table are returned unchanged.
pub fn canonical_algorithm_name(name: &str) -> String {
    // Lookup key: separators removed, uppercased.
    let key = name
        .replace(|c: char| c == '-' || c == '_', "")
        .to_uppercase();

    let canonical = match key.as_str() {
        "MD2" => "MD2",
        "MD4" => "MD4",
        "MD5" => "MD5",
        "MD6" => "MD6",
        "SHA1" => "SHA-1",
        "SHA224" => "SHA-224",
        "SHA256" => "SHA-256",
        "SHA384" => "SHA-384",
        "SHA512" => "SHA-512",
        "SHA3256" => "SHA3-256",
        "SHA3384" => "SHA3-384",
        "SHA3512" => "SHA3-512",
        "BLAKE2B256" => "BLAKE2b-256",
        "BLAKE2B384" => "BLAKE2b-384",
        "BLAKE2B512" => "BLAKE2b-512",
        "BLAKE3" => "BLAKE3",
        "ADLER32" => "ADLER-32",
        // Unknown algorithm: hand the caller's spelling back untouched.
        _ => name,
    };
    canonical.to_string()
}
// Unit tests for the SBOM data model: id construction, normalization,
// the query helpers on `Sbom`, and the free helper functions.
#[cfg(test)]
mod tests {
use super::*;
// A well-formed purl is used as the component id unchanged.
#[test]
fn test_component_id_purl() {
let purl = "pkg:npm/left-pad@1.3.0";
let id = ComponentId::new(Some(purl), &[]);
assert_eq!(id.as_str(), purl);
}
// Hash-based ids are deterministic for identical properties and carry the `h:` prefix.
#[test]
fn test_component_id_hash_stability() {
let props = [("name", "foo"), ("version", "1.0")];
let id1 = ComponentId::new(None, &props);
let id2 = ComponentId::new(None, &props);
assert_eq!(id1, id2);
assert!(id1.as_str().starts_with("h:"));
}
// `Component::normalize` lowercases hash keys and values; the license
// assertion confirms the license set is left intact.
#[test]
fn test_normalization() {
let mut comp = Component::new("test".to_string(), Some("1.0".to_string()));
comp.licenses.insert("MIT".to_string());
comp.licenses.insert("Apache-2.0".to_string());
comp.hashes.insert("SHA-256".to_string(), "ABC".to_string());
comp.normalize();
assert_eq!(
comp.licenses,
BTreeSet::from(["Apache-2.0".to_string(), "MIT".to_string()])
);
assert_eq!(comp.hashes.get("sha-256").unwrap(), "abc");
}
// Compound expressions split into their ids; unparseable text and a lone
// `LicenseRef-` come back verbatim as a single element.
#[test]
fn test_parse_license_expression() {
let ids = parse_license_expression("MIT OR Apache-2.0");
assert!(ids.contains("MIT"));
assert!(ids.contains("Apache-2.0"));
assert_eq!(ids.len(), 2);
let ids = parse_license_expression("MIT");
assert_eq!(ids, BTreeSet::from(["MIT".to_string()]));
let ids = parse_license_expression("MIT AND Apache-2.0");
assert!(ids.contains("MIT"));
assert!(ids.contains("Apache-2.0"));
let ids = parse_license_expression("Custom License");
assert_eq!(ids, BTreeSet::from(["Custom License".to_string()]));
let ids = parse_license_expression("LicenseRef-proprietary");
assert_eq!(ids, BTreeSet::from(["LicenseRef-proprietary".to_string()]));
}
// License sets compare equal regardless of insertion order.
#[test]
fn test_license_set_equality() {
let mut c1 = Component::new("test".into(), None);
c1.licenses.insert("MIT".into());
c1.licenses.insert("Apache-2.0".into());
let mut c2 = Component::new("test".into(), None);
c2.licenses.insert("Apache-2.0".into());
c2.licenses.insert("MIT".into());
assert_eq!(c1.licenses, c2.licenses);
}
// Builds the chain a -> b -> c and checks roots, deps, rdeps,
// transitive_deps and missing_hashes against it.
#[test]
fn test_query_api() {
let mut sbom = Sbom::default();
let c1 = Component::new("a".into(), Some("1".into()));
let c2 = Component::new("b".into(), Some("1".into()));
let c3 = Component::new("c".into(), Some("1".into()));
let id1 = c1.id.clone();
let id2 = c2.id.clone();
let id3 = c3.id.clone();
sbom.components.insert(id1.clone(), c1);
sbom.components.insert(id2.clone(), c2);
sbom.components.insert(id3.clone(), c3);
sbom.dependencies
.entry(id1.clone())
.or_default()
.insert(id2.clone());
sbom.dependencies
.entry(id2.clone())
.or_default()
.insert(id3.clone());
assert_eq!(sbom.roots(), vec![id1.clone()]);
assert_eq!(sbom.deps(&id1), vec![id2.clone()]);
assert_eq!(sbom.rdeps(&id2), vec![id1.clone()]);
let transitive = sbom.transitive_deps(&id1);
assert!(transitive.contains(&id2));
assert!(transitive.contains(&id3));
assert_eq!(transitive.len(), 2);
assert_eq!(sbom.missing_hashes().len(), 3);
}
// `ecosystems` deduplicates labels and skips components without one.
#[test]
fn test_ecosystems_query() {
let mut sbom = Sbom::default();
let mut c1 = Component::new("lodash".into(), Some("1.0".into()));
c1.ecosystem = Some("npm".into());
let mut c2 = Component::new("serde".into(), Some("1.0".into()));
c2.ecosystem = Some("cargo".into());
let mut c3 = Component::new("other-npm".into(), Some("1.0".into()));
c3.ecosystem = Some("npm".into());
let c4 = Component::new("no-ecosystem".into(), Some("1.0".into()));
sbom.components.insert(c1.id.clone(), c1);
sbom.components.insert(c2.id.clone(), c2);
sbom.components.insert(c3.id.clone(), c3);
sbom.components.insert(c4.id.clone(), c4);
let ecosystems = sbom.ecosystems();
assert_eq!(ecosystems.len(), 2);
assert!(ecosystems.contains("npm"));
assert!(ecosystems.contains("cargo"));
}
// `licenses` returns the union across components, without duplicates.
#[test]
fn test_licenses_query() {
let mut sbom = Sbom::default();
let mut c1 = Component::new("a".into(), Some("1.0".into()));
c1.licenses.insert("MIT".into());
c1.licenses.insert("Apache-2.0".into());
let mut c2 = Component::new("b".into(), Some("1.0".into()));
c2.licenses.insert("MIT".into());
c2.licenses.insert("GPL-3.0-only".into());
let c3 = Component::new("c".into(), Some("1.0".into()));
sbom.components.insert(c1.id.clone(), c1);
sbom.components.insert(c2.id.clone(), c2);
sbom.components.insert(c3.id.clone(), c3);
let licenses = sbom.licenses();
assert_eq!(licenses.len(), 3);
assert!(licenses.contains("MIT"));
assert!(licenses.contains("Apache-2.0"));
assert!(licenses.contains("GPL-3.0-only"));
}
// `by_purl` finds a component stored under a purl-derived id and returns
// `None` for an unknown purl.
#[test]
fn test_by_purl() {
let mut sbom = Sbom::default();
let mut c1 = Component::new("lodash".into(), Some("4.17.21".into()));
c1.purl = Some("pkg:npm/lodash@4.17.21".into());
c1.id = ComponentId::new(c1.purl.as_deref(), &[]);
let c2 = Component::new("no-purl".into(), Some("1.0".into()));
sbom.components.insert(c1.id.clone(), c1);
sbom.components.insert(c2.id.clone(), c2);
let found = sbom.by_purl("pkg:npm/lodash@4.17.21");
assert!(found.is_some());
assert_eq!(found.unwrap().name, "lodash");
assert!(sbom.by_purl("pkg:npm/nonexistent@1.0").is_none());
}
// A string that is not a valid purl is still accepted and used verbatim as the id.
#[test]
fn test_component_id_unparseable_purl() {
let id = ComponentId::new(Some("not-a-valid-purl-but-still-a-string"), &[]);
assert_eq!(id.as_str(), "not-a-valid-purl-but-still-a-string");
}
// `Display` prints the raw id string.
#[test]
fn test_component_id_display() {
let id = ComponentId::new(Some("pkg:npm/foo@1.0"), &[]);
assert_eq!(format!("{}", id), "pkg:npm/foo@1.0");
}
// `Sbom::normalize` wipes timestamp, tools and authors.
#[test]
fn test_sbom_normalize_clears_metadata() {
let mut sbom = Sbom::default();
sbom.metadata.timestamp = Some("2024-01-01T00:00:00Z".into());
sbom.metadata.tools.push("syft".into());
sbom.metadata.authors.push("alice".into());
let c = Component::new("a".into(), Some("1".into()));
sbom.components.insert(c.id.clone(), c);
sbom.normalize();
assert!(sbom.metadata.timestamp.is_none());
assert!(sbom.metadata.tools.is_empty());
assert!(sbom.metadata.authors.is_empty());
}
// Only the component without any hash entry is reported as missing hashes.
#[test]
fn test_missing_hashes_mixed() {
let mut sbom = Sbom::default();
let c1 = Component::new("no-hash".into(), Some("1.0".into()));
let mut c2 = Component::new("has-hash".into(), Some("1.0".into()));
c2.hashes.insert("sha256".into(), "abc".into());
sbom.components.insert(c1.id.clone(), c1);
sbom.components.insert(c2.id.clone(), c2);
let missing = sbom.missing_hashes();
assert_eq!(missing.len(), 1);
}
// The purl type is returned as the ecosystem; invalid or empty input yields `None`.
#[test]
fn test_ecosystem_from_purl() {
use super::ecosystem_from_purl;
assert_eq!(
ecosystem_from_purl("pkg:npm/lodash@4.17.21"),
Some("npm".to_string())
);
assert_eq!(
ecosystem_from_purl("pkg:cargo/serde@1.0.0"),
Some("cargo".to_string())
);
assert_eq!(
ecosystem_from_purl("pkg:pypi/requests@2.28.0"),
Some("pypi".to_string())
);
assert_eq!(
ecosystem_from_purl("pkg:maven/org.apache/commons@1.0"),
Some("maven".to_string())
);
assert_eq!(ecosystem_from_purl("invalid-purl"), None);
assert_eq!(ecosystem_from_purl(""), None);
}
// Hyphenated, unhyphenated and lowercase spellings map to one canonical
// name; unknown algorithms pass through unchanged.
#[test]
fn test_canonical_algorithm_name() {
assert_eq!(canonical_algorithm_name("SHA256"), "SHA-256");
assert_eq!(canonical_algorithm_name("SHA1"), "SHA-1");
assert_eq!(canonical_algorithm_name("SHA384"), "SHA-384");
assert_eq!(canonical_algorithm_name("SHA512"), "SHA-512");
assert_eq!(canonical_algorithm_name("SHA224"), "SHA-224");
assert_eq!(canonical_algorithm_name("SHA-256"), "SHA-256");
assert_eq!(canonical_algorithm_name("SHA-1"), "SHA-1");
assert_eq!(canonical_algorithm_name("SHA-384"), "SHA-384");
assert_eq!(canonical_algorithm_name("sha256"), "SHA-256");
assert_eq!(canonical_algorithm_name("sha-256"), "SHA-256");
assert_eq!(canonical_algorithm_name("SHA3-256"), "SHA3-256");
assert_eq!(canonical_algorithm_name("SHA3256"), "SHA3-256");
assert_eq!(canonical_algorithm_name("MD5"), "MD5");
assert_eq!(canonical_algorithm_name("md5"), "MD5");
assert_eq!(canonical_algorithm_name("BLAKE2b-256"), "BLAKE2b-256");
assert_eq!(canonical_algorithm_name("BLAKE2B256"), "BLAKE2b-256");
assert_eq!(canonical_algorithm_name("BLAKE3"), "BLAKE3");
assert_eq!(canonical_algorithm_name("ADLER32"), "ADLER-32");
assert_eq!(canonical_algorithm_name("ADLER-32"), "ADLER-32");
assert_eq!(canonical_algorithm_name("TIGER"), "TIGER");
}
}