use std::collections::HashSet;
use serde_json::{Map, Value, json};
use crate::model::{
CanonicalId, Component, DependencyType, Hash, HashAlgorithm, LicenseExpression, NormalizedSbom,
};
use super::EmitError;
use super::fidelity::FidelityReport;
/// Value written to the document's `spdxVersion` field.
const SPDX_VERSION: &str = "SPDX-2.3";
/// SPDX identifier of the document element itself; reserved so no package is
/// assigned the same id, and used as the source of the `DESCRIBES` relationship.
const DOC_SPDX_ID: &str = "SPDXRef-DOCUMENT";
pub fn emit_spdx(sbom: &NormalizedSbom) -> Result<(String, FidelityReport), EmitError> {
let mut report = FidelityReport::new(sbom.document.format.to_string(), "SPDX 2.3");
let id_assigner = SpdxIdAssigner::build(sbom);
let mut packages = Vec::with_capacity(sbom.components.len());
for (id, component) in &sbom.components {
let spdx_id = id_assigner.id_for(id);
packages.push(emit_package(component, spdx_id, &mut report));
}
let mut doc = Map::new();
doc.insert("spdxVersion".to_string(), json!(SPDX_VERSION));
doc.insert("dataLicense".to_string(), json!("CC0-1.0"));
doc.insert("SPDXID".to_string(), json!(DOC_SPDX_ID));
doc.insert(
"name".to_string(),
json!(
sbom.document
.name
.clone()
.unwrap_or_else(|| "converted-sbom".to_string())
),
);
doc.insert(
"documentNamespace".to_string(),
json!(document_namespace(sbom)),
);
doc.insert(
"creationInfo".to_string(),
emit_creation_info(sbom, &mut report),
);
doc.insert("packages".to_string(), Value::Array(packages));
let relationships = emit_relationships(sbom, &id_assigner, &mut report);
doc.insert("relationships".to_string(), Value::Array(relationships));
note_unmappable(sbom, &mut report);
let serialized = serde_json::to_string_pretty(&Value::Object(doc))?;
Ok((serialized, report))
}
/// Lookup table from a component's canonical id to the SPDX element id
/// (`SPDXRef-…`) assigned to it for this emission.
struct SpdxIdAssigner {
    map: std::collections::HashMap<CanonicalId, String>,
}

impl SpdxIdAssigner {
    /// Assigns a unique SPDX id to every component of `sbom`.
    ///
    /// Each id is `SPDXRef-` followed by the sanitized base from
    /// `sanitize_spdx_id`. When that id is already taken (including by the
    /// reserved document id), `-1`, `-2`, … is appended until it is free.
    fn build(sbom: &NormalizedSbom) -> Self {
        let mut assigned = std::collections::HashMap::with_capacity(sbom.components.len());
        let mut taken: HashSet<String> = HashSet::new();
        taken.insert(DOC_SPDX_ID.to_string());

        for (position, (canonical, component)) in sbom.components.iter().enumerate() {
            let base = sanitize_spdx_id(&component.name, canonical, position);
            let mut spdx_id = format!("SPDXRef-{base}");
            let mut attempt = 1usize;
            // `insert` returns false when the id is already present, so the
            // loop ends on the first free candidate and reserves it at once.
            while !taken.insert(spdx_id.clone()) {
                spdx_id = format!("SPDXRef-{base}-{attempt}");
                attempt += 1;
            }
            assigned.insert(canonical.clone(), spdx_id);
        }

        Self { map: assigned }
    }

    /// Returns the SPDX id assigned to `id`, or the document id when `id` was
    /// never assigned one.
    fn id_for(&self, id: &CanonicalId) -> &str {
        match self.map.get(id) {
            Some(spdx_id) => spdx_id.as_str(),
            None => DOC_SPDX_ID,
        }
    }

    /// Reports whether `id` has an assigned SPDX id.
    fn contains(&self, id: &CanonicalId) -> bool {
        self.map.get(id).is_some()
    }
}
/// Derives the id-string part of an SPDX id for a component.
///
/// Characters other than ASCII alphanumerics, `.` and `-` become `-`, and
/// leading/trailing `-` are stripped. The component `name` is tried first, then
/// the canonical id's value; if both sanitize to nothing, `Package-{index}` is
/// returned.
fn sanitize_spdx_id(name: &str, id: &CanonicalId, index: usize) -> String {
    /// Maps disallowed characters to `-` and trims `-` from both ends.
    fn scrub(chars: impl Iterator<Item = char>) -> String {
        let mapped: String = chars
            .map(|c| match c {
                '.' | '-' => c,
                _ if c.is_ascii_alphanumeric() => c,
                _ => '-',
            })
            .collect();
        mapped.trim_matches('-').to_string()
    }

    let from_name = scrub(name.chars());
    if !from_name.is_empty() {
        return from_name;
    }

    let from_id = scrub(id.value().chars());
    if from_id.is_empty() {
        format!("Package-{index}")
    } else {
        from_id
    }
}
fn emit_package(component: &Component, spdx_id: &str, report: &mut FidelityReport) -> Value {
let mut obj = Map::new();
obj.insert("SPDXID".to_string(), json!(spdx_id));
report.synthesized("SPDXID from canonical id");
obj.insert("name".to_string(), json!(component.name));
if let Some(version) = &component.version {
obj.insert("versionInfo".to_string(), json!(version));
}
obj.insert(
"downloadLocation".to_string(),
json!(download_location(component)),
);
obj.insert("filesAnalyzed".to_string(), json!(false));
let declared = license_field(&component.licenses.declared);
obj.insert("licenseDeclared".to_string(), json!(declared));
let concluded = component
.licenses
.concluded
.as_ref()
.map_or_else(|| declared.clone(), |c| c.expression.clone());
obj.insert("licenseConcluded".to_string(), json!(concluded));
if let Some(copyright) = &component.copyright {
obj.insert("copyrightText".to_string(), json!(copyright));
}
if let Some(desc) = &component.description {
obj.insert("description".to_string(), json!(desc));
}
if let Some(supplier) = &component.supplier {
obj.insert(
"supplier".to_string(),
json!(format!("Organization: {}", supplier.name)),
);
}
if let Some(checksums) = emit_checksums(&component.hashes) {
obj.insert("checksums".to_string(), checksums);
}
if let Some(ext_refs) = emit_external_refs(component) {
obj.insert("externalRefs".to_string(), ext_refs);
}
splice_preserved_fields(component, &mut obj, report);
Value::Object(obj)
}
/// Picks the value for a package's `downloadLocation`.
///
/// Returns the URL of the first external reference whose type is VCS, source
/// distribution, binary distribution or website; `NOASSERTION` when the
/// component has no such reference.
fn download_location(component: &Component) -> String {
    use crate::model::ExternalRefType;

    for reference in component.external_refs.iter() {
        let usable = matches!(
            reference.ref_type,
            ExternalRefType::Vcs
                | ExternalRefType::SourceDistribution
                | ExternalRefType::BinaryDistribution
                | ExternalRefType::Website
        );
        if usable {
            return reference.url.clone();
        }
    }
    "NOASSERTION".to_string()
}
/// Renders a list of declared license expressions as one SPDX license string.
///
/// * no entries → `NOASSERTION`
/// * one entry → that expression verbatim
/// * several entries → each wrapped in parentheses and joined with ` AND `
fn license_field(declared: &[LicenseExpression]) -> String {
    match declared {
        [] => "NOASSERTION".to_string(),
        [only] => only.expression.as_str().to_string(),
        several => {
            let wrapped: Vec<String> = several
                .iter()
                .map(|license| format!("({})", license.expression.as_str()))
                .collect();
            wrapped.join(" AND ")
        }
    }
}
/// Converts component hashes into an SPDX `checksums` array.
///
/// Hashes whose algorithm `spdx_algorithm` cannot name are left out. Returns
/// `None` when no hash remains, so the caller can omit the field entirely.
fn emit_checksums(hashes: &[Hash]) -> Option<Value> {
    let mut entries = Vec::new();
    for hash in hashes {
        if let Some(alg) = spdx_algorithm(&hash.algorithm) {
            entries.push(json!({ "algorithm": alg, "checksumValue": hash.value }));
        }
    }
    (!entries.is_empty()).then(|| Value::Array(entries))
}
/// Maps a hash algorithm to the name written into SPDX `checksums[].algorithm`.
///
/// Returns `None` for the Streebog variants and for `Other(_)`, which this
/// emitter has no SPDX name for.
fn spdx_algorithm(alg: &HashAlgorithm) -> Option<&'static str> {
    match alg {
        HashAlgorithm::Md5 => Some("MD5"),
        HashAlgorithm::Sha1 => Some("SHA1"),
        HashAlgorithm::Sha256 => Some("SHA256"),
        HashAlgorithm::Sha384 => Some("SHA384"),
        HashAlgorithm::Sha512 => Some("SHA512"),
        HashAlgorithm::Sha3_256 => Some("SHA3-256"),
        HashAlgorithm::Sha3_384 => Some("SHA3-384"),
        HashAlgorithm::Sha3_512 => Some("SHA3-512"),
        HashAlgorithm::Blake2b256 => Some("BLAKE2b-256"),
        HashAlgorithm::Blake2b384 => Some("BLAKE2b-384"),
        HashAlgorithm::Blake2b512 => Some("BLAKE2b-512"),
        HashAlgorithm::Blake3 => Some("BLAKE3"),
        HashAlgorithm::Streebog256 | HashAlgorithm::Streebog512 | HashAlgorithm::Other(_) => None,
    }
}
/// Builds the SPDX `externalRefs` array from a component's identifiers.
///
/// Emits, in this order: the purl (`PACKAGE-MANAGER` / `purl`), every SWHID
/// (`PERSISTENT-ID` / `swh`), and every CPE (`SECURITY`, typed `cpe23Type` when
/// the value starts with `cpe:2.3:` and `cpe22Type` otherwise). Returns `None`
/// when the component has none of these.
fn emit_external_refs(component: &Component) -> Option<Value> {
    /// Assembles one external-reference object.
    fn external_ref(category: &str, ref_type: &str, locator: &str) -> Value {
        json!({
            "referenceCategory": category,
            "referenceType": ref_type,
            "referenceLocator": locator,
        })
    }

    let identifiers = &component.identifiers;
    let mut refs = Vec::new();

    if let Some(purl) = &identifiers.purl {
        refs.push(external_ref("PACKAGE-MANAGER", "purl", purl));
    }

    refs.extend(
        identifiers
            .swhid
            .iter()
            .map(|swhid| external_ref("PERSISTENT-ID", "swh", &swhid.to_string())),
    );

    refs.extend(identifiers.cpe.iter().map(|cpe| {
        let is_cpe23 = cpe.starts_with("cpe:2.3:");
        external_ref(
            "SECURITY",
            if is_cpe23 { "cpe23Type" } else { "cpe22Type" },
            cpe,
        )
    }));

    if refs.is_empty() {
        return None;
    }
    Some(Value::Array(refs))
}
fn splice_preserved_fields(
component: &Component,
obj: &mut Map<String, Value>,
report: &mut FidelityReport,
) {
let Some(src) = component.extensions.source_json.as_deref() else {
return;
};
let looks_spdx = src.get("SPDXID").is_some() || src.get("spdxId").is_some();
if !looks_spdx {
return;
}
for key in [
"primaryPackagePurpose",
"homepage",
"sourceInfo",
"comment",
"summary",
"originator",
"packageFileName",
"builtDate",
"releaseDate",
"validUntilDate",
] {
if let Some(block) = src.get(key)
&& !obj.contains_key(key)
{
obj.insert(key.to_string(), block.clone());
report.preserved(format!("package.{key}"));
}
}
}
/// Chooses the `documentNamespace` for the emitted document.
///
/// The SBOM's serial number is reused as-is when it starts with `http://`,
/// `https://` or `urn:`. Otherwise a namespace under
/// `https://spdx.org/spdxdocs/` is built from the sanitized document name
/// (`converted-sbom` when unnamed) and the content hash in zero-padded hex.
fn document_namespace(sbom: &NormalizedSbom) -> String {
    const URI_PREFIXES: [&str; 3] = ["http://", "https://", "urn:"];

    let uri_like_serial = sbom
        .document
        .serial_number
        .as_ref()
        .filter(|serial| URI_PREFIXES.iter().any(|&prefix| serial.starts_with(prefix)));
    if let Some(serial) = uri_like_serial {
        return serial.clone();
    }

    let slug = sanitize_namespace(sbom.document.name.as_deref().unwrap_or("converted-sbom"));
    format!(
        "https://spdx.org/spdxdocs/{}-{:016x}",
        slug, sbom.content_hash
    )
}
/// Turns a document name into a path segment for the generated namespace URL.
///
/// Every character other than an ASCII alphanumeric, `.` or `-` is replaced by
/// `-`, then leading and trailing `-` are removed. Returns `sbom` when nothing
/// is left.
fn sanitize_namespace(name: &str) -> String {
    let dashed = name.replace(
        |c: char| !(c.is_ascii_alphanumeric() || matches!(c, '.' | '-')),
        "-",
    );
    match dashed.trim_matches('-') {
        "" => "sbom".to_string(),
        segment => segment.to_string(),
    }
}
fn emit_creation_info(sbom: &NormalizedSbom, report: &mut FidelityReport) -> Value {
use crate::model::CreatorType;
let mut creators: Vec<String> = sbom
.document
.creators
.iter()
.map(|c| {
let prefix = match c.creator_type {
CreatorType::Tool => "Tool",
CreatorType::Organization => "Organization",
CreatorType::Person => "Person",
};
format!("{prefix}: {}", c.name)
})
.collect();
if creators.is_empty() {
creators.push("Tool: sbom-tools".to_string());
report.synthesized("creationInfo.creators (default tool)");
}
json!({
"created": sbom.document.created.to_rfc3339(),
"creators": creators,
})
}
/// Builds the SPDX `relationships` array.
///
/// * When the SBOM has a primary component with an assigned SPDX id, a
///   `DESCRIBES` relationship from the document to it is emitted first.
/// * Every edge whose endpoints both have assigned SPDX ids becomes one
///   relationship; kinds without an SPDX mapping are emitted as `OTHER`.
/// * Edges with an endpoint that has no assigned SPDX id cannot be expressed
///   and are omitted.
///
/// Both lossy cases are recorded on `report` (once each), so the omission of an
/// edge is visible to the caller rather than silent.
fn emit_relationships(
    sbom: &NormalizedSbom,
    id_assigner: &SpdxIdAssigner,
    report: &mut FidelityReport,
) -> Vec<Value> {
    let mut rels = Vec::new();

    if let Some(primary) = sbom
        .primary_component_id
        .as_ref()
        .filter(|id| id_assigner.contains(id))
    {
        rels.push(json!({
            "spdxElementId": DOC_SPDX_ID,
            "relationshipType": "DESCRIBES",
            "relatedSpdxElement": id_assigner.id_for(primary),
        }));
        report.synthesized("DESCRIBES for primary component");
    }

    let mut unmapped_kind = false;
    let mut dangling_edge = false;
    for edge in &sbom.edges {
        if !id_assigner.contains(&edge.from) || !id_assigner.contains(&edge.to) {
            // `id_for` would fall back to the document id here and produce a
            // wrong relationship, so the edge is skipped — but the loss is
            // remembered and reported below.
            dangling_edge = true;
            continue;
        }
        let from = id_assigner.id_for(&edge.from);
        let to = id_assigner.id_for(&edge.to);
        let rel_type = match spdx_relationship_type(&edge.relationship) {
            Some(mapped) => mapped,
            None => {
                unmapped_kind = true;
                "OTHER"
            }
        };
        rels.push(json!({
            "spdxElementId": from,
            "relationshipType": rel_type,
            "relatedSpdxElement": to,
        }));
    }

    if unmapped_kind {
        report.dropped("relationship kind with no SPDX mapping (emitted as OTHER)");
    }
    if dangling_edge {
        report.dropped("relationship referencing a component with no SPDX id (edge omitted)");
    }
    report.synthesized("relationships from edge list");
    rels
}
/// Maps a dependency kind to its SPDX `relationshipType` string.
///
/// All `*DependsOn` flavours collapse to `DEPENDS_ON`, and `Provides` is
/// written as `CONTAINS`. Returns `None` for `Other(_)`, which has no mapping.
fn spdx_relationship_type(rel: &DependencyType) -> Option<&'static str> {
    let label = match rel {
        DependencyType::Other(_) => return None,
        DependencyType::DependsOn
        | DependencyType::OptionalDependsOn
        | DependencyType::DevDependsOn
        | DependencyType::BuildDependsOn
        | DependencyType::TestDependsOn
        | DependencyType::RuntimeDependsOn
        | DependencyType::ProvidedDependsOn => "DEPENDS_ON",
        DependencyType::Describes => "DESCRIBES",
        DependencyType::Generates => "GENERATES",
        DependencyType::Contains | DependencyType::Provides => "CONTAINS",
        DependencyType::AncestorOf => "ANCESTOR_OF",
        DependencyType::VariantOf => "VARIANT_OF",
        DependencyType::DistributionArtifact => "DISTRIBUTION_ARTIFACT",
        DependencyType::PatchFor => "PATCH_FOR",
        DependencyType::CopyOf => "COPY_OF",
        DependencyType::FileAdded => "FILE_ADDED",
        DependencyType::FileDeleted => "FILE_DELETED",
        DependencyType::FileModified => "FILE_MODIFIED",
        DependencyType::DynamicLink => "DYNAMIC_LINK",
        DependencyType::StaticLink => "STATIC_LINK",
    };
    Some(label)
}
/// Records on `report` the component data that the SPDX output does not carry.
///
/// For every component, in order: one entry per hash whose algorithm
/// `spdx_algorithm` cannot name, then one entry each for ML model metadata,
/// dataset metadata, crypto properties and non-empty extension properties.
fn note_unmappable(sbom: &NormalizedSbom, report: &mut FidelityReport) {
    for component in sbom.components.values() {
        let unnamed_hashes = component
            .hashes
            .iter()
            .filter(|hash| spdx_algorithm(&hash.algorithm).is_none())
            .count();
        for _ in 0..unnamed_hashes {
            report.dropped("checksum algorithm with no SPDX spelling");
        }

        // (present?, message) pairs, checked in the order they are reported.
        let losses = [
            (
                component.ml_model.is_some(),
                "ML model metadata (no SPDX 2.3 representation)",
            ),
            (
                component.dataset.is_some(),
                "dataset metadata (no SPDX 2.3 representation)",
            ),
            (
                component.crypto_properties.is_some(),
                "cryptoProperties (no SPDX 2.3 representation)",
            ),
            (
                !component.extensions.properties.is_empty(),
                "CycloneDX properties (no SPDX 2.3 representation)",
            ),
        ];
        for (present, message) in losses {
            if present {
                report.dropped(message);
            }
        }
    }
}
// Round-trip tests: each one parses a fixture, emits SPDX with `emit_spdx`,
// and inspects either the re-parsed model or the raw emitted JSON.
#[cfg(test)]
mod tests {
use super::*;
use crate::parsers::parse_sbom_str;
// SPDX 2.3 fixture: two packages (lodash with checksum + purl, express),
// a document-level DESCRIBES and one DEPENDS_ON relationship.
const SPDX: &str = r#"{
"spdxVersion": "SPDX-2.3", "SPDXID": "SPDXRef-DOCUMENT", "name": "app",
"dataLicense": "CC0-1.0",
"documentNamespace": "https://example.com/app",
"creationInfo": {"created": "2026-01-04T12:00:00Z",
"creators": ["Tool: t-1.0"]},
"packages": [
{"SPDXID": "SPDXRef-Package-lodash", "name": "lodash",
"versionInfo": "4.17.21", "downloadLocation": "NOASSERTION",
"licenseDeclared": "MIT", "licenseConcluded": "MIT",
"checksums": [{"algorithm": "SHA256", "checksumValue": "abc"}],
"externalRefs": [{"referenceCategory": "PACKAGE-MANAGER",
"referenceType": "purl",
"referenceLocator": "pkg:npm/lodash@4.17.21"}]},
{"SPDXID": "SPDXRef-Package-express", "name": "express",
"versionInfo": "4.18.2", "downloadLocation": "NOASSERTION",
"licenseDeclared": "MIT", "licenseConcluded": "MIT"}
],
"relationships": [
{"spdxElementId": "SPDXRef-DOCUMENT", "relationshipType": "DESCRIBES",
"relatedSpdxElement": "SPDXRef-Package-express"},
{"spdxElementId": "SPDXRef-Package-express", "relationshipType": "DEPENDS_ON",
"relatedSpdxElement": "SPDXRef-Package-lodash"}
]
}"#;
// CycloneDX 1.5 fixture: a root application component in metadata, one
// library component (lodash) and a single root -> lodash dependency.
const CDX: &str = r#"{
"bomFormat": "CycloneDX", "specVersion": "1.5", "version": 1,
"metadata": {"component": {"type": "application", "bom-ref": "root",
"name": "app", "version": "1.0.0"}},
"components": [
{"type": "library", "bom-ref": "lodash@4.17.21", "name": "lodash",
"version": "4.17.21", "purl": "pkg:npm/lodash@4.17.21",
"hashes": [{"alg": "SHA-256", "content": "abc"}],
"licenses": [{"license": {"id": "MIT"}}]}
],
"dependencies": [{"ref": "root", "dependsOn": ["lodash@4.17.21"]}]
}"#;
// The emitted document must parse again and identify itself as SPDX 2.3.
#[test]
fn emits_valid_spdx_that_reparses() {
let sbom = parse_sbom_str(SPDX).unwrap();
let (json, _report) = emit_spdx(&sbom).unwrap();
let reparsed = parse_sbom_str(&json).expect("emitted SPDX must re-parse");
assert_eq!(reparsed.document.format.to_string(), "SPDX");
assert_eq!(reparsed.document.format_version, "2.3");
}
// SPDX -> SPDX keeps the component and edge counts, plus lodash's hash,
// declared license and purl.
#[test]
fn spdx_round_trip_preserves_counts() {
let sbom = parse_sbom_str(SPDX).unwrap();
let (json, _report) = emit_spdx(&sbom).unwrap();
let reparsed = parse_sbom_str(&json).unwrap();
assert_eq!(reparsed.components.len(), sbom.components.len());
assert_eq!(reparsed.edges.len(), sbom.edges.len());
let lodash = reparsed
.components
.values()
.find(|c| c.name == "lodash")
.unwrap();
assert_eq!(lodash.hashes.len(), 1);
assert!(!lodash.licenses.declared.is_empty());
assert_eq!(
lodash.identifiers.purl.as_deref(),
Some("pkg:npm/lodash@4.17.21")
);
}
// CycloneDX -> SPDX: both the primary component and the library show up as
// packages, and at least one dependency edge survives.
#[test]
fn cross_family_cdx_to_spdx_maps_components() {
let sbom = parse_sbom_str(CDX).unwrap();
let (json, _report) = emit_spdx(&sbom).unwrap();
let reparsed = parse_sbom_str(&json).expect("CDX→SPDX output must re-parse");
let names: Vec<&str> = reparsed
.components
.values()
.map(|c| c.name.as_str())
.collect();
assert!(names.contains(&"app"), "primary mapped: {names:?}");
assert!(names.contains(&"lodash"), "lodash mapped: {names:?}");
assert!(!reparsed.edges.is_empty(), "dependency edges mapped");
}
// Every emitted SPDXID starts with `SPDXRef-` and contains only ASCII
// alphanumerics, `.` and `-`, even for the CycloneDX-sourced components.
#[test]
fn sanitizes_purl_spdx_ids() {
let sbom = parse_sbom_str(CDX).unwrap();
let (json, _report) = emit_spdx(&sbom).unwrap();
let doc: Value = serde_json::from_str(&json).unwrap();
for pkg in doc["packages"].as_array().unwrap() {
let id = pkg["SPDXID"].as_str().unwrap();
assert!(id.starts_with("SPDXRef-"), "bad SPDXID {id}");
assert!(
id.chars()
.all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-'),
"SPDXID {id} has illegal chars"
);
}
}
}