use std::collections::HashMap;
use serde_json::Value;
use crate::model::NormalizedSbom;
/// Attaches the original JSON object of each component to
/// `component.extensions.source_json`.
///
/// `raw_json` is parsed once and indexed by the CycloneDX, SPDX 2 and SPDX 3
/// indexers. Every component in `sbom` is then looked up by, in order, its
/// `format_id`, its `purl` (when present) and its `name`; the first key found
/// in the index supplies the stored JSON.
///
/// This is best-effort: if `raw_json` is not valid JSON, or no indexer
/// produced any entry, `sbom` is left untouched.
pub fn preserve_source_json(raw_json: &str, sbom: &mut NormalizedSbom) {
    let Ok(doc) = serde_json::from_str::<Value>(raw_json) else {
        return;
    };

    let mut by_key: HashMap<String, Value> = HashMap::new();
    index_cyclonedx(&doc, &mut by_key);
    index_spdx2(&doc, &mut by_key);
    index_spdx3(&doc, &mut by_key);
    if by_key.is_empty() {
        return;
    }

    for component in sbom.components.values_mut() {
        // Keys are tried in this order; the first one present in the index wins.
        let matched = [
            Some(component.identifiers.format_id.as_str()),
            component.identifiers.purl.as_deref(),
            Some(component.name.as_str()),
        ]
        .into_iter()
        .flatten()
        .find_map(|key| by_key.get(key));

        if let Some(source) = matched {
            component.extensions.source_json = Some(Box::new(source.clone()));
        }
    }
}
/// Indexes the components of a CycloneDX document into `by_key`.
///
/// A document without a `bomFormat` key is ignored. Otherwise
/// `metadata.component` (if any) is indexed first, followed by each entry of
/// the top-level `components` array. Nested `components` arrays inside a
/// component are not visited.
fn index_cyclonedx(doc: &Value, by_key: &mut HashMap<String, Value>) {
    if doc.get("bomFormat").is_none() {
        return;
    }

    let root_component = doc.pointer("/metadata/component");
    let listed_components = doc
        .get("components")
        .and_then(Value::as_array)
        .into_iter()
        .flatten();

    // Order matters: the metadata component is indexed before the listed ones.
    for comp in root_component.into_iter().chain(listed_components) {
        index_cdx_component(comp, by_key);
    }
}
/// Registers one CycloneDX component object under each of its identifying
/// fields: `bom-ref`, `purl` and `name`.
///
/// A field is used only when it is present and holds a string. Existing
/// entries are never overwritten, so the first object indexed under a given
/// key wins.
fn index_cdx_component(comp: &Value, by_key: &mut HashMap<String, Value>) {
    for field in ["bom-ref", "purl", "name"] {
        if let Some(key) = comp.get(field).and_then(Value::as_str) {
            // Clone lazily: the object is deep-copied only when the key is vacant.
            by_key.entry(key.to_owned()).or_insert_with(|| comp.clone());
        }
    }
}
/// Indexes the packages of an SPDX 2 document into `by_key`.
///
/// A document without an `spdxVersion` key, or without a `packages` array, is
/// ignored. Each package is registered under its `SPDXID`, its `name` and the
/// purl found by `spdx2_purl`, whichever of those are available. Existing
/// entries are never overwritten, so the first object indexed under a given
/// key wins.
fn index_spdx2(doc: &Value, by_key: &mut HashMap<String, Value>) {
    if doc.get("spdxVersion").is_none() {
        return;
    }
    let Some(packages) = doc.get("packages").and_then(Value::as_array) else {
        return;
    };

    for pkg in packages {
        let spdx_id = pkg.get("SPDXID").and_then(Value::as_str).map(str::to_owned);
        let name = pkg.get("name").and_then(Value::as_str).map(str::to_owned);
        let purl = spdx2_purl(pkg);

        for key in [spdx_id, name, purl].into_iter().flatten() {
            // Clone lazily: the package is deep-copied only when the key is vacant.
            by_key.entry(key).or_insert_with(|| pkg.clone());
        }
    }
}
/// Extracts a package URL from an SPDX 2 package's `externalRefs`.
///
/// Returns the `referenceLocator` of the first entry whose `referenceType` is
/// `"purl"` and whose locator is a string. Purl-typed entries with a missing
/// or non-string locator are skipped rather than ending the search. Returns
/// `None` when `externalRefs` is absent, is not an array, or holds no usable
/// purl entry.
fn spdx2_purl(pkg: &Value) -> Option<String> {
    pkg.get("externalRefs")
        .and_then(Value::as_array)?
        .iter()
        .filter(|r| r.get("referenceType").and_then(Value::as_str) == Some("purl"))
        .find_map(|r| r.get("referenceLocator").and_then(Value::as_str))
        .map(str::to_owned)
}
/// Indexes the elements of an SPDX 3 (JSON-LD) document into `by_key`.
///
/// A document without a top-level `@context` key is ignored. Because a
/// string-key lookup on a non-object value yields `None`, this also rejects a
/// top-level JSON array, so only object documents get past the guard.
///
/// The elements are the entries of `@graph` when that key holds an array;
/// otherwise the document itself is treated as the single element. Each
/// element is registered under its `spdxId` and its `name` when those are
/// strings. Existing entries are never overwritten, so the first object
/// indexed under a given key wins.
fn index_spdx3(doc: &Value, by_key: &mut HashMap<String, Value>) {
    if doc.get("@context").is_none() {
        return;
    }

    let elements: &[Value] = doc
        .get("@graph")
        .and_then(Value::as_array)
        .map(Vec::as_slice)
        .unwrap_or_else(|| std::slice::from_ref(doc));

    for el in elements {
        for field in ["spdxId", "name"] {
            if let Some(key) = el.get(field).and_then(Value::as_str) {
                // Clone lazily: the element is deep-copied only when the key is vacant.
                by_key.entry(key.to_owned()).or_insert_with(|| el.clone());
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsers::parse_sbom_str;

    /// CycloneDX fixture with one component that carries an extra
    /// `x-custom-field` key for the preservation test to look for.
    const CDX: &str = r#"{
"bomFormat": "CycloneDX", "specVersion": "1.5", "version": 1,
"components": [
{"type": "library", "bom-ref": "lodash@4.17.21", "name": "lodash",
"version": "4.17.21", "purl": "pkg:npm/lodash@4.17.21",
"x-custom-field": "preserve-me"}
]
}"#;

    /// SPDX 2.3 fixture with one package that has a purl external reference.
    const SPDX2: &str = r#"{
"spdxVersion": "SPDX-2.3", "SPDXID": "SPDXRef-DOCUMENT", "name": "x",
"dataLicense": "CC0-1.0",
"documentNamespace": "https://example.com/x",
"creationInfo": {"created": "2026-01-04T12:00:00Z",
"creators": ["Tool: t-1.0"]},
"packages": [
{"SPDXID": "SPDXRef-Package-lodash", "name": "lodash",
"versionInfo": "4.17.21",
"downloadLocation": "NOASSERTION",
"externalRefs": [{"referenceCategory": "PACKAGE-MANAGER",
"referenceType": "purl",
"referenceLocator": "pkg:npm/lodash@4.17.21"}]}
]
}"#;

    /// Number of components that currently have `source_json` populated.
    fn preserved_count(sbom: &NormalizedSbom) -> usize {
        sbom.components
            .values()
            .filter(|c| c.extensions.source_json.is_some())
            .count()
    }

    #[test]
    fn captures_source_json_for_cyclonedx() {
        let mut sbom = parse_sbom_str(CDX).unwrap();
        // Parsing alone must not populate the preserved JSON.
        assert_eq!(preserved_count(&sbom), 0);

        preserve_source_json(CDX, &mut sbom);

        let first = sbom.components.values().next().unwrap();
        let preserved = first.extensions.source_json.as_ref().expect("preserved");
        let custom = preserved.get("x-custom-field").and_then(Value::as_str);
        assert_eq!(custom, Some("preserve-me"));
    }

    #[test]
    fn invalid_json_is_noop() {
        let mut sbom = parse_sbom_str(CDX).unwrap();
        preserve_source_json("{not json", &mut sbom);
        assert_eq!(preserved_count(&sbom), 0);
    }

    #[test]
    fn captures_source_json_for_spdx2() {
        let mut sbom = parse_sbom_str(SPDX2).unwrap();
        preserve_source_json(SPDX2, &mut sbom);
        assert!(preserved_count(&sbom) > 0);
    }
}