sbom_tools/serialization/
merger.rs1use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use std::collections::HashSet;
9
10use super::ValueExt;
11
12#[derive(Debug, thiserror::Error)]
14pub enum MergeError {
15 #[error("cannot merge CycloneDX and SPDX SBOMs — both must be the same format")]
17 FormatMismatch,
18 #[error("cannot merge SPDX 3.0 and SPDX 2.x SBOMs")]
20 SpdxVersionMismatch,
21 #[error(transparent)]
23 Json(#[from] serde_json::Error),
24}
25
26#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct MergeConfig {
29 pub dedup_strategy: DeduplicationStrategy,
31}
32
33impl Default for MergeConfig {
34 fn default() -> Self {
35 Self {
36 dedup_strategy: DeduplicationStrategy::Name,
37 }
38 }
39}
40
41#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, clap::ValueEnum)]
43#[serde(rename_all = "kebab-case")]
44pub enum DeduplicationStrategy {
45 #[default]
47 Name,
48 Purl,
50 None,
52}
53
54pub fn merge_sbom_json(
65 primary_json: &str,
66 secondary_json: &str,
67 config: &MergeConfig,
68) -> Result<String, MergeError> {
69 let mut primary: Value = serde_json::from_str(primary_json)?;
70 let secondary: Value = serde_json::from_str(secondary_json)?;
71
72 let primary_is_cdx = primary.get("bomFormat").is_some();
73 let secondary_is_cdx = secondary.get("bomFormat").is_some();
74 let primary_is_spdx3 = primary.get("@context").is_some();
75 let secondary_is_spdx3 = secondary.get("@context").is_some();
76
77 if primary_is_cdx != secondary_is_cdx {
79 return Err(MergeError::FormatMismatch);
80 }
81
82 if primary_is_cdx {
83 merge_cyclonedx(&mut primary, &secondary, config)?;
84 } else if primary_is_spdx3 {
85 if !secondary_is_spdx3 {
86 return Err(MergeError::SpdxVersionMismatch);
87 }
88 merge_spdx3(&mut primary, &secondary, config)?;
89 } else {
90 merge_spdx2(&mut primary, &secondary, config)?;
91 }
92
93 Ok(serde_json::to_string_pretty(&primary)?)
94}
95
96fn merge_cyclonedx(
97 primary: &mut Value,
98 secondary: &Value,
99 config: &MergeConfig,
100) -> Result<(), MergeError> {
101 let primary_components = primary.get_mut("components").and_then(Value::as_array_mut);
102
103 let secondary_components = secondary.get("components").and_then(Value::as_array);
104
105 if let (Some(p_comps), Some(s_comps)) = (primary_components, secondary_components) {
106 if config.dedup_strategy == DeduplicationStrategy::None {
107 for comp in s_comps {
109 p_comps.push(comp.clone());
110 }
111 } else {
112 let mut seen = build_seen_set(p_comps, config);
114
115 for comp in s_comps {
117 let key = component_key(comp, config);
118 if seen.insert(key) {
119 p_comps.push(comp.clone());
120 }
121 }
122 }
123 }
124
125 let primary_deps = primary
127 .get_mut("dependencies")
128 .and_then(Value::as_array_mut);
129
130 let secondary_deps = secondary.get("dependencies").and_then(Value::as_array);
131
132 if let (Some(p_deps), Some(s_deps)) = (primary_deps, secondary_deps) {
133 let existing_refs: HashSet<String> = p_deps
134 .iter()
135 .filter_map(|d| d.get("ref").and_then(Value::as_str).map(String::from))
136 .collect();
137
138 for dep in s_deps {
139 let dep_ref = dep.str_field("ref");
140 if !existing_refs.contains(dep_ref) {
141 p_deps.push(dep.clone());
142 }
143 }
144 }
145
146 merge_array_field(primary, secondary, "vulnerabilities");
148
149 Ok(())
150}
151
152fn merge_spdx3(
153 primary: &mut Value,
154 secondary: &Value,
155 config: &MergeConfig,
156) -> Result<(), MergeError> {
157 let primary_key = if primary.get("element").is_some() {
158 "element"
159 } else {
160 "@graph"
161 };
162 let primary_elements = primary.get_mut(primary_key).and_then(Value::as_array_mut);
163
164 let secondary_key = if secondary.get("element").is_some() {
165 "element"
166 } else {
167 "@graph"
168 };
169 let secondary_elements = secondary.get(secondary_key).and_then(Value::as_array);
170
171 if let (Some(p_elems), Some(s_elems)) = (primary_elements, secondary_elements) {
172 let mut seen: HashSet<String> = p_elems
173 .iter()
174 .filter_map(|e| e.get("spdxId").and_then(Value::as_str).map(String::from))
175 .collect();
176
177 for elem in s_elems {
178 let spdx_id = elem.str_field("spdxId");
179
180 let elem_type = elem.str_field("type");
182 if elem_type.contains("Package") || elem_type.contains("package") {
183 let key = component_key(elem, config);
184 if !seen.insert(key) {
185 continue;
186 }
187 } else if !seen.insert(spdx_id.to_string()) {
188 continue;
189 }
190
191 p_elems.push(elem.clone());
192 }
193 }
194
195 Ok(())
196}
197
198fn merge_spdx2(
199 primary: &mut Value,
200 secondary: &Value,
201 config: &MergeConfig,
202) -> Result<(), MergeError> {
203 if let (Some(p_pkgs), Some(s_pkgs)) = (
205 primary.get_mut("packages").and_then(Value::as_array_mut),
206 secondary.get("packages").and_then(Value::as_array),
207 ) {
208 if config.dedup_strategy == DeduplicationStrategy::None {
209 for pkg in s_pkgs {
210 p_pkgs.push(pkg.clone());
211 }
212 } else {
213 let mut seen = build_seen_set(p_pkgs, config);
214 for pkg in s_pkgs {
215 let key = component_key(pkg, config);
216 if seen.insert(key) {
217 p_pkgs.push(pkg.clone());
218 }
219 }
220 }
221 }
222
223 merge_array_field(primary, secondary, "relationships");
225
226 Ok(())
227}
228
229fn build_seen_set(components: &[Value], config: &MergeConfig) -> HashSet<String> {
231 components
232 .iter()
233 .map(|c| component_key(c, config))
234 .collect()
235}
236
237fn component_key(comp: &Value, config: &MergeConfig) -> String {
239 match config.dedup_strategy {
240 DeduplicationStrategy::Purl => {
241 if let Some(purl) = comp.get("purl").and_then(Value::as_str) {
243 return purl.to_string();
244 }
245 if let Some(refs) = comp.get("externalReferences").and_then(Value::as_array) {
247 for r in refs {
248 if r.get("type").and_then(Value::as_str) == Some("purl")
249 && let Some(url) = r.get("url").and_then(Value::as_str)
250 {
251 return url.to_string();
252 }
253 }
254 }
255 name_version_key(comp)
257 }
258 DeduplicationStrategy::Name | DeduplicationStrategy::None => name_version_key(comp),
259 }
260}
261
262fn name_version_key(comp: &Value) -> String {
263 if let Some(cp) = comp.get("cryptoProperties")
265 && let Some(oid) = cp.get("oid").and_then(Value::as_str)
266 {
267 let asset_type = cp
268 .get("assetType")
269 .and_then(Value::as_str)
270 .unwrap_or("unknown");
271 return format!("crypto:{asset_type}:{oid}");
272 }
273 let name = comp.str_field("name");
274 let version = comp
275 .get("version")
276 .or_else(|| comp.get("versionInfo"))
277 .and_then(Value::as_str)
278 .unwrap_or("");
279 format!("{name}@{version}")
280}
281
282fn merge_array_field(primary: &mut Value, secondary: &Value, field: &str) {
284 if let Some(s_arr) = secondary.get(field).and_then(Value::as_array) {
285 let p_arr = primary.as_object_mut().and_then(|o| {
286 o.entry(field)
287 .or_insert_with(|| Value::Array(Vec::new()))
288 .as_array_mut()
289 });
290 if let Some(p) = p_arr {
291 for item in s_arr {
292 p.push(item.clone());
293 }
294 }
295 }
296}
297
#[cfg(test)]
mod tests {
    use super::*;

    /// Merges the two documents and returns the length of the merged
    /// `components` array. Panics if the merge fails or the array is missing.
    fn merged_component_count(primary: &str, secondary: &str, config: &MergeConfig) -> usize {
        let merged = merge_sbom_json(primary, secondary, config).unwrap();
        let doc: Value = serde_json::from_str(&merged).unwrap();
        doc["components"].as_array().unwrap().len()
    }

    #[test]
    fn merge_cyclonedx_dedup() {
        let primary = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"},
            {"name":"bar","version":"2.0"}
        ]}"#;
        let secondary = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"},
            {"name":"baz","version":"3.0"}
        ]}"#;

        // foo@1.0 appears in both documents and must be kept only once.
        let count = merged_component_count(primary, secondary, &MergeConfig::default());
        assert_eq!(count, 3);
    }

    #[test]
    fn merge_different_formats_fails() {
        let cdx = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[]}"#;
        let spdx = r#"{"spdxVersion":"SPDX-2.3","SPDXID":"SPDXRef-DOCUMENT","packages":[]}"#;

        let outcome = merge_sbom_json(cdx, spdx, &MergeConfig::default());
        assert!(outcome.is_err());
    }

    #[test]
    fn merge_no_dedup() {
        let a = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"}
        ]}"#;
        let b = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"}
        ]}"#;

        let config = MergeConfig {
            dedup_strategy: DeduplicationStrategy::None,
        };

        // With deduplication disabled, the identical component is kept twice.
        assert_eq!(merged_component_count(a, b, &config), 2);
    }

    #[test]
    fn merge_crypto_oid_dedup() {
        let primary = r#"{"bomFormat":"CycloneDX","specVersion":"1.6","components":[
            {"name":"AES-256-GCM","type":"cryptographic-asset","cryptoProperties":{"assetType":"algorithm","oid":"2.16.840.1.101.3.4.1.46"}}
        ]}"#;
        let secondary = r#"{"bomFormat":"CycloneDX","specVersion":"1.6","components":[
            {"name":"AES-256-GCM-v2","type":"cryptographic-asset","cryptoProperties":{"assetType":"algorithm","oid":"2.16.840.1.101.3.4.1.46"}},
            {"name":"SHA-384","type":"cryptographic-asset","cryptoProperties":{"assetType":"algorithm","oid":"2.16.840.1.101.3.4.2.2"}}
        ]}"#;

        // The two AES entries share an OID and collapse despite different names.
        let count = merged_component_count(primary, secondary, &MergeConfig::default());
        assert_eq!(count, 2);
    }
}