sbom_tools/serialization/
merger.rs1use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use std::collections::HashSet;
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct MergeConfig {
13 pub dedup_strategy: DeduplicationStrategy,
15}
16
17impl Default for MergeConfig {
18 fn default() -> Self {
19 Self {
20 dedup_strategy: DeduplicationStrategy::Name,
21 }
22 }
23}
24
25#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
27pub enum DeduplicationStrategy {
28 Name,
30 Purl,
32 None,
34}
35
36pub fn merge_sbom_json(
47 primary_json: &str,
48 secondary_json: &str,
49 config: &MergeConfig,
50) -> anyhow::Result<String> {
51 let mut primary: Value = serde_json::from_str(primary_json)?;
52 let secondary: Value = serde_json::from_str(secondary_json)?;
53
54 let primary_is_cdx = primary.get("bomFormat").is_some();
55 let secondary_is_cdx = secondary.get("bomFormat").is_some();
56 let primary_is_spdx3 = primary.get("@context").is_some();
57 let secondary_is_spdx3 = secondary.get("@context").is_some();
58
59 if primary_is_cdx != secondary_is_cdx {
61 anyhow::bail!("cannot merge CycloneDX and SPDX SBOMs — both must be the same format");
62 }
63
64 if primary_is_cdx {
65 merge_cyclonedx(&mut primary, &secondary, config)?;
66 } else if primary_is_spdx3 {
67 if !secondary_is_spdx3 {
68 anyhow::bail!("cannot merge SPDX 3.0 and SPDX 2.x SBOMs");
69 }
70 merge_spdx3(&mut primary, &secondary, config)?;
71 } else {
72 merge_spdx2(&mut primary, &secondary, config)?;
73 }
74
75 Ok(serde_json::to_string_pretty(&primary)?)
76}
77
78fn merge_cyclonedx(
79 primary: &mut Value,
80 secondary: &Value,
81 config: &MergeConfig,
82) -> anyhow::Result<()> {
83 let primary_components = primary.get_mut("components").and_then(Value::as_array_mut);
84
85 let secondary_components = secondary.get("components").and_then(Value::as_array);
86
87 if let (Some(p_comps), Some(s_comps)) = (primary_components, secondary_components) {
88 if config.dedup_strategy == DeduplicationStrategy::None {
89 for comp in s_comps {
91 p_comps.push(comp.clone());
92 }
93 } else {
94 let mut seen = build_seen_set(p_comps, config);
96
97 for comp in s_comps {
99 let key = component_key(comp, config);
100 if seen.insert(key) {
101 p_comps.push(comp.clone());
102 }
103 }
104 }
105 }
106
107 let primary_deps = primary
109 .get_mut("dependencies")
110 .and_then(Value::as_array_mut);
111
112 let secondary_deps = secondary.get("dependencies").and_then(Value::as_array);
113
114 if let (Some(p_deps), Some(s_deps)) = (primary_deps, secondary_deps) {
115 let existing_refs: HashSet<String> = p_deps
116 .iter()
117 .filter_map(|d| d.get("ref").and_then(Value::as_str).map(String::from))
118 .collect();
119
120 for dep in s_deps {
121 let dep_ref = dep.get("ref").and_then(Value::as_str).unwrap_or("");
122 if !existing_refs.contains(dep_ref) {
123 p_deps.push(dep.clone());
124 }
125 }
126 }
127
128 merge_array_field(primary, secondary, "vulnerabilities");
130
131 Ok(())
132}
133
134fn merge_spdx3(primary: &mut Value, secondary: &Value, config: &MergeConfig) -> anyhow::Result<()> {
135 let primary_key = if primary.get("element").is_some() {
136 "element"
137 } else {
138 "@graph"
139 };
140 let primary_elements = primary.get_mut(primary_key).and_then(Value::as_array_mut);
141
142 let secondary_key = if secondary.get("element").is_some() {
143 "element"
144 } else {
145 "@graph"
146 };
147 let secondary_elements = secondary.get(secondary_key).and_then(Value::as_array);
148
149 if let (Some(p_elems), Some(s_elems)) = (primary_elements, secondary_elements) {
150 let mut seen: HashSet<String> = p_elems
151 .iter()
152 .filter_map(|e| e.get("spdxId").and_then(Value::as_str).map(String::from))
153 .collect();
154
155 for elem in s_elems {
156 let spdx_id = elem.get("spdxId").and_then(Value::as_str).unwrap_or("");
157
158 let elem_type = elem.get("type").and_then(Value::as_str).unwrap_or("");
160 if elem_type.contains("Package") || elem_type.contains("package") {
161 let key = component_key(elem, config);
162 if !seen.insert(key) {
163 continue;
164 }
165 } else if !seen.insert(spdx_id.to_string()) {
166 continue;
167 }
168
169 p_elems.push(elem.clone());
170 }
171 }
172
173 Ok(())
174}
175
176fn merge_spdx2(primary: &mut Value, secondary: &Value, config: &MergeConfig) -> anyhow::Result<()> {
177 if let (Some(p_pkgs), Some(s_pkgs)) = (
179 primary.get_mut("packages").and_then(Value::as_array_mut),
180 secondary.get("packages").and_then(Value::as_array),
181 ) {
182 if config.dedup_strategy == DeduplicationStrategy::None {
183 for pkg in s_pkgs {
184 p_pkgs.push(pkg.clone());
185 }
186 } else {
187 let mut seen = build_seen_set(p_pkgs, config);
188 for pkg in s_pkgs {
189 let key = component_key(pkg, config);
190 if seen.insert(key) {
191 p_pkgs.push(pkg.clone());
192 }
193 }
194 }
195 }
196
197 merge_array_field(primary, secondary, "relationships");
199
200 Ok(())
201}
202
203fn build_seen_set(components: &[Value], config: &MergeConfig) -> HashSet<String> {
205 components
206 .iter()
207 .map(|c| component_key(c, config))
208 .collect()
209}
210
211fn component_key(comp: &Value, config: &MergeConfig) -> String {
213 match config.dedup_strategy {
214 DeduplicationStrategy::Purl => {
215 if let Some(purl) = comp.get("purl").and_then(Value::as_str) {
217 return purl.to_string();
218 }
219 if let Some(refs) = comp.get("externalReferences").and_then(Value::as_array) {
221 for r in refs {
222 if r.get("type").and_then(Value::as_str) == Some("purl") {
223 if let Some(url) = r.get("url").and_then(Value::as_str) {
224 return url.to_string();
225 }
226 }
227 }
228 }
229 name_version_key(comp)
231 }
232 DeduplicationStrategy::Name | DeduplicationStrategy::None => name_version_key(comp),
233 }
234}
235
236fn name_version_key(comp: &Value) -> String {
237 let name = comp.get("name").and_then(Value::as_str).unwrap_or("");
238 let version = comp
239 .get("version")
240 .or_else(|| comp.get("versionInfo"))
241 .and_then(Value::as_str)
242 .unwrap_or("");
243 format!("{name}@{version}")
244}
245
246fn merge_array_field(primary: &mut Value, secondary: &Value, field: &str) {
248 if let Some(s_arr) = secondary.get(field).and_then(Value::as_array) {
249 let p_arr = primary.as_object_mut().and_then(|o| {
250 o.entry(field)
251 .or_insert_with(|| Value::Array(Vec::new()))
252 .as_array_mut()
253 });
254 if let Some(p) = p_arr {
255 for item in s_arr {
256 p.push(item.clone());
257 }
258 }
259 }
260}
261
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses merged output and returns the length of its `components` array.
    fn component_count(merged: &str) -> usize {
        let doc: Value = serde_json::from_str(merged).unwrap();
        doc["components"].as_array().unwrap().len()
    }

    /// A component present in both documents is kept once under the default
    /// (name-based) strategy.
    #[test]
    fn merge_cyclonedx_dedup() {
        let primary = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"},
            {"name":"bar","version":"2.0"}
        ]}"#;

        let secondary = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"},
            {"name":"baz","version":"3.0"}
        ]}"#;

        let merged = merge_sbom_json(primary, secondary, &MergeConfig::default()).unwrap();
        assert_eq!(component_count(&merged), 3);
    }

    /// Mixing a CycloneDX document with an SPDX document is rejected.
    #[test]
    fn merge_different_formats_fails() {
        let cdx = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[]}"#;
        let spdx = r#"{"spdxVersion":"SPDX-2.3","SPDXID":"SPDXRef-DOCUMENT","packages":[]}"#;

        let outcome = merge_sbom_json(cdx, spdx, &MergeConfig::default());
        assert!(outcome.is_err());
    }

    /// With deduplication disabled, identical components are both kept.
    #[test]
    fn merge_no_dedup() {
        let a = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"}
        ]}"#;
        let b = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"}
        ]}"#;

        let config = MergeConfig {
            dedup_strategy: DeduplicationStrategy::None,
        };
        let merged = merge_sbom_json(a, b, &config).unwrap();
        assert_eq!(component_count(&merged), 2);
    }
}