Skip to main content

sbom_tools/serialization/
merger.rs

1//! SBOM merging.
2//!
3//! Combines multiple SBOMs into a single document, deduplicating
4//! components based on configurable strategies.
5
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use std::collections::HashSet;
9
10/// Configuration for SBOM merging
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct MergeConfig {
13    /// Deduplication strategy
14    pub dedup_strategy: DeduplicationStrategy,
15}
16
17impl Default for MergeConfig {
18    fn default() -> Self {
19        Self {
20            dedup_strategy: DeduplicationStrategy::Name,
21        }
22    }
23}
24
25/// Strategy for deduplicating components during merge
26#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
27pub enum DeduplicationStrategy {
28    /// Deduplicate by package name + version
29    Name,
30    /// Deduplicate by PURL (exact match)
31    Purl,
32    /// Keep all components (no dedup)
33    None,
34}
35
36/// Merge two SBOM JSON documents into one.
37///
38/// The primary SBOM provides the document metadata; components from both
39/// are merged with deduplication.
40///
41/// Both SBOMs must be the same format (CycloneDX or SPDX).
42///
43/// # Errors
44///
45/// Returns error if the SBOMs are different formats or JSON parsing fails.
46pub fn merge_sbom_json(
47    primary_json: &str,
48    secondary_json: &str,
49    config: &MergeConfig,
50) -> anyhow::Result<String> {
51    let mut primary: Value = serde_json::from_str(primary_json)?;
52    let secondary: Value = serde_json::from_str(secondary_json)?;
53
54    let primary_is_cdx = primary.get("bomFormat").is_some();
55    let secondary_is_cdx = secondary.get("bomFormat").is_some();
56    let primary_is_spdx3 = primary.get("@context").is_some();
57    let secondary_is_spdx3 = secondary.get("@context").is_some();
58
59    // Verify same format family
60    if primary_is_cdx != secondary_is_cdx {
61        anyhow::bail!("cannot merge CycloneDX and SPDX SBOMs — both must be the same format");
62    }
63
64    if primary_is_cdx {
65        merge_cyclonedx(&mut primary, &secondary, config)?;
66    } else if primary_is_spdx3 {
67        if !secondary_is_spdx3 {
68            anyhow::bail!("cannot merge SPDX 3.0 and SPDX 2.x SBOMs");
69        }
70        merge_spdx3(&mut primary, &secondary, config)?;
71    } else {
72        merge_spdx2(&mut primary, &secondary, config)?;
73    }
74
75    Ok(serde_json::to_string_pretty(&primary)?)
76}
77
78fn merge_cyclonedx(
79    primary: &mut Value,
80    secondary: &Value,
81    config: &MergeConfig,
82) -> anyhow::Result<()> {
83    let primary_components = primary.get_mut("components").and_then(Value::as_array_mut);
84
85    let secondary_components = secondary.get("components").and_then(Value::as_array);
86
87    if let (Some(p_comps), Some(s_comps)) = (primary_components, secondary_components) {
88        if config.dedup_strategy == DeduplicationStrategy::None {
89            // No deduplication — keep all components
90            for comp in s_comps {
91                p_comps.push(comp.clone());
92            }
93        } else {
94            // Build dedup set from primary
95            let mut seen = build_seen_set(p_comps, config);
96
97            // Add non-duplicate components from secondary
98            for comp in s_comps {
99                let key = component_key(comp, config);
100                if seen.insert(key) {
101                    p_comps.push(comp.clone());
102                }
103            }
104        }
105    }
106
107    // Merge dependencies
108    let primary_deps = primary
109        .get_mut("dependencies")
110        .and_then(Value::as_array_mut);
111
112    let secondary_deps = secondary.get("dependencies").and_then(Value::as_array);
113
114    if let (Some(p_deps), Some(s_deps)) = (primary_deps, secondary_deps) {
115        let existing_refs: HashSet<String> = p_deps
116            .iter()
117            .filter_map(|d| d.get("ref").and_then(Value::as_str).map(String::from))
118            .collect();
119
120        for dep in s_deps {
121            let dep_ref = dep.get("ref").and_then(Value::as_str).unwrap_or("");
122            if !existing_refs.contains(dep_ref) {
123                p_deps.push(dep.clone());
124            }
125        }
126    }
127
128    // Merge vulnerabilities
129    merge_array_field(primary, secondary, "vulnerabilities");
130
131    Ok(())
132}
133
134fn merge_spdx3(primary: &mut Value, secondary: &Value, config: &MergeConfig) -> anyhow::Result<()> {
135    let primary_key = if primary.get("element").is_some() {
136        "element"
137    } else {
138        "@graph"
139    };
140    let primary_elements = primary.get_mut(primary_key).and_then(Value::as_array_mut);
141
142    let secondary_key = if secondary.get("element").is_some() {
143        "element"
144    } else {
145        "@graph"
146    };
147    let secondary_elements = secondary.get(secondary_key).and_then(Value::as_array);
148
149    if let (Some(p_elems), Some(s_elems)) = (primary_elements, secondary_elements) {
150        let mut seen: HashSet<String> = p_elems
151            .iter()
152            .filter_map(|e| e.get("spdxId").and_then(Value::as_str).map(String::from))
153            .collect();
154
155        for elem in s_elems {
156            let spdx_id = elem.get("spdxId").and_then(Value::as_str).unwrap_or("");
157
158            // For packages, apply dedup logic
159            let elem_type = elem.get("type").and_then(Value::as_str).unwrap_or("");
160            if elem_type.contains("Package") || elem_type.contains("package") {
161                let key = component_key(elem, config);
162                if !seen.insert(key) {
163                    continue;
164                }
165            } else if !seen.insert(spdx_id.to_string()) {
166                continue;
167            }
168
169            p_elems.push(elem.clone());
170        }
171    }
172
173    Ok(())
174}
175
176fn merge_spdx2(primary: &mut Value, secondary: &Value, config: &MergeConfig) -> anyhow::Result<()> {
177    // Merge packages
178    if let (Some(p_pkgs), Some(s_pkgs)) = (
179        primary.get_mut("packages").and_then(Value::as_array_mut),
180        secondary.get("packages").and_then(Value::as_array),
181    ) {
182        if config.dedup_strategy == DeduplicationStrategy::None {
183            for pkg in s_pkgs {
184                p_pkgs.push(pkg.clone());
185            }
186        } else {
187            let mut seen = build_seen_set(p_pkgs, config);
188            for pkg in s_pkgs {
189                let key = component_key(pkg, config);
190                if seen.insert(key) {
191                    p_pkgs.push(pkg.clone());
192                }
193            }
194        }
195    }
196
197    // Merge relationships
198    merge_array_field(primary, secondary, "relationships");
199
200    Ok(())
201}
202
203/// Build a set of dedup keys from existing components
204fn build_seen_set(components: &[Value], config: &MergeConfig) -> HashSet<String> {
205    components
206        .iter()
207        .map(|c| component_key(c, config))
208        .collect()
209}
210
211/// Generate a deduplication key for a component
212fn component_key(comp: &Value, config: &MergeConfig) -> String {
213    match config.dedup_strategy {
214        DeduplicationStrategy::Purl => {
215            // Try purl field directly
216            if let Some(purl) = comp.get("purl").and_then(Value::as_str) {
217                return purl.to_string();
218            }
219            // Try externalReferences for PURL
220            if let Some(refs) = comp.get("externalReferences").and_then(Value::as_array) {
221                for r in refs {
222                    if r.get("type").and_then(Value::as_str) == Some("purl") {
223                        if let Some(url) = r.get("url").and_then(Value::as_str) {
224                            return url.to_string();
225                        }
226                    }
227                }
228            }
229            // Fall back to name-version
230            name_version_key(comp)
231        }
232        DeduplicationStrategy::Name | DeduplicationStrategy::None => name_version_key(comp),
233    }
234}
235
236fn name_version_key(comp: &Value) -> String {
237    let name = comp.get("name").and_then(Value::as_str).unwrap_or("");
238    let version = comp
239        .get("version")
240        .or_else(|| comp.get("versionInfo"))
241        .and_then(Value::as_str)
242        .unwrap_or("");
243    format!("{name}@{version}")
244}
245
246/// Merge an array field from secondary into primary (append new entries)
247fn merge_array_field(primary: &mut Value, secondary: &Value, field: &str) {
248    if let Some(s_arr) = secondary.get(field).and_then(Value::as_array) {
249        let p_arr = primary.as_object_mut().and_then(|o| {
250            o.entry(field)
251                .or_insert_with(|| Value::Array(Vec::new()))
252                .as_array_mut()
253        });
254        if let Some(p) = p_arr {
255            for item in s_arr {
256                p.push(item.clone());
257            }
258        }
259    }
260}
261
#[cfg(test)]
mod tests {
    use super::*;

    /// Merge two documents and return how many entries the resulting
    /// `components` array holds. Panics if the merge fails or the output is
    /// not the expected shape.
    fn merged_component_count(first: &str, second: &str, config: &MergeConfig) -> usize {
        let merged = merge_sbom_json(first, second, config).unwrap();
        let parsed: Value = serde_json::from_str(&merged).unwrap();
        parsed["components"].as_array().unwrap().len()
    }

    /// A component present in both documents appears once in the output.
    #[test]
    fn merge_cyclonedx_dedup() {
        let primary = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"},
            {"name":"bar","version":"2.0"}
        ]}"#;

        let secondary = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"},
            {"name":"baz","version":"3.0"}
        ]}"#;

        let count = merged_component_count(primary, secondary, &MergeConfig::default());
        // foo, bar, baz (foo deduped)
        assert_eq!(count, 3);
    }

    /// Mixing CycloneDX with SPDX is rejected.
    #[test]
    fn merge_different_formats_fails() {
        let cdx = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[]}"#;
        let spdx = r#"{"spdxVersion":"SPDX-2.3","SPDXID":"SPDXRef-DOCUMENT","packages":[]}"#;

        let outcome = merge_sbom_json(cdx, spdx, &MergeConfig::default());
        assert!(outcome.is_err());
    }

    /// The `None` strategy keeps all components, including duplicates.
    #[test]
    fn merge_no_dedup() {
        let a = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"}
        ]}"#;
        let b = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"}
        ]}"#;

        let keep_everything = MergeConfig {
            dedup_strategy: DeduplicationStrategy::None,
        };
        assert_eq!(merged_component_count(a, b, &keep_everything), 2);
    }
}