Skip to main content

sbom_tools/serialization/
merger.rs

//! SBOM merging.
//!
//! Combines multiple SBOMs into a single document, deduplicating
//! components based on configurable strategies.
5
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use std::collections::HashSet;
9
10use super::ValueExt;
11
12/// Errors that can occur during SBOM merging
13#[derive(Debug, thiserror::Error)]
14pub enum MergeError {
15    /// The two SBOMs are different formats (e.g., CycloneDX and SPDX)
16    #[error("cannot merge CycloneDX and SPDX SBOMs — both must be the same format")]
17    FormatMismatch,
18    /// The two SBOMs are incompatible SPDX versions
19    #[error("cannot merge SPDX 3.0 and SPDX 2.x SBOMs")]
20    SpdxVersionMismatch,
21    /// JSON serialization/deserialization error
22    #[error(transparent)]
23    Json(#[from] serde_json::Error),
24}
25
26/// Configuration for SBOM merging
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct MergeConfig {
29    /// Deduplication strategy
30    pub dedup_strategy: DeduplicationStrategy,
31}
32
33impl Default for MergeConfig {
34    fn default() -> Self {
35        Self {
36            dedup_strategy: DeduplicationStrategy::Name,
37        }
38    }
39}
40
41/// Strategy for deduplicating components during merge
42#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, clap::ValueEnum)]
43#[serde(rename_all = "kebab-case")]
44pub enum DeduplicationStrategy {
45    /// Deduplicate by package name + version
46    #[default]
47    Name,
48    /// Deduplicate by PURL (exact match)
49    Purl,
50    /// Keep all components (no dedup)
51    None,
52}
53
54/// Merge two SBOM JSON documents into one.
55///
56/// The primary SBOM provides the document metadata; components from both
57/// are merged with deduplication.
58///
59/// Both SBOMs must be the same format (CycloneDX or SPDX).
60///
61/// # Errors
62///
63/// Returns error if the SBOMs are different formats or JSON parsing fails.
64pub fn merge_sbom_json(
65    primary_json: &str,
66    secondary_json: &str,
67    config: &MergeConfig,
68) -> Result<String, MergeError> {
69    let mut primary: Value = serde_json::from_str(primary_json)?;
70    let secondary: Value = serde_json::from_str(secondary_json)?;
71
72    let primary_is_cdx = primary.get("bomFormat").is_some();
73    let secondary_is_cdx = secondary.get("bomFormat").is_some();
74    let primary_is_spdx3 = primary.get("@context").is_some();
75    let secondary_is_spdx3 = secondary.get("@context").is_some();
76
77    // Verify same format family
78    if primary_is_cdx != secondary_is_cdx {
79        return Err(MergeError::FormatMismatch);
80    }
81
82    if primary_is_cdx {
83        merge_cyclonedx(&mut primary, &secondary, config)?;
84    } else if primary_is_spdx3 {
85        if !secondary_is_spdx3 {
86            return Err(MergeError::SpdxVersionMismatch);
87        }
88        merge_spdx3(&mut primary, &secondary, config)?;
89    } else {
90        merge_spdx2(&mut primary, &secondary, config)?;
91    }
92
93    Ok(serde_json::to_string_pretty(&primary)?)
94}
95
96fn merge_cyclonedx(
97    primary: &mut Value,
98    secondary: &Value,
99    config: &MergeConfig,
100) -> Result<(), MergeError> {
101    let primary_components = primary.get_mut("components").and_then(Value::as_array_mut);
102
103    let secondary_components = secondary.get("components").and_then(Value::as_array);
104
105    if let (Some(p_comps), Some(s_comps)) = (primary_components, secondary_components) {
106        if config.dedup_strategy == DeduplicationStrategy::None {
107            // No deduplication — keep all components
108            for comp in s_comps {
109                p_comps.push(comp.clone());
110            }
111        } else {
112            // Build dedup set from primary
113            let mut seen = build_seen_set(p_comps, config);
114
115            // Add non-duplicate components from secondary
116            for comp in s_comps {
117                let key = component_key(comp, config);
118                if seen.insert(key) {
119                    p_comps.push(comp.clone());
120                }
121            }
122        }
123    }
124
125    // Merge dependencies
126    let primary_deps = primary
127        .get_mut("dependencies")
128        .and_then(Value::as_array_mut);
129
130    let secondary_deps = secondary.get("dependencies").and_then(Value::as_array);
131
132    if let (Some(p_deps), Some(s_deps)) = (primary_deps, secondary_deps) {
133        let existing_refs: HashSet<String> = p_deps
134            .iter()
135            .filter_map(|d| d.get("ref").and_then(Value::as_str).map(String::from))
136            .collect();
137
138        for dep in s_deps {
139            let dep_ref = dep.str_field("ref");
140            if !existing_refs.contains(dep_ref) {
141                p_deps.push(dep.clone());
142            }
143        }
144    }
145
146    // Merge vulnerabilities
147    merge_array_field(primary, secondary, "vulnerabilities");
148
149    Ok(())
150}
151
152fn merge_spdx3(
153    primary: &mut Value,
154    secondary: &Value,
155    config: &MergeConfig,
156) -> Result<(), MergeError> {
157    let primary_key = if primary.get("element").is_some() {
158        "element"
159    } else {
160        "@graph"
161    };
162    let primary_elements = primary.get_mut(primary_key).and_then(Value::as_array_mut);
163
164    let secondary_key = if secondary.get("element").is_some() {
165        "element"
166    } else {
167        "@graph"
168    };
169    let secondary_elements = secondary.get(secondary_key).and_then(Value::as_array);
170
171    if let (Some(p_elems), Some(s_elems)) = (primary_elements, secondary_elements) {
172        let mut seen: HashSet<String> = p_elems
173            .iter()
174            .filter_map(|e| e.get("spdxId").and_then(Value::as_str).map(String::from))
175            .collect();
176
177        for elem in s_elems {
178            let spdx_id = elem.str_field("spdxId");
179
180            // For packages, apply dedup logic
181            let elem_type = elem.str_field("type");
182            if elem_type.contains("Package") || elem_type.contains("package") {
183                let key = component_key(elem, config);
184                if !seen.insert(key) {
185                    continue;
186                }
187            } else if !seen.insert(spdx_id.to_string()) {
188                continue;
189            }
190
191            p_elems.push(elem.clone());
192        }
193    }
194
195    Ok(())
196}
197
198fn merge_spdx2(
199    primary: &mut Value,
200    secondary: &Value,
201    config: &MergeConfig,
202) -> Result<(), MergeError> {
203    // Merge packages
204    if let (Some(p_pkgs), Some(s_pkgs)) = (
205        primary.get_mut("packages").and_then(Value::as_array_mut),
206        secondary.get("packages").and_then(Value::as_array),
207    ) {
208        if config.dedup_strategy == DeduplicationStrategy::None {
209            for pkg in s_pkgs {
210                p_pkgs.push(pkg.clone());
211            }
212        } else {
213            let mut seen = build_seen_set(p_pkgs, config);
214            for pkg in s_pkgs {
215                let key = component_key(pkg, config);
216                if seen.insert(key) {
217                    p_pkgs.push(pkg.clone());
218                }
219            }
220        }
221    }
222
223    // Merge relationships
224    merge_array_field(primary, secondary, "relationships");
225
226    Ok(())
227}
228
229/// Build a set of dedup keys from existing components
230fn build_seen_set(components: &[Value], config: &MergeConfig) -> HashSet<String> {
231    components
232        .iter()
233        .map(|c| component_key(c, config))
234        .collect()
235}
236
237/// Generate a deduplication key for a component
238fn component_key(comp: &Value, config: &MergeConfig) -> String {
239    match config.dedup_strategy {
240        DeduplicationStrategy::Purl => {
241            // Try purl field directly
242            if let Some(purl) = comp.get("purl").and_then(Value::as_str) {
243                return purl.to_string();
244            }
245            // Try externalReferences for PURL
246            if let Some(refs) = comp.get("externalReferences").and_then(Value::as_array) {
247                for r in refs {
248                    if r.get("type").and_then(Value::as_str) == Some("purl")
249                        && let Some(url) = r.get("url").and_then(Value::as_str)
250                    {
251                        return url.to_string();
252                    }
253                }
254            }
255            // Fall back to name-version
256            name_version_key(comp)
257        }
258        DeduplicationStrategy::Name | DeduplicationStrategy::None => name_version_key(comp),
259    }
260}
261
262fn name_version_key(comp: &Value) -> String {
263    // For cryptographic components, use OID as the dedup key if available
264    if let Some(cp) = comp.get("cryptoProperties")
265        && let Some(oid) = cp.get("oid").and_then(Value::as_str)
266    {
267        let asset_type = cp
268            .get("assetType")
269            .and_then(Value::as_str)
270            .unwrap_or("unknown");
271        return format!("crypto:{asset_type}:{oid}");
272    }
273    let name = comp.str_field("name");
274    let version = comp
275        .get("version")
276        .or_else(|| comp.get("versionInfo"))
277        .and_then(Value::as_str)
278        .unwrap_or("");
279    format!("{name}@{version}")
280}
281
282/// Merge an array field from secondary into primary (append new entries)
283fn merge_array_field(primary: &mut Value, secondary: &Value, field: &str) {
284    if let Some(s_arr) = secondary.get(field).and_then(Value::as_array) {
285        let p_arr = primary.as_object_mut().and_then(|o| {
286            o.entry(field)
287                .or_insert_with(|| Value::Array(Vec::new()))
288                .as_array_mut()
289        });
290        if let Some(p) = p_arr {
291            for item in s_arr {
292                p.push(item.clone());
293            }
294        }
295    }
296}
297
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse a merged document and return the length of its `components` array.
    fn component_count(merged: &str) -> usize {
        let doc: Value = serde_json::from_str(merged).unwrap();
        doc["components"].as_array().unwrap().len()
    }

    /// A component present in both inputs appears only once in the output.
    #[test]
    fn merge_cyclonedx_dedup() {
        let first = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"},
            {"name":"bar","version":"2.0"}
        ]}"#;

        let second = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"},
            {"name":"baz","version":"3.0"}
        ]}"#;

        let merged = merge_sbom_json(first, second, &MergeConfig::default()).unwrap();

        // foo, bar, baz (foo deduped)
        assert_eq!(component_count(&merged), 3);
    }

    /// CycloneDX and SPDX documents cannot be merged with each other.
    #[test]
    fn merge_different_formats_fails() {
        let cyclonedx_doc = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[]}"#;
        let spdx_doc = r#"{"spdxVersion":"SPDX-2.3","SPDXID":"SPDXRef-DOCUMENT","packages":[]}"#;

        let outcome = merge_sbom_json(cyclonedx_doc, spdx_doc, &MergeConfig::default());

        assert!(outcome.is_err());
    }

    /// With the `None` strategy, identical components from both inputs are kept.
    #[test]
    fn merge_no_dedup() {
        let first = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"}
        ]}"#;
        let second = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"foo","version":"1.0"}
        ]}"#;
        let keep_everything = MergeConfig {
            dedup_strategy: DeduplicationStrategy::None,
        };

        let merged = merge_sbom_json(first, second, &keep_everything).unwrap();

        // None strategy keeps all components, including duplicates
        assert_eq!(component_count(&merged), 2);
    }

    /// Cryptographic assets are matched by OID even when their names differ.
    #[test]
    fn merge_crypto_oid_dedup() {
        let first = r#"{"bomFormat":"CycloneDX","specVersion":"1.6","components":[
            {"name":"AES-256-GCM","type":"cryptographic-asset","cryptoProperties":{"assetType":"algorithm","oid":"2.16.840.1.101.3.4.1.46"}}
        ]}"#;

        let second = r#"{"bomFormat":"CycloneDX","specVersion":"1.6","components":[
            {"name":"AES-256-GCM-v2","type":"cryptographic-asset","cryptoProperties":{"assetType":"algorithm","oid":"2.16.840.1.101.3.4.1.46"}},
            {"name":"SHA-384","type":"cryptographic-asset","cryptoProperties":{"assetType":"algorithm","oid":"2.16.840.1.101.3.4.2.2"}}
        ]}"#;

        let merged = merge_sbom_json(first, second, &MergeConfig::default()).unwrap();

        // AES-256-GCM-v2 deduped by OID, SHA-384 added → 2 total
        assert_eq!(component_count(&merged), 2);
    }
}
365}