Skip to main content

sbom_tools/serialization/
pruner.rs

1//! SBOM tailoring / filtering.
2//!
3//! Removes components from an SBOM based on filter criteria,
4//! preserving the original format structure.
5
6use crate::model::{LicenseFamily, NormalizedSbom};
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9
/// Configuration for SBOM tailoring.
///
/// Every filter is optional: an empty list or `None` disables it. When
/// several filters are active a component must pass all of them to be kept.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TailorConfig {
    /// Include only components matching these license families.
    ///
    /// `tailor_sbom_json` classifies a component by its first declared
    /// license, falling back to `LicenseFamily::Other` when none is declared.
    pub include_license_families: Vec<LicenseFamily>,
    /// Exclude components matching these ecosystems.
    ///
    /// Compared case-insensitively against the lower-cased `Debug` rendering
    /// of the component's ecosystem. Components without an ecosystem are
    /// never excluded by this filter.
    pub exclude_ecosystems: Vec<String>,
    /// Include only these component types (library, application, etc.).
    ///
    /// Compared case-insensitively against the lower-cased `Debug` rendering
    /// of the component type.
    pub include_types: Vec<String>,
    /// Include only components matching this name pattern.
    ///
    /// The pattern is a plain case-insensitive substring, not a glob or regex.
    pub include_name_pattern: Option<String>,
    /// Strip vulnerability data from output.
    ///
    /// CycloneDX: removes the top-level `vulnerabilities` key. SPDX 3: removes
    /// non-package elements whose `type` contains `Vulnerability`. Any other
    /// document: removes the top-level `annotations` key.
    pub strip_vulns: bool,
    /// Strip extension/property data.
    ///
    /// Currently only honoured for CycloneDX documents, where `properties` is
    /// removed from each top-level component.
    pub strip_extensions: bool,
}
26
27/// Tailor (filter) an SBOM by removing components that don't match the criteria.
28///
29/// Operates on raw JSON to preserve original format structure.
30///
31/// # Errors
32///
33/// Returns error if JSON parsing fails.
34pub fn tailor_sbom_json(
35    raw_json: &str,
36    sbom: &NormalizedSbom,
37    config: &TailorConfig,
38) -> anyhow::Result<String> {
39    let mut doc: Value = serde_json::from_str(raw_json)?;
40
41    // Collect component names/IDs to remove
42    let mut remove_ids: Vec<String> = Vec::new();
43
44    for comp in sbom.components.values() {
45        let mut keep = true;
46
47        // Filter by license family
48        if !config.include_license_families.is_empty() {
49            let family = comp
50                .licenses
51                .declared
52                .first()
53                .map(|l| l.family())
54                .unwrap_or(LicenseFamily::Other);
55            if !config.include_license_families.contains(&family) {
56                keep = false;
57            }
58        }
59
60        // Filter by ecosystem
61        if !config.exclude_ecosystems.is_empty() {
62            if let Some(eco) = &comp.ecosystem {
63                let eco_str = format!("{eco:?}").to_lowercase();
64                if config
65                    .exclude_ecosystems
66                    .iter()
67                    .any(|e| e.to_lowercase() == eco_str)
68                {
69                    keep = false;
70                }
71            }
72        }
73
74        // Filter by component type
75        if !config.include_types.is_empty() {
76            let type_str = format!("{:?}", comp.component_type).to_lowercase();
77            if !config
78                .include_types
79                .iter()
80                .any(|t| t.to_lowercase() == type_str)
81            {
82                keep = false;
83            }
84        }
85
86        // Filter by name pattern
87        if let Some(pattern) = &config.include_name_pattern {
88            let pattern_lower = pattern.to_lowercase();
89            if !comp.name.to_lowercase().contains(&pattern_lower) {
90                keep = false;
91            }
92        }
93
94        if !keep {
95            // Track both format_id and name for removal
96            if !comp.identifiers.format_id.is_empty() {
97                remove_ids.push(comp.identifiers.format_id.clone());
98            }
99            remove_ids.push(comp.name.clone());
100        }
101    }
102
103    // Prune from CycloneDX
104    if doc.get("bomFormat").is_some() {
105        prune_cyclonedx(&mut doc, &remove_ids, config);
106    } else if doc.get("@context").is_some() {
107        prune_spdx3(&mut doc, &remove_ids, config);
108    } else {
109        prune_spdx2(&mut doc, &remove_ids, config);
110    }
111
112    Ok(serde_json::to_string_pretty(&doc)?)
113}
114
115fn prune_cyclonedx(doc: &mut Value, remove_ids: &[String], config: &TailorConfig) {
116    // Remove components
117    if let Some(components) = doc.get_mut("components").and_then(Value::as_array_mut) {
118        components.retain(|comp| {
119            let name = comp.get("name").and_then(Value::as_str).unwrap_or("");
120            let bom_ref = comp.get("bom-ref").and_then(Value::as_str).unwrap_or("");
121            !remove_ids.iter().any(|id| id == name || id == bom_ref)
122        });
123    }
124
125    // Remove corresponding dependency entries
126    if let Some(deps) = doc.get_mut("dependencies").and_then(Value::as_array_mut) {
127        deps.retain(|dep| {
128            let ref_val = dep.get("ref").and_then(Value::as_str).unwrap_or("");
129            !remove_ids.iter().any(|id| id == ref_val)
130        });
131
132        // Also remove from dependsOn arrays
133        for dep in deps.iter_mut() {
134            if let Some(depends_on) = dep.get_mut("dependsOn").and_then(Value::as_array_mut) {
135                depends_on.retain(|d| {
136                    let s = d.as_str().unwrap_or("");
137                    !remove_ids.iter().any(|id| id == s)
138                });
139            }
140        }
141    }
142
143    // Strip vulnerabilities if requested
144    if config.strip_vulns {
145        doc.as_object_mut().map(|o| o.remove("vulnerabilities"));
146    }
147
148    // Strip extensions/properties if requested
149    if config.strip_extensions {
150        if let Some(components) = doc.get_mut("components").and_then(Value::as_array_mut) {
151            for comp in components {
152                comp.as_object_mut().map(|o| o.remove("properties"));
153            }
154        }
155    }
156}
157
158fn prune_spdx3(doc: &mut Value, remove_ids: &[String], config: &TailorConfig) {
159    let key = if doc.get("element").is_some() {
160        "element"
161    } else {
162        "@graph"
163    };
164    let elements = doc.get_mut(key).and_then(Value::as_array_mut);
165
166    if let Some(elems) = elements {
167        elems.retain(|elem| {
168            let name = elem.get("name").and_then(Value::as_str).unwrap_or("");
169            let elem_type = elem.get("type").and_then(Value::as_str).unwrap_or("");
170
171            // Only filter software packages, keep relationships and other elements
172            if !elem_type.contains("Package") && !elem_type.contains("package") {
173                // If stripping vulns, also remove vulnerability elements
174                if config.strip_vulns && elem_type.contains("Vulnerability") {
175                    return false;
176                }
177                return true;
178            }
179
180            !remove_ids.iter().any(|id| id == name)
181        });
182    }
183}
184
185fn prune_spdx2(doc: &mut Value, remove_ids: &[String], config: &TailorConfig) {
186    // Remove packages
187    if let Some(packages) = doc.get_mut("packages").and_then(Value::as_array_mut) {
188        packages.retain(|pkg| {
189            let name = pkg.get("name").and_then(Value::as_str).unwrap_or("");
190            let spdx_id = pkg.get("SPDXID").and_then(Value::as_str).unwrap_or("");
191            !remove_ids.iter().any(|id| id == name || id == spdx_id)
192        });
193    }
194
195    // Remove relationships referencing removed packages
196    if let Some(rels) = doc.get_mut("relationships").and_then(Value::as_array_mut) {
197        rels.retain(|rel| {
198            let elem = rel
199                .get("spdxElementId")
200                .and_then(Value::as_str)
201                .unwrap_or("");
202            let related = rel
203                .get("relatedSpdxElement")
204                .and_then(Value::as_str)
205                .unwrap_or("");
206            !remove_ids.iter().any(|id| id == elem || id == related)
207        });
208    }
209
210    if config.strip_vulns {
211        doc.as_object_mut().map(|o| o.remove("annotations"));
212    }
213}
214
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Component;

    /// A component whose name lacks the configured pattern is removed from a
    /// CycloneDX document, while a matching one is kept.
    #[test]
    fn tailor_by_name_pattern() {
        let input = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"keep-me","version":"1.0"},
            {"name":"remove-me","version":"2.0"}
        ]}"#;

        // Mirror the two JSON components in the normalized model.
        let mut model = NormalizedSbom::default();
        for (name, id) in [("keep-me", "id-keep"), ("remove-me", "id-remove")] {
            let component = Component::new(name.to_string(), id.to_string());
            let key = component.canonical_id.clone();
            model.components.insert(key, component);
        }

        let only_keep = TailorConfig {
            include_name_pattern: Some("keep".to_string()),
            ..Default::default()
        };

        let output = tailor_sbom_json(input, &model, &only_keep).unwrap();
        assert!(output.contains("keep-me"));
        assert!(!output.contains("remove-me"));
    }

    /// `strip_vulns` removes the top-level vulnerability list from a
    /// CycloneDX document even when no component is filtered out.
    #[test]
    fn strip_vulns() {
        let input = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[],"vulnerabilities":[{"id":"CVE-1"}]}"#;
        let model = NormalizedSbom::default();
        let strip = TailorConfig {
            strip_vulns: true,
            ..Default::default()
        };

        let output = tailor_sbom_json(input, &model, &strip).unwrap();
        assert!(!output.contains("vulnerabilities"));
    }
}