Skip to main content

sbom_tools/serialization/
pruner.rs

1//! SBOM tailoring / filtering.
2//!
3//! Removes components from an SBOM based on filter criteria,
4//! preserving the original format structure.
5
6use crate::model::{LicenseFamily, NormalizedSbom};
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9
10use super::ValueExt;
11
12/// Configuration for SBOM tailoring
13#[derive(Debug, Clone, Default, Serialize, Deserialize)]
14pub struct TailorConfig {
15    /// Include only components matching these license families
16    pub include_license_families: Vec<LicenseFamily>,
17    /// Exclude components matching these ecosystems
18    pub exclude_ecosystems: Vec<String>,
19    /// Include only these component types (library, application, etc.)
20    pub include_types: Vec<String>,
21    /// Include only components matching this name pattern
22    pub include_name_pattern: Option<String>,
23    /// Include only these crypto asset types (algorithm, certificate, key, protocol)
24    pub include_crypto_types: Vec<String>,
25    /// Strip vulnerability data from output
26    pub strip_vulns: bool,
27    /// Strip extension/property data
28    pub strip_extensions: bool,
29}
30
31/// Tailor (filter) an SBOM by removing components that don't match the criteria.
32///
33/// Operates on raw JSON to preserve original format structure.
34///
35/// # Errors
36///
37/// Returns error if JSON parsing fails.
38pub fn tailor_sbom_json(
39    raw_json: &str,
40    sbom: &NormalizedSbom,
41    config: &TailorConfig,
42) -> anyhow::Result<String> {
43    let mut doc: Value = serde_json::from_str(raw_json)?;
44
45    // Collect component names/IDs to remove
46    let mut remove_ids: Vec<String> = Vec::new();
47
48    for comp in sbom.components.values() {
49        let mut keep = true;
50
51        // Filter by license family
52        if !config.include_license_families.is_empty() {
53            let family = comp
54                .licenses
55                .declared
56                .first()
57                .map(|l| l.family())
58                .unwrap_or(LicenseFamily::Other);
59            if !config.include_license_families.contains(&family) {
60                keep = false;
61            }
62        }
63
64        // Filter by ecosystem
65        if !config.exclude_ecosystems.is_empty()
66            && let Some(eco) = &comp.ecosystem
67        {
68            let eco_str = format!("{eco:?}").to_lowercase();
69            if config
70                .exclude_ecosystems
71                .iter()
72                .any(|e| e.to_lowercase() == eco_str)
73            {
74                keep = false;
75            }
76        }
77
78        // Filter by component type
79        if !config.include_types.is_empty() {
80            let type_str = format!("{:?}", comp.component_type).to_lowercase();
81            if !config
82                .include_types
83                .iter()
84                .any(|t| t.to_lowercase() == type_str)
85            {
86                keep = false;
87            }
88        }
89
90        // Filter by name pattern
91        if let Some(pattern) = &config.include_name_pattern {
92            let pattern_lower = pattern.to_lowercase();
93            if !comp.name.to_lowercase().contains(&pattern_lower) {
94                keep = false;
95            }
96        }
97
98        // Filter by crypto asset type
99        if !config.include_crypto_types.is_empty() {
100            if let Some(cp) = &comp.crypto_properties {
101                let asset_str = cp.asset_type.to_string().to_lowercase();
102                if !config
103                    .include_crypto_types
104                    .iter()
105                    .any(|t| t.to_lowercase() == asset_str)
106                {
107                    keep = false;
108                }
109            } else {
110                // No crypto properties — exclude if we're filtering by crypto type
111                keep = false;
112            }
113        }
114
115        if !keep {
116            // Track both format_id and name for removal
117            if !comp.identifiers.format_id.is_empty() {
118                remove_ids.push(comp.identifiers.format_id.clone());
119            }
120            remove_ids.push(comp.name.clone());
121        }
122    }
123
124    // Prune from CycloneDX
125    if doc.get("bomFormat").is_some() {
126        prune_cyclonedx(&mut doc, &remove_ids, config);
127    } else if doc.get("@context").is_some() {
128        prune_spdx3(&mut doc, &remove_ids, config);
129    } else {
130        prune_spdx2(&mut doc, &remove_ids, config);
131    }
132
133    Ok(serde_json::to_string_pretty(&doc)?)
134}
135
136fn prune_cyclonedx(doc: &mut Value, remove_ids: &[String], config: &TailorConfig) {
137    // Remove components
138    if let Some(components) = doc.get_mut("components").and_then(Value::as_array_mut) {
139        components.retain(|comp| {
140            let name = comp.str_field("name");
141            let bom_ref = comp.str_field("bom-ref");
142            !remove_ids.iter().any(|id| id == name || id == bom_ref)
143        });
144    }
145
146    // Remove corresponding dependency entries
147    if let Some(deps) = doc.get_mut("dependencies").and_then(Value::as_array_mut) {
148        deps.retain(|dep| {
149            let ref_val = dep.str_field("ref");
150            !remove_ids.iter().any(|id| id == ref_val)
151        });
152
153        // Also remove from dependsOn arrays
154        for dep in deps.iter_mut() {
155            if let Some(depends_on) = dep.get_mut("dependsOn").and_then(Value::as_array_mut) {
156                depends_on.retain(|d| {
157                    let s = d.as_str().unwrap_or("");
158                    !remove_ids.iter().any(|id| id == s)
159                });
160            }
161        }
162    }
163
164    // Strip vulnerabilities if requested
165    if config.strip_vulns {
166        doc.as_object_mut().map(|o| o.remove("vulnerabilities"));
167    }
168
169    // Strip extensions/properties if requested
170    if config.strip_extensions
171        && let Some(components) = doc.get_mut("components").and_then(Value::as_array_mut)
172    {
173        for comp in components {
174            comp.as_object_mut().map(|o| o.remove("properties"));
175        }
176    }
177}
178
179fn prune_spdx3(doc: &mut Value, remove_ids: &[String], config: &TailorConfig) {
180    let key = if doc.get("element").is_some() {
181        "element"
182    } else {
183        "@graph"
184    };
185    let elements = doc.get_mut(key).and_then(Value::as_array_mut);
186
187    if let Some(elems) = elements {
188        elems.retain(|elem| {
189            let name = elem.str_field("name");
190            let elem_type = elem.str_field("type");
191
192            // Only filter software packages, keep relationships and other elements
193            if !elem_type.contains("Package") && !elem_type.contains("package") {
194                // If stripping vulns, also remove vulnerability elements
195                if config.strip_vulns && elem_type.contains("Vulnerability") {
196                    return false;
197                }
198                return true;
199            }
200
201            !remove_ids.iter().any(|id| id == name)
202        });
203    }
204}
205
206fn prune_spdx2(doc: &mut Value, remove_ids: &[String], config: &TailorConfig) {
207    // Remove packages
208    if let Some(packages) = doc.get_mut("packages").and_then(Value::as_array_mut) {
209        packages.retain(|pkg| {
210            let name = pkg.str_field("name");
211            let spdx_id = pkg.str_field("SPDXID");
212            !remove_ids.iter().any(|id| id == name || id == spdx_id)
213        });
214    }
215
216    // Remove relationships referencing removed packages
217    if let Some(rels) = doc.get_mut("relationships").and_then(Value::as_array_mut) {
218        rels.retain(|rel| {
219            let elem = rel
220                .get("spdxElementId")
221                .and_then(Value::as_str)
222                .unwrap_or("");
223            let related = rel
224                .get("relatedSpdxElement")
225                .and_then(Value::as_str)
226                .unwrap_or("");
227            !remove_ids.iter().any(|id| id == elem || id == related)
228        });
229    }
230
231    if config.strip_vulns {
232        doc.as_object_mut().map(|o| o.remove("annotations"));
233    }
234}
235
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Component;

    /// A name-pattern filter keeps matching components and drops the others from the
    /// CycloneDX output.
    #[test]
    fn tailor_by_name_pattern() {
        let raw = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[
            {"name":"keep-me","version":"1.0"},
            {"name":"remove-me","version":"2.0"}
        ]}"#;

        // Mirror the two raw components in the normalized model.
        let mut sbom = NormalizedSbom::default();
        for (name, id) in [("keep-me", "id-keep"), ("remove-me", "id-remove")] {
            let component = Component::new(name.to_string(), id.to_string());
            sbom.components
                .insert(component.canonical_id.clone(), component);
        }

        let config = TailorConfig {
            include_name_pattern: Some(String::from("keep")),
            ..TailorConfig::default()
        };

        let tailored = tailor_sbom_json(raw, &sbom, &config).unwrap();
        assert!(tailored.contains("keep-me"));
        assert!(!tailored.contains("remove-me"));
    }

    /// `strip_vulns` removes the CycloneDX `vulnerabilities` section from the output.
    #[test]
    fn strip_vulns() {
        let raw = r#"{"bomFormat":"CycloneDX","specVersion":"1.5","components":[],"vulnerabilities":[{"id":"CVE-1"}]}"#;
        let config = TailorConfig {
            strip_vulns: true,
            ..TailorConfig::default()
        };

        let tailored = tailor_sbom_json(raw, &NormalizedSbom::default(), &config).unwrap();
        assert!(!tailored.contains("vulnerabilities"));
    }
}