Skip to main content

sbom_model/
lib.rs

1use indexmap::IndexMap;
2use packageurl::PackageUrl;
3use serde::{Deserialize, Serialize};
4use sha2::{Digest, Sha256};
5use std::collections::{BTreeMap, BTreeSet};
6use std::str::FromStr;
7
8/// format-agnostic sbom representation.
9#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
10pub struct Sbom {
11    pub metadata: Metadata,
12    pub components: IndexMap<ComponentId, Component>,
13    /// adjacency list: parent -> children
14    pub dependencies: BTreeMap<ComponentId, BTreeSet<ComponentId>>,
15}
16
17impl Default for Sbom {
18    fn default() -> Self {
19        Self {
20            metadata: Metadata::default(),
21            components: IndexMap::new(),
22            dependencies: BTreeMap::new(),
23        }
24    }
25}
26
27/// sbom metadata.
28#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
29pub struct Metadata {
30    pub timestamp: Option<String>,
31    pub tools: Vec<String>,
32    pub authors: Vec<String>,
33}
34
35/// stable identifier for a component.
36#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
37pub struct ComponentId(String);
38
39impl ComponentId {
40    /// create a new id, preferring purl or hashing properties.
41    pub fn new(purl: Option<&str>, properties: &[(&str, &str)]) -> Self {
42        if let Some(purl) = purl {
43            // Try to canonicalize purl
44            if let Ok(parsed) = PackageUrl::from_str(purl) {
45                return ComponentId(parsed.to_string());
46            }
47            return ComponentId(purl.to_string());
48        }
49
50        // Deterministic hash fallback
51        let mut hasher = Sha256::new();
52        for (k, v) in properties {
53            hasher.update(k.as_bytes());
54            hasher.update(b":");
55            hasher.update(v.as_bytes());
56            hasher.update(b"|");
57        }
58        let hash = hex::encode(hasher.finalize());
59        ComponentId(format!("h:{}", hash))
60    }
61
62    pub fn as_str(&self) -> &str {
63        &self.0
64    }
65}
66
67impl std::fmt::Display for ComponentId {
68    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69        write!(f, "{}", self.0)
70    }
71}
72
73/// a software component.
74#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
75pub struct Component {
76    pub id: ComponentId,
77    pub name: String,
78    pub version: Option<String>,
79    pub ecosystem: Option<String>,
80    pub supplier: Option<String>,
81    pub description: Option<String>,
82    pub purl: Option<String>,
83    pub licenses: Vec<String>,
84    pub hashes: BTreeMap<String, String>,
85    /// original ids from source document.
86    pub source_ids: Vec<String>,
87}
88
89impl Component {
90    pub fn new(name: String, version: Option<String>) -> Self {
91        let mut props = vec![("name", name.as_str())];
92        if let Some(v) = &version {
93            props.push(("version", v));
94        }
95        let id = ComponentId::new(None, &props);
96
97        Self {
98            id,
99            name,
100            version,
101            ecosystem: None,
102            supplier: None,
103            description: None,
104            purl: None,
105            licenses: Vec::new(),
106            hashes: BTreeMap::new(),
107            source_ids: Vec::new(),
108        }
109    }
110}
111
112// Normalization logic
113impl Sbom {
114    pub fn normalize(&mut self) {
115        // Sort components by ID for deterministic output
116        self.components.sort_keys();
117
118        // Sort dependencies
119        for deps in self.dependencies.values_mut() {
120            // BTreeSet is already sorted
121            // But we might want to ensure consistency if we change container types later
122            let _ = deps;
123        }
124
125        // Normalize components
126        for component in self.components.values_mut() {
127            component.normalize();
128        }
129
130        // Strip volatile metadata
131        self.metadata.timestamp = None;
132        self.metadata.tools.clear();
133        self.metadata.authors.clear(); // Authors might be relevant, but often change slightly. Let's keep strict for now.
134    }
135
136    pub fn roots(&self) -> Vec<ComponentId> {
137        let targets: BTreeSet<_> = self.dependencies.values().flatten().collect();
138        self.components
139            .keys()
140            .filter(|id| !targets.contains(id))
141            .cloned()
142            .collect()
143    }
144
145    pub fn deps(&self, id: &ComponentId) -> Vec<ComponentId> {
146        self.dependencies
147            .get(id)
148            .map(|d| d.iter().cloned().collect())
149            .unwrap_or_default()
150    }
151
152    pub fn rdeps(&self, id: &ComponentId) -> Vec<ComponentId> {
153        self.dependencies
154            .iter()
155            .filter(|(_, children)| children.contains(id))
156            .map(|(parent, _)| parent.clone())
157            .collect()
158    }
159
160    pub fn transitive_deps(&self, id: &ComponentId) -> BTreeSet<ComponentId> {
161        let mut visited = BTreeSet::new();
162        let mut stack = vec![id.clone()];
163        while let Some(current) = stack.pop() {
164            if let Some(children) = self.dependencies.get(&current) {
165                for child in children {
166                    if visited.insert(child.clone()) {
167                        stack.push(child.clone());
168                    }
169                }
170            }
171        }
172        visited
173    }
174
175    pub fn ecosystems(&self) -> BTreeSet<String> {
176        self.components
177            .values()
178            .filter_map(|c| c.ecosystem.clone())
179            .collect()
180    }
181
182    pub fn licenses(&self) -> BTreeSet<String> {
183        self.components
184            .values()
185            .flat_map(|c| c.licenses.iter().cloned())
186            .collect()
187    }
188
189    pub fn missing_hashes(&self) -> Vec<ComponentId> {
190        self.components
191            .iter()
192            .filter(|(_, c)| c.hashes.is_empty())
193            .map(|(id, _)| id.clone())
194            .collect()
195    }
196
197    pub fn by_purl(&self, purl: &str) -> Option<&Component> {
198        self.components
199            .values()
200            .find(|c| c.purl.as_deref() == Some(purl))
201    }
202}
203
204impl Component {
205    pub fn normalize(&mut self) {
206        // Canonicalize licenses (simple sort and dedup for now)
207        self.licenses.sort();
208        self.licenses.dedup();
209
210        // Canonicalize hashes (lowercase)
211        let normalized_hashes: BTreeMap<String, String> = self
212            .hashes
213            .iter()
214            .map(|(k, v)| (k.to_lowercase(), v.to_lowercase()))
215            .collect();
216        self.hashes = normalized_hashes;
217    }
218}
219
#[cfg(test)]
mod tests {
    use super::*;

    /// This purl must come back unchanged as the id.
    #[test]
    fn test_component_id_purl() {
        let purl = "pkg:npm/left-pad@1.3.0";
        assert_eq!(ComponentId::new(Some(purl), &[]).as_str(), purl);
    }

    /// The hash fallback is repeatable and carries the `h:` prefix.
    #[test]
    fn test_component_id_hash_stability() {
        let props = [("name", "foo"), ("version", "1.0")];
        let first = ComponentId::new(None, &props);
        let second = ComponentId::new(None, &props);
        assert_eq!(first, second);
        assert!(first.as_str().starts_with("h:"));
    }

    /// Licenses are sorted and de-duplicated; hash keys and values are lowercased.
    #[test]
    fn test_normalization() {
        let mut comp = Component::new("test".to_string(), Some("1.0".to_string()));
        for license in &["MIT", "MIT", "Apache-2.0"] {
            comp.licenses.push(license.to_string());
        }
        comp.hashes.insert("SHA-256".to_string(), "ABC".to_string());

        comp.normalize();

        assert_eq!(comp.licenses, vec!["Apache-2.0", "MIT"]);
        assert_eq!(comp.hashes.get("sha-256").unwrap(), "abc");
    }

    /// Exercises the graph queries on the chain a -> b -> c.
    #[test]
    fn test_query_api() {
        let mut sbom = Sbom::default();

        // Insert components a, b, c (all version "1") and remember their ids.
        let mut ids = Vec::new();
        for name in &["a", "b", "c"] {
            let component = Component::new(name.to_string(), Some("1".into()));
            ids.push(component.id.clone());
            sbom.components.insert(component.id.clone(), component);
        }
        let (id1, id2, id3) = (ids[0].clone(), ids[1].clone(), ids[2].clone());

        // id1 -> id2 -> id3
        sbom.dependencies
            .entry(id1.clone())
            .or_default()
            .insert(id2.clone());
        sbom.dependencies
            .entry(id2.clone())
            .or_default()
            .insert(id3.clone());

        assert_eq!(sbom.roots(), vec![id1.clone()]);
        assert_eq!(sbom.deps(&id1), vec![id2.clone()]);
        assert_eq!(sbom.rdeps(&id2), vec![id1.clone()]);

        let transitive = sbom.transitive_deps(&id1);
        assert!(transitive.contains(&id2));
        assert!(transitive.contains(&id3));
        assert_eq!(transitive.len(), 2);

        assert_eq!(sbom.missing_hashes().len(), 3);
    }
}