Skip to main content

sbom_tools/model/
index.rs

1//! Index structures for efficient SBOM querying.
2//!
3//! This module provides `NormalizedSbomIndex`, a precomputed index for fast
4//! lookups and sorting operations on SBOMs. Building the index once avoids
5//! repeated O(n) scans and string allocations during TUI operations.
6//!
7//! # Example
8//!
9//! ```ignore
10//! use sbom_tools::model::{NormalizedSbom, NormalizedSbomIndex};
11//!
12//! let sbom = parse_sbom(&path)?;
13//! let index = NormalizedSbomIndex::build(&sbom);
14//!
15//! // Fast dependency lookup - O(1) instead of O(edges)
16//! let deps = index.dependencies_of(&component_id);
17//!
18//! // Fast case-insensitive name search - O(1) instead of O(components)
19//! let matches = index.find_by_name_lower("openssl");
20//! ```
21
22use super::{CanonicalId, Component, DependencyEdge, NormalizedSbom};
23use std::collections::HashMap;
24
25/// Precomputed index for efficient SBOM queries.
26///
27/// This index is built once per SBOM and provides O(1) lookups for:
28/// - Dependencies of a component (edges by source)
29/// - Dependents of a component (edges by target)
30/// - Components by lowercased name
31/// - Pre-computed sort keys to avoid repeated string allocations
32#[derive(Debug, Clone)]
33pub struct NormalizedSbomIndex {
34    /// Edge indices by source component ID (for fast dependency lookup)
35    edges_by_source: HashMap<CanonicalId, Vec<usize>>,
36    /// Edge indices by target component ID (for fast dependent lookup)
37    edges_by_target: HashMap<CanonicalId, Vec<usize>>,
38    /// Component IDs by lowercased name (for case-insensitive search)
39    by_name_lower: HashMap<String, Vec<CanonicalId>>,
40    /// Pre-computed sort keys for each component
41    sort_keys: HashMap<CanonicalId, ComponentSortKey>,
42    /// Total component count
43    component_count: usize,
44    /// Total edge count
45    edge_count: usize,
46}
47
48/// Pre-computed lowercase strings for sorting without repeated allocations.
49#[derive(Debug, Clone, Default)]
50pub struct ComponentSortKey {
51    /// Lowercased component name
52    pub name_lower: String,
53    /// Lowercased version string
54    pub version_lower: String,
55    /// Lowercased ecosystem name
56    pub ecosystem_lower: String,
57    /// Lowercased canonical ID
58    pub id_lower: String,
59    /// Lowercased PURL (if available)
60    pub purl_lower: String,
61    /// Lowercased group/namespace
62    pub group_lower: String,
63}
64
65impl ComponentSortKey {
66    /// Build sort key from a component
67    pub fn from_component(comp: &Component) -> Self {
68        Self {
69            name_lower: comp.name.to_lowercase(),
70            version_lower: comp.version.as_deref().unwrap_or("").to_lowercase(),
71            ecosystem_lower: comp
72                .ecosystem
73                .as_ref()
74                .map(|e| e.to_string().to_lowercase())
75                .unwrap_or_default(),
76            id_lower: comp.canonical_id.value().to_lowercase(),
77            purl_lower: comp
78                .identifiers
79                .purl
80                .as_deref()
81                .unwrap_or("")
82                .to_lowercase(),
83            group_lower: comp.group.as_deref().unwrap_or("").to_lowercase(),
84        }
85    }
86
87    /// Check if any field contains the query (case-insensitive)
88    pub fn contains(&self, query_lower: &str) -> bool {
89        self.name_lower.contains(query_lower)
90            || self.version_lower.contains(query_lower)
91            || self.purl_lower.contains(query_lower)
92            || self.id_lower.contains(query_lower)
93    }
94
95    /// Check if name contains the query
96    pub fn name_contains(&self, query_lower: &str) -> bool {
97        self.name_lower.contains(query_lower)
98    }
99}
100
101impl NormalizedSbomIndex {
102    /// Build an index from a normalized SBOM.
103    ///
104    /// This is an O(n + m) operation where n = components and m = edges.
105    /// The resulting index provides O(1) lookups.
106    pub fn build(sbom: &NormalizedSbom) -> Self {
107        let mut edges_by_source: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
108        let mut edges_by_target: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
109        let mut by_name_lower: HashMap<String, Vec<CanonicalId>> = HashMap::new();
110        let mut sort_keys: HashMap<CanonicalId, ComponentSortKey> = HashMap::new();
111
112        // Index edges
113        for (idx, edge) in sbom.edges.iter().enumerate() {
114            edges_by_source
115                .entry(edge.from.clone())
116                .or_default()
117                .push(idx);
118            edges_by_target
119                .entry(edge.to.clone())
120                .or_default()
121                .push(idx);
122        }
123
124        // Index components
125        for (id, comp) in &sbom.components {
126            // Index by lowercased name
127            let name_lower = comp.name.to_lowercase();
128            by_name_lower
129                .entry(name_lower)
130                .or_default()
131                .push(id.clone());
132
133            // Build sort key
134            sort_keys.insert(id.clone(), ComponentSortKey::from_component(comp));
135        }
136
137        Self {
138            edges_by_source,
139            edges_by_target,
140            by_name_lower,
141            sort_keys,
142            component_count: sbom.components.len(),
143            edge_count: sbom.edges.len(),
144        }
145    }
146
147    /// Get edge indices for dependencies of a component (outgoing edges).
148    ///
149    /// Returns empty slice if component has no dependencies.
150    /// O(1) lookup instead of O(edges).
151    pub fn dependency_indices(&self, id: &CanonicalId) -> &[usize] {
152        self.edges_by_source
153            .get(id)
154            .map(|v| v.as_slice())
155            .unwrap_or(&[])
156    }
157
158    /// Get edge indices for dependents of a component (incoming edges).
159    ///
160    /// Returns empty slice if component has no dependents.
161    /// O(1) lookup instead of O(edges).
162    pub fn dependent_indices(&self, id: &CanonicalId) -> &[usize] {
163        self.edges_by_target
164            .get(id)
165            .map(|v| v.as_slice())
166            .unwrap_or(&[])
167    }
168
169    /// Get dependencies of a component as edges.
170    ///
171    /// O(k) where k = number of dependencies (much faster than O(edges)).
172    pub fn dependencies_of<'a>(
173        &self,
174        id: &CanonicalId,
175        edges: &'a [DependencyEdge],
176    ) -> Vec<&'a DependencyEdge> {
177        self.dependency_indices(id)
178            .iter()
179            .filter_map(|&idx| edges.get(idx))
180            .collect()
181    }
182
183    /// Get dependents of a component as edges.
184    ///
185    /// O(k) where k = number of dependents (much faster than O(edges)).
186    pub fn dependents_of<'a>(
187        &self,
188        id: &CanonicalId,
189        edges: &'a [DependencyEdge],
190    ) -> Vec<&'a DependencyEdge> {
191        self.dependent_indices(id)
192            .iter()
193            .filter_map(|&idx| edges.get(idx))
194            .collect()
195    }
196
197    /// Find component IDs by lowercased name.
198    ///
199    /// O(1) lookup instead of O(components).
200    pub fn find_by_name_lower(&self, name_lower: &str) -> &[CanonicalId] {
201        self.by_name_lower
202            .get(name_lower)
203            .map(|v| v.as_slice())
204            .unwrap_or(&[])
205    }
206
207    /// Find component IDs whose name contains the query (case-insensitive).
208    ///
209    /// O(unique_names) - still iterates but only over unique lowercased names,
210    /// not all components.
211    pub fn search_by_name(&self, query_lower: &str) -> Vec<CanonicalId> {
212        self.by_name_lower
213            .iter()
214            .filter(|(name, _)| name.contains(query_lower))
215            .flat_map(|(_, ids)| ids.iter().cloned())
216            .collect()
217    }
218
219    /// Get the pre-computed sort key for a component.
220    ///
221    /// O(1) lookup, avoids repeated to_lowercase() calls during sorting.
222    pub fn sort_key(&self, id: &CanonicalId) -> Option<&ComponentSortKey> {
223        self.sort_keys.get(id)
224    }
225
226    /// Get all sort keys for iteration.
227    pub fn sort_keys(&self) -> &HashMap<CanonicalId, ComponentSortKey> {
228        &self.sort_keys
229    }
230
231    /// Check if component has any dependencies.
232    pub fn has_dependencies(&self, id: &CanonicalId) -> bool {
233        self.edges_by_source
234            .get(id)
235            .map(|v| !v.is_empty())
236            .unwrap_or(false)
237    }
238
239    /// Check if component has any dependents.
240    pub fn has_dependents(&self, id: &CanonicalId) -> bool {
241        self.edges_by_target
242            .get(id)
243            .map(|v| !v.is_empty())
244            .unwrap_or(false)
245    }
246
247    /// Get count of dependencies for a component.
248    pub fn dependency_count(&self, id: &CanonicalId) -> usize {
249        self.edges_by_source
250            .get(id)
251            .map(|v| v.len())
252            .unwrap_or(0)
253    }
254
255    /// Get count of dependents for a component.
256    pub fn dependent_count(&self, id: &CanonicalId) -> usize {
257        self.edges_by_target.get(id).map(|v| v.len()).unwrap_or(0)
258    }
259
260    /// Get total component count.
261    pub fn component_count(&self) -> usize {
262        self.component_count
263    }
264
265    /// Get total edge count.
266    pub fn edge_count(&self) -> usize {
267        self.edge_count
268    }
269
270    /// Get count of root components (no incoming edges).
271    pub fn root_count(&self) -> usize {
272        self.component_count
273            .saturating_sub(self.edges_by_target.len())
274            + self
275                .edges_by_target
276                .values()
277                .filter(|v| v.is_empty())
278                .count()
279    }
280
281    /// Get count of leaf components (no outgoing edges).
282    pub fn leaf_count(&self) -> usize {
283        self.component_count
284            .saturating_sub(self.edges_by_source.len())
285            + self
286                .edges_by_source
287                .values()
288                .filter(|v| v.is_empty())
289                .count()
290    }
291}
292
293/// Builder for creating indexes with optional features.
294#[derive(Debug, Default)]
295pub struct SbomIndexBuilder {
296    /// Whether to build name index
297    index_names: bool,
298    /// Whether to build sort keys
299    build_sort_keys: bool,
300}
301
302impl SbomIndexBuilder {
303    /// Create a new builder with all features enabled.
304    pub fn new() -> Self {
305        Self {
306            index_names: true,
307            build_sort_keys: true,
308        }
309    }
310
311    /// Create a minimal builder (edges only).
312    pub fn minimal() -> Self {
313        Self {
314            index_names: false,
315            build_sort_keys: false,
316        }
317    }
318
319    /// Enable name indexing.
320    pub fn with_name_index(mut self) -> Self {
321        self.index_names = true;
322        self
323    }
324
325    /// Enable sort key building.
326    pub fn with_sort_keys(mut self) -> Self {
327        self.build_sort_keys = true;
328        self
329    }
330
331    /// Build the index with configured options.
332    pub fn build(&self, sbom: &NormalizedSbom) -> NormalizedSbomIndex {
333        let mut edges_by_source: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
334        let mut edges_by_target: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
335        let mut by_name_lower: HashMap<String, Vec<CanonicalId>> = HashMap::new();
336        let mut sort_keys: HashMap<CanonicalId, ComponentSortKey> = HashMap::new();
337
338        // Always index edges
339        for (idx, edge) in sbom.edges.iter().enumerate() {
340            edges_by_source
341                .entry(edge.from.clone())
342                .or_default()
343                .push(idx);
344            edges_by_target
345                .entry(edge.to.clone())
346                .or_default()
347                .push(idx);
348        }
349
350        // Optionally index components
351        if self.index_names || self.build_sort_keys {
352            for (id, comp) in &sbom.components {
353                if self.index_names {
354                    let name_lower = comp.name.to_lowercase();
355                    by_name_lower
356                        .entry(name_lower)
357                        .or_default()
358                        .push(id.clone());
359                }
360
361                if self.build_sort_keys {
362                    sort_keys.insert(id.clone(), ComponentSortKey::from_component(comp));
363                }
364            }
365        }
366
367        NormalizedSbomIndex {
368            edges_by_source,
369            edges_by_target,
370            by_name_lower,
371            sort_keys,
372            component_count: sbom.components.len(),
373            edge_count: sbom.edges.len(),
374        }
375    }
376}
377
378#[cfg(test)]
379mod tests {
380    use super::*;
381    use crate::model::{DependencyType, DocumentMetadata};
382
383    fn make_test_sbom() -> NormalizedSbom {
384        let mut sbom = NormalizedSbom::new(DocumentMetadata::default());
385
386        // Add components
387        let comp_a = Component::new("ComponentA".to_string(), "comp-a".to_string());
388        let comp_b = Component::new("ComponentB".to_string(), "comp-b".to_string());
389        let comp_c = Component::new("componentb".to_string(), "comp-c".to_string()); // Same name lowercase
390
391        let id_a = comp_a.canonical_id.clone();
392        let id_b = comp_b.canonical_id.clone();
393        let id_c = comp_c.canonical_id.clone();
394
395        sbom.add_component(comp_a);
396        sbom.add_component(comp_b);
397        sbom.add_component(comp_c);
398
399        // Add edges: A -> B, A -> C
400        sbom.add_edge(DependencyEdge::new(
401            id_a.clone(),
402            id_b.clone(),
403            DependencyType::DependsOn,
404        ));
405        sbom.add_edge(DependencyEdge::new(
406            id_a.clone(),
407            id_c.clone(),
408            DependencyType::DependsOn,
409        ));
410
411        sbom
412    }
413
414    #[test]
415    fn test_build_index() {
416        let sbom = make_test_sbom();
417        let index = NormalizedSbomIndex::build(&sbom);
418
419        assert_eq!(index.component_count(), 3);
420        assert_eq!(index.edge_count(), 2);
421    }
422
423    #[test]
424    fn test_dependency_lookup() {
425        let sbom = make_test_sbom();
426        let index = NormalizedSbomIndex::build(&sbom);
427
428        let comp_a_id = sbom.components.keys().next().unwrap();
429
430        // A has 2 dependencies
431        assert_eq!(index.dependency_count(comp_a_id), 2);
432
433        // Get actual edges
434        let deps = index.dependencies_of(comp_a_id, &sbom.edges);
435        assert_eq!(deps.len(), 2);
436    }
437
438    #[test]
439    fn test_dependent_lookup() {
440        let sbom = make_test_sbom();
441        let index = NormalizedSbomIndex::build(&sbom);
442
443        // B and C should have 1 dependent each (A)
444        let comp_b_id = sbom.components.keys().nth(1).unwrap();
445        assert_eq!(index.dependent_count(comp_b_id), 1);
446    }
447
448    #[test]
449    fn test_name_lookup() {
450        let sbom = make_test_sbom();
451        let index = NormalizedSbomIndex::build(&sbom);
452
453        // "componentb" should match both ComponentB and componentb
454        let matches = index.find_by_name_lower("componentb");
455        assert_eq!(matches.len(), 2);
456    }
457
458    #[test]
459    fn test_name_search() {
460        let sbom = make_test_sbom();
461        let index = NormalizedSbomIndex::build(&sbom);
462
463        // Search for "component" should match all 3
464        let matches = index.search_by_name("component");
465        assert_eq!(matches.len(), 3);
466    }
467
468    #[test]
469    fn test_sort_keys() {
470        let sbom = make_test_sbom();
471        let index = NormalizedSbomIndex::build(&sbom);
472
473        let comp_a_id = sbom.components.keys().next().unwrap();
474        let sort_key = index.sort_key(comp_a_id).unwrap();
475
476        assert_eq!(sort_key.name_lower, "componenta");
477    }
478
479    #[test]
480    fn test_sort_key_contains() {
481        let mut comp = Component::new("MyPackage".to_string(), "pkg-1".to_string());
482        comp.version = Some("1.2.3".to_string());
483        let key = ComponentSortKey::from_component(&comp);
484
485        assert!(key.contains("mypack"));
486        assert!(key.contains("1.2.3"));
487        assert!(!key.contains("notfound"));
488    }
489
490    #[test]
491    fn test_builder_minimal() {
492        let sbom = make_test_sbom();
493        let index = SbomIndexBuilder::minimal().build(&sbom);
494
495        // Edges should still be indexed
496        assert_eq!(index.edge_count(), 2);
497
498        // But name lookup returns empty (not indexed)
499        let matches = index.find_by_name_lower("componenta");
500        assert!(matches.is_empty());
501    }
502
503    #[test]
504    fn test_empty_sbom() {
505        let sbom = NormalizedSbom::default();
506        let index = NormalizedSbomIndex::build(&sbom);
507
508        assert_eq!(index.component_count(), 0);
509        assert_eq!(index.edge_count(), 0);
510    }
511}