Skip to main content

sbom_tools/model/
index.rs

1//! Index structures for efficient SBOM querying.
2//!
3//! This module provides `NormalizedSbomIndex`, a precomputed index for fast
4//! lookups and sorting operations on SBOMs. Building the index once avoids
5//! repeated O(n) scans and string allocations during TUI operations.
6//!
7//! # Example
8//!
9//! ```ignore
10//! use sbom_tools::model::{NormalizedSbom, NormalizedSbomIndex};
11//!
12//! let sbom = parse_sbom(&path)?;
13//! let index = NormalizedSbomIndex::build(&sbom);
14//!
15//! // Fast dependency lookup - O(1) instead of O(edges)
16//! let deps = index.dependencies_of(&component_id);
17//!
18//! // Fast case-insensitive name search - O(1) instead of O(components)
19//! let matches = index.find_by_name_lower("openssl");
20//! ```
21
22use super::{CanonicalId, Component, DependencyEdge, NormalizedSbom};
23use std::collections::HashMap;
24
25/// Precomputed index for efficient SBOM queries.
26///
27/// This index is built once per SBOM and provides O(1) lookups for:
28/// - Dependencies of a component (edges by source)
29/// - Dependents of a component (edges by target)
30/// - Components by lowercased name
31/// - Pre-computed sort keys to avoid repeated string allocations
32#[derive(Debug, Clone)]
33#[must_use]
34pub struct NormalizedSbomIndex {
35    /// Edge indices by source component ID (for fast dependency lookup)
36    edges_by_source: HashMap<CanonicalId, Vec<usize>>,
37    /// Edge indices by target component ID (for fast dependent lookup)
38    edges_by_target: HashMap<CanonicalId, Vec<usize>>,
39    /// Component IDs by lowercased name (for case-insensitive search)
40    by_name_lower: HashMap<String, Vec<CanonicalId>>,
41    /// Pre-computed sort keys for each component
42    sort_keys: HashMap<CanonicalId, ComponentSortKey>,
43    /// Total component count
44    component_count: usize,
45    /// Total edge count
46    edge_count: usize,
47}
48
49/// Pre-computed lowercase strings for sorting without repeated allocations.
50#[derive(Debug, Clone, Default)]
51pub struct ComponentSortKey {
52    /// Lowercased component name
53    pub name_lower: String,
54    /// Lowercased version string
55    pub version_lower: String,
56    /// Lowercased ecosystem name
57    pub ecosystem_lower: String,
58    /// Lowercased canonical ID
59    pub id_lower: String,
60    /// Lowercased PURL (if available)
61    pub purl_lower: String,
62    /// Lowercased group/namespace
63    pub group_lower: String,
64}
65
66impl ComponentSortKey {
67    /// Build sort key from a component
68    #[must_use] 
69    pub fn from_component(comp: &Component) -> Self {
70        Self {
71            name_lower: comp.name.to_lowercase(),
72            version_lower: comp.version.as_deref().unwrap_or("").to_lowercase(),
73            ecosystem_lower: comp
74                .ecosystem
75                .as_ref()
76                .map(|e| e.to_string().to_lowercase())
77                .unwrap_or_default(),
78            id_lower: comp.canonical_id.value().to_lowercase(),
79            purl_lower: comp
80                .identifiers
81                .purl
82                .as_deref()
83                .unwrap_or("")
84                .to_lowercase(),
85            group_lower: comp.group.as_deref().unwrap_or("").to_lowercase(),
86        }
87    }
88
89    /// Check if any field contains the query (case-insensitive)
90    #[must_use] 
91    pub fn contains(&self, query_lower: &str) -> bool {
92        self.name_lower.contains(query_lower)
93            || self.version_lower.contains(query_lower)
94            || self.purl_lower.contains(query_lower)
95            || self.id_lower.contains(query_lower)
96    }
97
98    /// Check if name contains the query
99    #[must_use] 
100    pub fn name_contains(&self, query_lower: &str) -> bool {
101        self.name_lower.contains(query_lower)
102    }
103}
104
105impl NormalizedSbomIndex {
106    /// Build an index from a normalized SBOM.
107    ///
108    /// This is an O(n + m) operation where n = components and m = edges.
109    /// The resulting index provides O(1) lookups.
110    pub fn build(sbom: &NormalizedSbom) -> Self {
111        let mut edges_by_source: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
112        let mut edges_by_target: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
113        let mut by_name_lower: HashMap<String, Vec<CanonicalId>> = HashMap::new();
114        let mut sort_keys: HashMap<CanonicalId, ComponentSortKey> = HashMap::new();
115
116        // Index edges
117        for (idx, edge) in sbom.edges.iter().enumerate() {
118            edges_by_source
119                .entry(edge.from.clone())
120                .or_default()
121                .push(idx);
122            edges_by_target
123                .entry(edge.to.clone())
124                .or_default()
125                .push(idx);
126        }
127
128        // Index components
129        for (id, comp) in &sbom.components {
130            // Index by lowercased name
131            let name_lower = comp.name.to_lowercase();
132            by_name_lower
133                .entry(name_lower)
134                .or_default()
135                .push(id.clone());
136
137            // Build sort key
138            sort_keys.insert(id.clone(), ComponentSortKey::from_component(comp));
139        }
140
141        Self {
142            edges_by_source,
143            edges_by_target,
144            by_name_lower,
145            sort_keys,
146            component_count: sbom.components.len(),
147            edge_count: sbom.edges.len(),
148        }
149    }
150
151    /// Get edge indices for dependencies of a component (outgoing edges).
152    ///
153    /// Returns empty slice if component has no dependencies.
154    /// O(1) lookup instead of O(edges).
155    pub fn dependency_indices(&self, id: &CanonicalId) -> &[usize] {
156        self.edges_by_source
157            .get(id)
158            .map_or(&[], std::vec::Vec::as_slice)
159    }
160
161    /// Get edge indices for dependents of a component (incoming edges).
162    ///
163    /// Returns empty slice if component has no dependents.
164    /// O(1) lookup instead of O(edges).
165    pub fn dependent_indices(&self, id: &CanonicalId) -> &[usize] {
166        self.edges_by_target
167            .get(id)
168            .map_or(&[], std::vec::Vec::as_slice)
169    }
170
171    /// Get dependencies of a component as edges.
172    ///
173    /// O(k) where k = number of dependencies (much faster than O(edges)).
174    #[must_use] 
175    pub fn dependencies_of<'a>(
176        &self,
177        id: &CanonicalId,
178        edges: &'a [DependencyEdge],
179    ) -> Vec<&'a DependencyEdge> {
180        self.dependency_indices(id)
181            .iter()
182            .filter_map(|&idx| edges.get(idx))
183            .collect()
184    }
185
186    /// Get dependents of a component as edges.
187    ///
188    /// O(k) where k = number of dependents (much faster than O(edges)).
189    #[must_use] 
190    pub fn dependents_of<'a>(
191        &self,
192        id: &CanonicalId,
193        edges: &'a [DependencyEdge],
194    ) -> Vec<&'a DependencyEdge> {
195        self.dependent_indices(id)
196            .iter()
197            .filter_map(|&idx| edges.get(idx))
198            .collect()
199    }
200
201    /// Find component IDs by lowercased name.
202    ///
203    /// O(1) lookup instead of O(components).
204    pub fn find_by_name_lower(&self, name_lower: &str) -> &[CanonicalId] {
205        self.by_name_lower
206            .get(name_lower)
207            .map_or(&[], std::vec::Vec::as_slice)
208    }
209
210    /// Find component IDs whose name contains the query (case-insensitive).
211    ///
212    /// `O(unique_names)` - still iterates but only over unique lowercased names,
213    /// not all components.
214    #[must_use] 
215    pub fn search_by_name(&self, query_lower: &str) -> Vec<CanonicalId> {
216        self.by_name_lower
217            .iter()
218            .filter(|(name, _)| name.contains(query_lower))
219            .flat_map(|(_, ids)| ids.iter().cloned())
220            .collect()
221    }
222
223    /// Get the pre-computed sort key for a component.
224    ///
225    /// O(1) lookup, avoids repeated `to_lowercase()` calls during sorting.
226    #[must_use] 
227    pub fn sort_key(&self, id: &CanonicalId) -> Option<&ComponentSortKey> {
228        self.sort_keys.get(id)
229    }
230
231    /// Get all sort keys for iteration.
232    #[must_use] 
233    pub const fn sort_keys(&self) -> &HashMap<CanonicalId, ComponentSortKey> {
234        &self.sort_keys
235    }
236
237    /// Check if component has any dependencies.
238    #[must_use] 
239    pub fn has_dependencies(&self, id: &CanonicalId) -> bool {
240        self.edges_by_source
241            .get(id)
242            .is_some_and(|v| !v.is_empty())
243    }
244
245    /// Check if component has any dependents.
246    #[must_use] 
247    pub fn has_dependents(&self, id: &CanonicalId) -> bool {
248        self.edges_by_target
249            .get(id)
250            .is_some_and(|v| !v.is_empty())
251    }
252
253    /// Get count of dependencies for a component.
254    pub fn dependency_count(&self, id: &CanonicalId) -> usize {
255        self.edges_by_source
256            .get(id)
257            .map_or(0, std::vec::Vec::len)
258    }
259
260    /// Get count of dependents for a component.
261    pub fn dependent_count(&self, id: &CanonicalId) -> usize {
262        self.edges_by_target.get(id).map_or(0, std::vec::Vec::len)
263    }
264
265    /// Get total component count.
266    #[must_use] 
267    pub const fn component_count(&self) -> usize {
268        self.component_count
269    }
270
271    /// Get total edge count.
272    #[must_use] 
273    pub const fn edge_count(&self) -> usize {
274        self.edge_count
275    }
276
277    /// Get count of root components (no incoming edges).
278    #[must_use] 
279    pub fn root_count(&self) -> usize {
280        self.component_count
281            .saturating_sub(self.edges_by_target.len())
282            + self
283                .edges_by_target
284                .values()
285                .filter(|v| v.is_empty())
286                .count()
287    }
288
289    /// Get count of leaf components (no outgoing edges).
290    #[must_use] 
291    pub fn leaf_count(&self) -> usize {
292        self.component_count
293            .saturating_sub(self.edges_by_source.len())
294            + self
295                .edges_by_source
296                .values()
297                .filter(|v| v.is_empty())
298                .count()
299    }
300}
301
302/// Builder for creating indexes with optional features.
303#[derive(Debug, Default)]
304#[must_use]
305pub struct SbomIndexBuilder {
306    /// Whether to build name index
307    index_names: bool,
308    /// Whether to build sort keys
309    build_sort_keys: bool,
310}
311
312impl SbomIndexBuilder {
313    /// Create a new builder with all features enabled.
314    pub const fn new() -> Self {
315        Self {
316            index_names: true,
317            build_sort_keys: true,
318        }
319    }
320
321    /// Create a minimal builder (edges only).
322    pub const fn minimal() -> Self {
323        Self {
324            index_names: false,
325            build_sort_keys: false,
326        }
327    }
328
329    /// Enable name indexing.
330    pub const fn with_name_index(mut self) -> Self {
331        self.index_names = true;
332        self
333    }
334
335    /// Enable sort key building.
336    pub const fn with_sort_keys(mut self) -> Self {
337        self.build_sort_keys = true;
338        self
339    }
340
341    /// Build the index with configured options.
342    pub fn build(&self, sbom: &NormalizedSbom) -> NormalizedSbomIndex {
343        let mut edges_by_source: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
344        let mut edges_by_target: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
345        let mut by_name_lower: HashMap<String, Vec<CanonicalId>> = HashMap::new();
346        let mut sort_keys: HashMap<CanonicalId, ComponentSortKey> = HashMap::new();
347
348        // Always index edges
349        for (idx, edge) in sbom.edges.iter().enumerate() {
350            edges_by_source
351                .entry(edge.from.clone())
352                .or_default()
353                .push(idx);
354            edges_by_target
355                .entry(edge.to.clone())
356                .or_default()
357                .push(idx);
358        }
359
360        // Optionally index components
361        if self.index_names || self.build_sort_keys {
362            for (id, comp) in &sbom.components {
363                if self.index_names {
364                    let name_lower = comp.name.to_lowercase();
365                    by_name_lower
366                        .entry(name_lower)
367                        .or_default()
368                        .push(id.clone());
369                }
370
371                if self.build_sort_keys {
372                    sort_keys.insert(id.clone(), ComponentSortKey::from_component(comp));
373                }
374            }
375        }
376
377        NormalizedSbomIndex {
378            edges_by_source,
379            edges_by_target,
380            by_name_lower,
381            sort_keys,
382            component_count: sbom.components.len(),
383            edge_count: sbom.edges.len(),
384        }
385    }
386}
387
388#[cfg(test)]
389mod tests {
390    use super::*;
391    use crate::model::{DependencyType, DocumentMetadata};
392
393    fn make_test_sbom() -> NormalizedSbom {
394        let mut sbom = NormalizedSbom::new(DocumentMetadata::default());
395
396        // Add components
397        let comp_a = Component::new("ComponentA".to_string(), "comp-a".to_string());
398        let comp_b = Component::new("ComponentB".to_string(), "comp-b".to_string());
399        let comp_c = Component::new("componentb".to_string(), "comp-c".to_string()); // Same name lowercase
400
401        let id_a = comp_a.canonical_id.clone();
402        let id_b = comp_b.canonical_id.clone();
403        let id_c = comp_c.canonical_id.clone();
404
405        sbom.add_component(comp_a);
406        sbom.add_component(comp_b);
407        sbom.add_component(comp_c);
408
409        // Add edges: A -> B, A -> C
410        sbom.add_edge(DependencyEdge::new(
411            id_a.clone(),
412            id_b.clone(),
413            DependencyType::DependsOn,
414        ));
415        sbom.add_edge(DependencyEdge::new(
416            id_a.clone(),
417            id_c.clone(),
418            DependencyType::DependsOn,
419        ));
420
421        sbom
422    }
423
424    #[test]
425    fn test_build_index() {
426        let sbom = make_test_sbom();
427        let index = NormalizedSbomIndex::build(&sbom);
428
429        assert_eq!(index.component_count(), 3);
430        assert_eq!(index.edge_count(), 2);
431    }
432
433    #[test]
434    fn test_dependency_lookup() {
435        let sbom = make_test_sbom();
436        let index = NormalizedSbomIndex::build(&sbom);
437
438        let comp_a_id = sbom.components.keys().next().unwrap();
439
440        // A has 2 dependencies
441        assert_eq!(index.dependency_count(comp_a_id), 2);
442
443        // Get actual edges
444        let deps = index.dependencies_of(comp_a_id, &sbom.edges);
445        assert_eq!(deps.len(), 2);
446    }
447
448    #[test]
449    fn test_dependent_lookup() {
450        let sbom = make_test_sbom();
451        let index = NormalizedSbomIndex::build(&sbom);
452
453        // B and C should have 1 dependent each (A)
454        let comp_b_id = sbom.components.keys().nth(1).unwrap();
455        assert_eq!(index.dependent_count(comp_b_id), 1);
456    }
457
458    #[test]
459    fn test_name_lookup() {
460        let sbom = make_test_sbom();
461        let index = NormalizedSbomIndex::build(&sbom);
462
463        // "componentb" should match both ComponentB and componentb
464        let matches = index.find_by_name_lower("componentb");
465        assert_eq!(matches.len(), 2);
466    }
467
468    #[test]
469    fn test_name_search() {
470        let sbom = make_test_sbom();
471        let index = NormalizedSbomIndex::build(&sbom);
472
473        // Search for "component" should match all 3
474        let matches = index.search_by_name("component");
475        assert_eq!(matches.len(), 3);
476    }
477
478    #[test]
479    fn test_sort_keys() {
480        let sbom = make_test_sbom();
481        let index = NormalizedSbomIndex::build(&sbom);
482
483        let comp_a_id = sbom.components.keys().next().unwrap();
484        let sort_key = index.sort_key(comp_a_id).unwrap();
485
486        assert_eq!(sort_key.name_lower, "componenta");
487    }
488
489    #[test]
490    fn test_sort_key_contains() {
491        let mut comp = Component::new("MyPackage".to_string(), "pkg-1".to_string());
492        comp.version = Some("1.2.3".to_string());
493        let key = ComponentSortKey::from_component(&comp);
494
495        assert!(key.contains("mypack"));
496        assert!(key.contains("1.2.3"));
497        assert!(!key.contains("notfound"));
498    }
499
500    #[test]
501    fn test_builder_minimal() {
502        let sbom = make_test_sbom();
503        let index = SbomIndexBuilder::minimal().build(&sbom);
504
505        // Edges should still be indexed
506        assert_eq!(index.edge_count(), 2);
507
508        // But name lookup returns empty (not indexed)
509        let matches = index.find_by_name_lower("componenta");
510        assert!(matches.is_empty());
511    }
512
513    #[test]
514    fn test_empty_sbom() {
515        let sbom = NormalizedSbom::default();
516        let index = NormalizedSbomIndex::build(&sbom);
517
518        assert_eq!(index.component_count(), 0);
519        assert_eq!(index.edge_count(), 0);
520    }
521}