Skip to main content

sbom_tools/model/
index.rs

1//! Index structures for efficient SBOM querying.
2//!
3//! This module provides `NormalizedSbomIndex`, a precomputed index for fast
4//! lookups and sorting operations on SBOMs. Building the index once avoids
5//! repeated O(n) scans and string allocations during TUI operations.
6//!
7//! # Example
8//!
9//! ```ignore
10//! use sbom_tools::model::{NormalizedSbom, NormalizedSbomIndex};
11//!
12//! let sbom = parse_sbom(&path)?;
13//! let index = NormalizedSbomIndex::build(&sbom);
14//!
15//! // Fast dependency lookup - O(1) instead of O(edges)
16//! let deps = index.dependencies_of(&component_id);
17//!
18//! // Fast case-insensitive name search - O(1) instead of O(components)
19//! let matches = index.find_by_name_lower("openssl");
20//! ```
21
22use super::{CanonicalId, Component, DependencyEdge, NormalizedSbom};
23use std::collections::HashMap;
24
25/// Precomputed index for efficient SBOM queries.
26///
27/// This index is built once per SBOM and provides O(1) lookups for:
28/// - Dependencies of a component (edges by source)
29/// - Dependents of a component (edges by target)
30/// - Components by lowercased name
31/// - Pre-computed sort keys to avoid repeated string allocations
32#[derive(Debug, Clone)]
33#[must_use]
34pub struct NormalizedSbomIndex {
35    /// Edge indices by source component ID (for fast dependency lookup)
36    edges_by_source: HashMap<CanonicalId, Vec<usize>>,
37    /// Edge indices by target component ID (for fast dependent lookup)
38    edges_by_target: HashMap<CanonicalId, Vec<usize>>,
39    /// Component IDs by lowercased name (for case-insensitive search)
40    by_name_lower: HashMap<String, Vec<CanonicalId>>,
41    /// Pre-computed sort keys for each component
42    sort_keys: HashMap<CanonicalId, ComponentSortKey>,
43    /// Bom-ref (format_id) to canonical ID mapping for crypto cross-reference resolution
44    bom_ref_map: HashMap<String, CanonicalId>,
45    /// Total component count
46    component_count: usize,
47    /// Total edge count
48    edge_count: usize,
49}
50
51/// Pre-computed lowercase strings for sorting without repeated allocations.
52#[derive(Debug, Clone, Default)]
53pub struct ComponentSortKey {
54    /// Lowercased component name
55    pub name_lower: String,
56    /// Lowercased version string
57    pub version_lower: String,
58    /// Lowercased ecosystem name
59    pub ecosystem_lower: String,
60    /// Lowercased canonical ID
61    pub id_lower: String,
62    /// Lowercased PURL (if available)
63    pub purl_lower: String,
64    /// Lowercased group/namespace
65    pub group_lower: String,
66}
67
68impl ComponentSortKey {
69    /// Build sort key from a component
70    #[must_use]
71    pub fn from_component(comp: &Component) -> Self {
72        Self {
73            name_lower: comp.name.to_lowercase(),
74            version_lower: comp.version.as_deref().unwrap_or("").to_lowercase(),
75            ecosystem_lower: comp
76                .ecosystem
77                .as_ref()
78                .map(|e| e.to_string().to_lowercase())
79                .unwrap_or_default(),
80            id_lower: comp.canonical_id.value().to_lowercase(),
81            purl_lower: comp
82                .identifiers
83                .purl
84                .as_deref()
85                .unwrap_or("")
86                .to_lowercase(),
87            group_lower: comp.group.as_deref().unwrap_or("").to_lowercase(),
88        }
89    }
90
91    /// Check if any field contains the query (case-insensitive)
92    #[must_use]
93    pub fn contains(&self, query_lower: &str) -> bool {
94        self.name_lower.contains(query_lower)
95            || self.version_lower.contains(query_lower)
96            || self.purl_lower.contains(query_lower)
97            || self.id_lower.contains(query_lower)
98    }
99
100    /// Check if name contains the query
101    #[must_use]
102    pub fn name_contains(&self, query_lower: &str) -> bool {
103        self.name_lower.contains(query_lower)
104    }
105}
106
107impl NormalizedSbomIndex {
108    /// Build an index from a normalized SBOM.
109    ///
110    /// This is an O(n + m) operation where n = components and m = edges.
111    /// The resulting index provides O(1) lookups.
112    pub fn build(sbom: &NormalizedSbom) -> Self {
113        let mut edges_by_source: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
114        let mut edges_by_target: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
115        let mut by_name_lower: HashMap<String, Vec<CanonicalId>> = HashMap::new();
116        let mut sort_keys: HashMap<CanonicalId, ComponentSortKey> = HashMap::new();
117        let mut bom_ref_map: HashMap<String, CanonicalId> = HashMap::new();
118
119        // Index edges
120        for (idx, edge) in sbom.edges.iter().enumerate() {
121            edges_by_source
122                .entry(edge.from.clone())
123                .or_default()
124                .push(idx);
125            edges_by_target
126                .entry(edge.to.clone())
127                .or_default()
128                .push(idx);
129        }
130
131        // Index components
132        for (id, comp) in &sbom.components {
133            // Index by lowercased name
134            let name_lower = comp.name.to_lowercase();
135            by_name_lower
136                .entry(name_lower)
137                .or_default()
138                .push(id.clone());
139
140            // Build sort key
141            sort_keys.insert(id.clone(), ComponentSortKey::from_component(comp));
142
143            // Index bom-ref (format_id) for crypto cross-reference resolution
144            let format_id = &comp.identifiers.format_id;
145            if !format_id.is_empty() {
146                bom_ref_map.insert(format_id.clone(), id.clone());
147            }
148        }
149
150        Self {
151            edges_by_source,
152            edges_by_target,
153            by_name_lower,
154            sort_keys,
155            bom_ref_map,
156            component_count: sbom.components.len(),
157            edge_count: sbom.edges.len(),
158        }
159    }
160
161    /// Get edge indices for dependencies of a component (outgoing edges).
162    ///
163    /// Returns empty slice if component has no dependencies.
164    /// O(1) lookup instead of O(edges).
165    pub fn dependency_indices(&self, id: &CanonicalId) -> &[usize] {
166        self.edges_by_source
167            .get(id)
168            .map_or(&[], std::vec::Vec::as_slice)
169    }
170
171    /// Get edge indices for dependents of a component (incoming edges).
172    ///
173    /// Returns empty slice if component has no dependents.
174    /// O(1) lookup instead of O(edges).
175    pub fn dependent_indices(&self, id: &CanonicalId) -> &[usize] {
176        self.edges_by_target
177            .get(id)
178            .map_or(&[], std::vec::Vec::as_slice)
179    }
180
181    /// Get dependencies of a component as edges.
182    ///
183    /// O(k) where k = number of dependencies (much faster than O(edges)).
184    #[must_use]
185    pub fn dependencies_of<'a>(
186        &self,
187        id: &CanonicalId,
188        edges: &'a [DependencyEdge],
189    ) -> Vec<&'a DependencyEdge> {
190        self.dependency_indices(id)
191            .iter()
192            .filter_map(|&idx| edges.get(idx))
193            .collect()
194    }
195
196    /// Get dependents of a component as edges.
197    ///
198    /// O(k) where k = number of dependents (much faster than O(edges)).
199    #[must_use]
200    pub fn dependents_of<'a>(
201        &self,
202        id: &CanonicalId,
203        edges: &'a [DependencyEdge],
204    ) -> Vec<&'a DependencyEdge> {
205        self.dependent_indices(id)
206            .iter()
207            .filter_map(|&idx| edges.get(idx))
208            .collect()
209    }
210
211    /// Find component IDs by lowercased name.
212    ///
213    /// O(1) lookup instead of O(components).
214    pub fn find_by_name_lower(&self, name_lower: &str) -> &[CanonicalId] {
215        self.by_name_lower
216            .get(name_lower)
217            .map_or(&[], std::vec::Vec::as_slice)
218    }
219
220    /// Find component IDs whose name contains the query (case-insensitive).
221    ///
222    /// `O(unique_names)` - still iterates but only over unique lowercased names,
223    /// not all components.
224    #[must_use]
225    pub fn search_by_name(&self, query_lower: &str) -> Vec<CanonicalId> {
226        self.by_name_lower
227            .iter()
228            .filter(|(name, _)| name.contains(query_lower))
229            .flat_map(|(_, ids)| ids.iter().cloned())
230            .collect()
231    }
232
233    /// Get the pre-computed sort key for a component.
234    ///
235    /// O(1) lookup, avoids repeated `to_lowercase()` calls during sorting.
236    #[must_use]
237    pub fn sort_key(&self, id: &CanonicalId) -> Option<&ComponentSortKey> {
238        self.sort_keys.get(id)
239    }
240
241    /// Get all sort keys for iteration.
242    #[must_use]
243    pub const fn sort_keys(&self) -> &HashMap<CanonicalId, ComponentSortKey> {
244        &self.sort_keys
245    }
246
247    /// Resolve a bom-ref string (used in crypto cross-references like
248    /// `signatureAlgorithmRef`, `subjectPublicKeyRef`, `algorithmRef`)
249    /// to the canonical component ID.
250    #[must_use]
251    pub fn resolve_bom_ref(&self, bom_ref: &str) -> Option<&CanonicalId> {
252        self.bom_ref_map.get(bom_ref)
253    }
254
255    /// Check if component has any dependencies.
256    #[must_use]
257    pub fn has_dependencies(&self, id: &CanonicalId) -> bool {
258        self.edges_by_source.get(id).is_some_and(|v| !v.is_empty())
259    }
260
261    /// Check if component has any dependents.
262    #[must_use]
263    pub fn has_dependents(&self, id: &CanonicalId) -> bool {
264        self.edges_by_target.get(id).is_some_and(|v| !v.is_empty())
265    }
266
267    /// Get count of dependencies for a component.
268    pub fn dependency_count(&self, id: &CanonicalId) -> usize {
269        self.edges_by_source.get(id).map_or(0, std::vec::Vec::len)
270    }
271
272    /// Get count of dependents for a component.
273    pub fn dependent_count(&self, id: &CanonicalId) -> usize {
274        self.edges_by_target.get(id).map_or(0, std::vec::Vec::len)
275    }
276
277    /// Get total component count.
278    #[must_use]
279    pub const fn component_count(&self) -> usize {
280        self.component_count
281    }
282
283    /// Get total edge count.
284    #[must_use]
285    pub const fn edge_count(&self) -> usize {
286        self.edge_count
287    }
288
289    /// Get count of root components (no incoming edges).
290    #[must_use]
291    pub fn root_count(&self) -> usize {
292        self.component_count
293            .saturating_sub(self.edges_by_target.len())
294            + self
295                .edges_by_target
296                .values()
297                .filter(|v| v.is_empty())
298                .count()
299    }
300
301    /// Get count of leaf components (no outgoing edges).
302    #[must_use]
303    pub fn leaf_count(&self) -> usize {
304        self.component_count
305            .saturating_sub(self.edges_by_source.len())
306            + self
307                .edges_by_source
308                .values()
309                .filter(|v| v.is_empty())
310                .count()
311    }
312}
313
314/// Builder for creating indexes with optional features.
315#[derive(Debug, Default)]
316#[must_use]
317pub struct SbomIndexBuilder {
318    /// Whether to build name index
319    index_names: bool,
320    /// Whether to build sort keys
321    build_sort_keys: bool,
322}
323
324impl SbomIndexBuilder {
325    /// Create a new builder with all features enabled.
326    pub const fn new() -> Self {
327        Self {
328            index_names: true,
329            build_sort_keys: true,
330        }
331    }
332
333    /// Create a minimal builder (edges only).
334    pub const fn minimal() -> Self {
335        Self {
336            index_names: false,
337            build_sort_keys: false,
338        }
339    }
340
341    /// Enable name indexing.
342    pub const fn with_name_index(mut self) -> Self {
343        self.index_names = true;
344        self
345    }
346
347    /// Enable sort key building.
348    pub const fn with_sort_keys(mut self) -> Self {
349        self.build_sort_keys = true;
350        self
351    }
352
353    /// Build the index with configured options.
354    pub fn build(&self, sbom: &NormalizedSbom) -> NormalizedSbomIndex {
355        let mut edges_by_source: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
356        let mut edges_by_target: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
357        let mut by_name_lower: HashMap<String, Vec<CanonicalId>> = HashMap::new();
358        let mut sort_keys: HashMap<CanonicalId, ComponentSortKey> = HashMap::new();
359        let mut bom_ref_map: HashMap<String, CanonicalId> = HashMap::new();
360
361        // Always index edges
362        for (idx, edge) in sbom.edges.iter().enumerate() {
363            edges_by_source
364                .entry(edge.from.clone())
365                .or_default()
366                .push(idx);
367            edges_by_target
368                .entry(edge.to.clone())
369                .or_default()
370                .push(idx);
371        }
372
373        // Optionally index components
374        if self.index_names || self.build_sort_keys {
375            for (id, comp) in &sbom.components {
376                if self.index_names {
377                    let name_lower = comp.name.to_lowercase();
378                    by_name_lower
379                        .entry(name_lower)
380                        .or_default()
381                        .push(id.clone());
382                }
383
384                if self.build_sort_keys {
385                    sort_keys.insert(id.clone(), ComponentSortKey::from_component(comp));
386                }
387
388                // Always index bom-refs for crypto cross-reference resolution
389                let format_id = &comp.identifiers.format_id;
390                if !format_id.is_empty() {
391                    bom_ref_map.insert(format_id.clone(), id.clone());
392                }
393            }
394        }
395
396        NormalizedSbomIndex {
397            edges_by_source,
398            edges_by_target,
399            by_name_lower,
400            sort_keys,
401            bom_ref_map,
402            component_count: sbom.components.len(),
403            edge_count: sbom.edges.len(),
404        }
405    }
406}
407
408#[cfg(test)]
409mod tests {
410    use super::*;
411    use crate::model::{DependencyType, DocumentMetadata};
412
413    fn make_test_sbom() -> NormalizedSbom {
414        let mut sbom = NormalizedSbom::new(DocumentMetadata::default());
415
416        // Add components
417        let comp_a = Component::new("ComponentA".to_string(), "comp-a".to_string());
418        let comp_b = Component::new("ComponentB".to_string(), "comp-b".to_string());
419        let comp_c = Component::new("componentb".to_string(), "comp-c".to_string()); // Same name lowercase
420
421        let id_a = comp_a.canonical_id.clone();
422        let id_b = comp_b.canonical_id.clone();
423        let id_c = comp_c.canonical_id.clone();
424
425        sbom.add_component(comp_a);
426        sbom.add_component(comp_b);
427        sbom.add_component(comp_c);
428
429        // Add edges: A -> B, A -> C
430        sbom.add_edge(DependencyEdge::new(
431            id_a.clone(),
432            id_b.clone(),
433            DependencyType::DependsOn,
434        ));
435        sbom.add_edge(DependencyEdge::new(
436            id_a.clone(),
437            id_c.clone(),
438            DependencyType::DependsOn,
439        ));
440
441        sbom
442    }
443
444    #[test]
445    fn test_build_index() {
446        let sbom = make_test_sbom();
447        let index = NormalizedSbomIndex::build(&sbom);
448
449        assert_eq!(index.component_count(), 3);
450        assert_eq!(index.edge_count(), 2);
451    }
452
453    #[test]
454    fn test_dependency_lookup() {
455        let sbom = make_test_sbom();
456        let index = NormalizedSbomIndex::build(&sbom);
457
458        let comp_a_id = sbom.components.keys().next().unwrap();
459
460        // A has 2 dependencies
461        assert_eq!(index.dependency_count(comp_a_id), 2);
462
463        // Get actual edges
464        let deps = index.dependencies_of(comp_a_id, &sbom.edges);
465        assert_eq!(deps.len(), 2);
466    }
467
468    #[test]
469    fn test_dependent_lookup() {
470        let sbom = make_test_sbom();
471        let index = NormalizedSbomIndex::build(&sbom);
472
473        // B and C should have 1 dependent each (A)
474        let comp_b_id = sbom.components.keys().nth(1).unwrap();
475        assert_eq!(index.dependent_count(comp_b_id), 1);
476    }
477
478    #[test]
479    fn test_name_lookup() {
480        let sbom = make_test_sbom();
481        let index = NormalizedSbomIndex::build(&sbom);
482
483        // "componentb" should match both ComponentB and componentb
484        let matches = index.find_by_name_lower("componentb");
485        assert_eq!(matches.len(), 2);
486    }
487
488    #[test]
489    fn test_name_search() {
490        let sbom = make_test_sbom();
491        let index = NormalizedSbomIndex::build(&sbom);
492
493        // Search for "component" should match all 3
494        let matches = index.search_by_name("component");
495        assert_eq!(matches.len(), 3);
496    }
497
498    #[test]
499    fn test_sort_keys() {
500        let sbom = make_test_sbom();
501        let index = NormalizedSbomIndex::build(&sbom);
502
503        let comp_a_id = sbom.components.keys().next().unwrap();
504        let sort_key = index.sort_key(comp_a_id).unwrap();
505
506        assert_eq!(sort_key.name_lower, "componenta");
507    }
508
509    #[test]
510    fn test_sort_key_contains() {
511        let mut comp = Component::new("MyPackage".to_string(), "pkg-1".to_string());
512        comp.version = Some("1.2.3".to_string());
513        let key = ComponentSortKey::from_component(&comp);
514
515        assert!(key.contains("mypack"));
516        assert!(key.contains("1.2.3"));
517        assert!(!key.contains("notfound"));
518    }
519
520    #[test]
521    fn test_builder_minimal() {
522        let sbom = make_test_sbom();
523        let index = SbomIndexBuilder::minimal().build(&sbom);
524
525        // Edges should still be indexed
526        assert_eq!(index.edge_count(), 2);
527
528        // But name lookup returns empty (not indexed)
529        let matches = index.find_by_name_lower("componenta");
530        assert!(matches.is_empty());
531    }
532
533    #[test]
534    fn test_empty_sbom() {
535        let sbom = NormalizedSbom::default();
536        let index = NormalizedSbomIndex::build(&sbom);
537
538        assert_eq!(index.component_count(), 0);
539        assert_eq!(index.edge_count(), 0);
540    }
541}