Skip to main content

sbom_tools/model/
index.rs

1//! Index structures for efficient SBOM querying.
2//!
3//! This module provides `NormalizedSbomIndex`, a precomputed index for fast
4//! lookups and sorting operations on SBOMs. Building the index once avoids
5//! repeated O(n) scans and string allocations during TUI operations.
6//!
7//! # Example
8//!
9//! ```ignore
10//! use sbom_tools::model::{NormalizedSbom, NormalizedSbomIndex};
11//!
12//! let sbom = parse_sbom(&path)?;
13//! let index = NormalizedSbomIndex::build(&sbom);
14//!
15//! // Fast dependency lookup - O(1) instead of O(edges)
16//! let deps = index.dependencies_of(&component_id);
17//!
18//! // Fast case-insensitive name search - O(1) instead of O(components)
19//! let matches = index.find_by_name_lower("openssl");
20//! ```
21
22use super::{CanonicalId, Component, DependencyEdge, NormalizedSbom};
23use std::collections::HashMap;
24
25/// Precomputed index for efficient SBOM queries.
26///
27/// This index is built once per SBOM and provides O(1) lookups for:
28/// - Dependencies of a component (edges by source)
29/// - Dependents of a component (edges by target)
30/// - Components by lowercased name
31/// - Pre-computed sort keys to avoid repeated string allocations
32#[derive(Debug, Clone)]
33#[must_use]
34pub struct NormalizedSbomIndex {
35    /// Edge indices by source component ID (for fast dependency lookup)
36    edges_by_source: HashMap<CanonicalId, Vec<usize>>,
37    /// Edge indices by target component ID (for fast dependent lookup)
38    edges_by_target: HashMap<CanonicalId, Vec<usize>>,
39    /// Component IDs by lowercased name (for case-insensitive search)
40    by_name_lower: HashMap<String, Vec<CanonicalId>>,
41    /// Pre-computed sort keys for each component
42    sort_keys: HashMap<CanonicalId, ComponentSortKey>,
43    /// Total component count
44    component_count: usize,
45    /// Total edge count
46    edge_count: usize,
47}
48
49/// Pre-computed lowercase strings for sorting without repeated allocations.
50#[derive(Debug, Clone, Default)]
51pub struct ComponentSortKey {
52    /// Lowercased component name
53    pub name_lower: String,
54    /// Lowercased version string
55    pub version_lower: String,
56    /// Lowercased ecosystem name
57    pub ecosystem_lower: String,
58    /// Lowercased canonical ID
59    pub id_lower: String,
60    /// Lowercased PURL (if available)
61    pub purl_lower: String,
62    /// Lowercased group/namespace
63    pub group_lower: String,
64}
65
66impl ComponentSortKey {
67    /// Build sort key from a component
68    #[must_use]
69    pub fn from_component(comp: &Component) -> Self {
70        Self {
71            name_lower: comp.name.to_lowercase(),
72            version_lower: comp.version.as_deref().unwrap_or("").to_lowercase(),
73            ecosystem_lower: comp
74                .ecosystem
75                .as_ref()
76                .map(|e| e.to_string().to_lowercase())
77                .unwrap_or_default(),
78            id_lower: comp.canonical_id.value().to_lowercase(),
79            purl_lower: comp
80                .identifiers
81                .purl
82                .as_deref()
83                .unwrap_or("")
84                .to_lowercase(),
85            group_lower: comp.group.as_deref().unwrap_or("").to_lowercase(),
86        }
87    }
88
89    /// Check if any field contains the query (case-insensitive)
90    #[must_use]
91    pub fn contains(&self, query_lower: &str) -> bool {
92        self.name_lower.contains(query_lower)
93            || self.version_lower.contains(query_lower)
94            || self.purl_lower.contains(query_lower)
95            || self.id_lower.contains(query_lower)
96    }
97
98    /// Check if name contains the query
99    #[must_use]
100    pub fn name_contains(&self, query_lower: &str) -> bool {
101        self.name_lower.contains(query_lower)
102    }
103}
104
105impl NormalizedSbomIndex {
106    /// Build an index from a normalized SBOM.
107    ///
108    /// This is an O(n + m) operation where n = components and m = edges.
109    /// The resulting index provides O(1) lookups.
110    pub fn build(sbom: &NormalizedSbom) -> Self {
111        let mut edges_by_source: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
112        let mut edges_by_target: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
113        let mut by_name_lower: HashMap<String, Vec<CanonicalId>> = HashMap::new();
114        let mut sort_keys: HashMap<CanonicalId, ComponentSortKey> = HashMap::new();
115
116        // Index edges
117        for (idx, edge) in sbom.edges.iter().enumerate() {
118            edges_by_source
119                .entry(edge.from.clone())
120                .or_default()
121                .push(idx);
122            edges_by_target
123                .entry(edge.to.clone())
124                .or_default()
125                .push(idx);
126        }
127
128        // Index components
129        for (id, comp) in &sbom.components {
130            // Index by lowercased name
131            let name_lower = comp.name.to_lowercase();
132            by_name_lower
133                .entry(name_lower)
134                .or_default()
135                .push(id.clone());
136
137            // Build sort key
138            sort_keys.insert(id.clone(), ComponentSortKey::from_component(comp));
139        }
140
141        Self {
142            edges_by_source,
143            edges_by_target,
144            by_name_lower,
145            sort_keys,
146            component_count: sbom.components.len(),
147            edge_count: sbom.edges.len(),
148        }
149    }
150
151    /// Get edge indices for dependencies of a component (outgoing edges).
152    ///
153    /// Returns empty slice if component has no dependencies.
154    /// O(1) lookup instead of O(edges).
155    pub fn dependency_indices(&self, id: &CanonicalId) -> &[usize] {
156        self.edges_by_source
157            .get(id)
158            .map_or(&[], std::vec::Vec::as_slice)
159    }
160
161    /// Get edge indices for dependents of a component (incoming edges).
162    ///
163    /// Returns empty slice if component has no dependents.
164    /// O(1) lookup instead of O(edges).
165    pub fn dependent_indices(&self, id: &CanonicalId) -> &[usize] {
166        self.edges_by_target
167            .get(id)
168            .map_or(&[], std::vec::Vec::as_slice)
169    }
170
171    /// Get dependencies of a component as edges.
172    ///
173    /// O(k) where k = number of dependencies (much faster than O(edges)).
174    #[must_use]
175    pub fn dependencies_of<'a>(
176        &self,
177        id: &CanonicalId,
178        edges: &'a [DependencyEdge],
179    ) -> Vec<&'a DependencyEdge> {
180        self.dependency_indices(id)
181            .iter()
182            .filter_map(|&idx| edges.get(idx))
183            .collect()
184    }
185
186    /// Get dependents of a component as edges.
187    ///
188    /// O(k) where k = number of dependents (much faster than O(edges)).
189    #[must_use]
190    pub fn dependents_of<'a>(
191        &self,
192        id: &CanonicalId,
193        edges: &'a [DependencyEdge],
194    ) -> Vec<&'a DependencyEdge> {
195        self.dependent_indices(id)
196            .iter()
197            .filter_map(|&idx| edges.get(idx))
198            .collect()
199    }
200
201    /// Find component IDs by lowercased name.
202    ///
203    /// O(1) lookup instead of O(components).
204    pub fn find_by_name_lower(&self, name_lower: &str) -> &[CanonicalId] {
205        self.by_name_lower
206            .get(name_lower)
207            .map_or(&[], std::vec::Vec::as_slice)
208    }
209
210    /// Find component IDs whose name contains the query (case-insensitive).
211    ///
212    /// `O(unique_names)` - still iterates but only over unique lowercased names,
213    /// not all components.
214    #[must_use]
215    pub fn search_by_name(&self, query_lower: &str) -> Vec<CanonicalId> {
216        self.by_name_lower
217            .iter()
218            .filter(|(name, _)| name.contains(query_lower))
219            .flat_map(|(_, ids)| ids.iter().cloned())
220            .collect()
221    }
222
223    /// Get the pre-computed sort key for a component.
224    ///
225    /// O(1) lookup, avoids repeated `to_lowercase()` calls during sorting.
226    #[must_use]
227    pub fn sort_key(&self, id: &CanonicalId) -> Option<&ComponentSortKey> {
228        self.sort_keys.get(id)
229    }
230
231    /// Get all sort keys for iteration.
232    #[must_use]
233    pub const fn sort_keys(&self) -> &HashMap<CanonicalId, ComponentSortKey> {
234        &self.sort_keys
235    }
236
237    /// Check if component has any dependencies.
238    #[must_use]
239    pub fn has_dependencies(&self, id: &CanonicalId) -> bool {
240        self.edges_by_source.get(id).is_some_and(|v| !v.is_empty())
241    }
242
243    /// Check if component has any dependents.
244    #[must_use]
245    pub fn has_dependents(&self, id: &CanonicalId) -> bool {
246        self.edges_by_target.get(id).is_some_and(|v| !v.is_empty())
247    }
248
249    /// Get count of dependencies for a component.
250    pub fn dependency_count(&self, id: &CanonicalId) -> usize {
251        self.edges_by_source.get(id).map_or(0, std::vec::Vec::len)
252    }
253
254    /// Get count of dependents for a component.
255    pub fn dependent_count(&self, id: &CanonicalId) -> usize {
256        self.edges_by_target.get(id).map_or(0, std::vec::Vec::len)
257    }
258
259    /// Get total component count.
260    #[must_use]
261    pub const fn component_count(&self) -> usize {
262        self.component_count
263    }
264
265    /// Get total edge count.
266    #[must_use]
267    pub const fn edge_count(&self) -> usize {
268        self.edge_count
269    }
270
271    /// Get count of root components (no incoming edges).
272    #[must_use]
273    pub fn root_count(&self) -> usize {
274        self.component_count
275            .saturating_sub(self.edges_by_target.len())
276            + self
277                .edges_by_target
278                .values()
279                .filter(|v| v.is_empty())
280                .count()
281    }
282
283    /// Get count of leaf components (no outgoing edges).
284    #[must_use]
285    pub fn leaf_count(&self) -> usize {
286        self.component_count
287            .saturating_sub(self.edges_by_source.len())
288            + self
289                .edges_by_source
290                .values()
291                .filter(|v| v.is_empty())
292                .count()
293    }
294}
295
296/// Builder for creating indexes with optional features.
297#[derive(Debug, Default)]
298#[must_use]
299pub struct SbomIndexBuilder {
300    /// Whether to build name index
301    index_names: bool,
302    /// Whether to build sort keys
303    build_sort_keys: bool,
304}
305
306impl SbomIndexBuilder {
307    /// Create a new builder with all features enabled.
308    pub const fn new() -> Self {
309        Self {
310            index_names: true,
311            build_sort_keys: true,
312        }
313    }
314
315    /// Create a minimal builder (edges only).
316    pub const fn minimal() -> Self {
317        Self {
318            index_names: false,
319            build_sort_keys: false,
320        }
321    }
322
323    /// Enable name indexing.
324    pub const fn with_name_index(mut self) -> Self {
325        self.index_names = true;
326        self
327    }
328
329    /// Enable sort key building.
330    pub const fn with_sort_keys(mut self) -> Self {
331        self.build_sort_keys = true;
332        self
333    }
334
335    /// Build the index with configured options.
336    pub fn build(&self, sbom: &NormalizedSbom) -> NormalizedSbomIndex {
337        let mut edges_by_source: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
338        let mut edges_by_target: HashMap<CanonicalId, Vec<usize>> = HashMap::new();
339        let mut by_name_lower: HashMap<String, Vec<CanonicalId>> = HashMap::new();
340        let mut sort_keys: HashMap<CanonicalId, ComponentSortKey> = HashMap::new();
341
342        // Always index edges
343        for (idx, edge) in sbom.edges.iter().enumerate() {
344            edges_by_source
345                .entry(edge.from.clone())
346                .or_default()
347                .push(idx);
348            edges_by_target
349                .entry(edge.to.clone())
350                .or_default()
351                .push(idx);
352        }
353
354        // Optionally index components
355        if self.index_names || self.build_sort_keys {
356            for (id, comp) in &sbom.components {
357                if self.index_names {
358                    let name_lower = comp.name.to_lowercase();
359                    by_name_lower
360                        .entry(name_lower)
361                        .or_default()
362                        .push(id.clone());
363                }
364
365                if self.build_sort_keys {
366                    sort_keys.insert(id.clone(), ComponentSortKey::from_component(comp));
367                }
368            }
369        }
370
371        NormalizedSbomIndex {
372            edges_by_source,
373            edges_by_target,
374            by_name_lower,
375            sort_keys,
376            component_count: sbom.components.len(),
377            edge_count: sbom.edges.len(),
378        }
379    }
380}
381
382#[cfg(test)]
383mod tests {
384    use super::*;
385    use crate::model::{DependencyType, DocumentMetadata};
386
387    fn make_test_sbom() -> NormalizedSbom {
388        let mut sbom = NormalizedSbom::new(DocumentMetadata::default());
389
390        // Add components
391        let comp_a = Component::new("ComponentA".to_string(), "comp-a".to_string());
392        let comp_b = Component::new("ComponentB".to_string(), "comp-b".to_string());
393        let comp_c = Component::new("componentb".to_string(), "comp-c".to_string()); // Same name lowercase
394
395        let id_a = comp_a.canonical_id.clone();
396        let id_b = comp_b.canonical_id.clone();
397        let id_c = comp_c.canonical_id.clone();
398
399        sbom.add_component(comp_a);
400        sbom.add_component(comp_b);
401        sbom.add_component(comp_c);
402
403        // Add edges: A -> B, A -> C
404        sbom.add_edge(DependencyEdge::new(
405            id_a.clone(),
406            id_b.clone(),
407            DependencyType::DependsOn,
408        ));
409        sbom.add_edge(DependencyEdge::new(
410            id_a.clone(),
411            id_c.clone(),
412            DependencyType::DependsOn,
413        ));
414
415        sbom
416    }
417
418    #[test]
419    fn test_build_index() {
420        let sbom = make_test_sbom();
421        let index = NormalizedSbomIndex::build(&sbom);
422
423        assert_eq!(index.component_count(), 3);
424        assert_eq!(index.edge_count(), 2);
425    }
426
427    #[test]
428    fn test_dependency_lookup() {
429        let sbom = make_test_sbom();
430        let index = NormalizedSbomIndex::build(&sbom);
431
432        let comp_a_id = sbom.components.keys().next().unwrap();
433
434        // A has 2 dependencies
435        assert_eq!(index.dependency_count(comp_a_id), 2);
436
437        // Get actual edges
438        let deps = index.dependencies_of(comp_a_id, &sbom.edges);
439        assert_eq!(deps.len(), 2);
440    }
441
442    #[test]
443    fn test_dependent_lookup() {
444        let sbom = make_test_sbom();
445        let index = NormalizedSbomIndex::build(&sbom);
446
447        // B and C should have 1 dependent each (A)
448        let comp_b_id = sbom.components.keys().nth(1).unwrap();
449        assert_eq!(index.dependent_count(comp_b_id), 1);
450    }
451
452    #[test]
453    fn test_name_lookup() {
454        let sbom = make_test_sbom();
455        let index = NormalizedSbomIndex::build(&sbom);
456
457        // "componentb" should match both ComponentB and componentb
458        let matches = index.find_by_name_lower("componentb");
459        assert_eq!(matches.len(), 2);
460    }
461
462    #[test]
463    fn test_name_search() {
464        let sbom = make_test_sbom();
465        let index = NormalizedSbomIndex::build(&sbom);
466
467        // Search for "component" should match all 3
468        let matches = index.search_by_name("component");
469        assert_eq!(matches.len(), 3);
470    }
471
472    #[test]
473    fn test_sort_keys() {
474        let sbom = make_test_sbom();
475        let index = NormalizedSbomIndex::build(&sbom);
476
477        let comp_a_id = sbom.components.keys().next().unwrap();
478        let sort_key = index.sort_key(comp_a_id).unwrap();
479
480        assert_eq!(sort_key.name_lower, "componenta");
481    }
482
483    #[test]
484    fn test_sort_key_contains() {
485        let mut comp = Component::new("MyPackage".to_string(), "pkg-1".to_string());
486        comp.version = Some("1.2.3".to_string());
487        let key = ComponentSortKey::from_component(&comp);
488
489        assert!(key.contains("mypack"));
490        assert!(key.contains("1.2.3"));
491        assert!(!key.contains("notfound"));
492    }
493
494    #[test]
495    fn test_builder_minimal() {
496        let sbom = make_test_sbom();
497        let index = SbomIndexBuilder::minimal().build(&sbom);
498
499        // Edges should still be indexed
500        assert_eq!(index.edge_count(), 2);
501
502        // But name lookup returns empty (not indexed)
503        let matches = index.find_by_name_lower("componenta");
504        assert!(matches.is_empty());
505    }
506
507    #[test]
508    fn test_empty_sbom() {
509        let sbom = NormalizedSbom::default();
510        let index = NormalizedSbomIndex::build(&sbom);
511
512        assert_eq!(index.component_count(), 0);
513        assert_eq!(index.edge_count(), 0);
514    }
515}