Skip to main content

sbom_tools/model/
identifiers.rs

//! Canonical identifiers for SBOM components.
//!
//! This module provides stable, comparable identifiers for components across
//! different SBOM formats. The identification strategy uses a tiered fallback:
//!
//! 1. **PURL** (Package URL) - Most reliable, globally unique
//! 2. **CPE** (Common Platform Enumeration) - Industry standard for vulnerability matching
//! 3. **SWID** (Software Identification) - ISO standard tag
//! 4. **Synthetic** - Generated from group:name@version (stable across regenerations)
//! 5. **`FormatSpecific`** - Original format ID (least stable, may be UUIDs)
11
12use serde::{Deserialize, Serialize};
13use std::fmt;
14use std::hash::{Hash, Hasher};
15
16/// Canonical identifier for a component.
17///
18/// This provides a stable, comparable identifier across different SBOM formats.
19/// The identifier is derived from the PURL when available, falling back through
20/// a tiered strategy to ensure stability.
21#[derive(Debug, Clone, Eq, Serialize, Deserialize)]
22pub struct CanonicalId {
23    /// The normalized identifier string
24    value: String,
25    /// Source of the identifier
26    source: IdSource,
27    /// Whether this ID is considered stable across SBOM regenerations
28    #[serde(default)]
29    stable: bool,
30}
31
32/// Source of the canonical identifier, ordered by reliability
33#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
34pub enum IdSource {
35    /// Derived from Package URL (most reliable)
36    Purl,
37    /// Derived from CPE
38    Cpe,
39    /// Derived from SWID tag
40    Swid,
41    /// Derived from name and version (stable)
42    NameVersion,
43    /// Synthetically generated from group:name@version
44    Synthetic,
45    /// Format-specific identifier (least stable - may be UUID)
46    FormatSpecific,
47}
48
49impl IdSource {
50    /// Returns true if this source produces stable identifiers
51    #[must_use] 
52    pub const fn is_stable(&self) -> bool {
53        matches!(
54            self,
55            Self::Purl
56                | Self::Cpe
57                | Self::Swid
58                | Self::NameVersion
59                | Self::Synthetic
60        )
61    }
62
63    /// Returns the reliability rank (lower is better)
64    #[must_use] 
65    pub const fn reliability_rank(&self) -> u8 {
66        match self {
67            Self::Purl => 0,
68            Self::Cpe => 1,
69            Self::Swid => 2,
70            Self::NameVersion => 3,
71            Self::Synthetic => 4,
72            Self::FormatSpecific => 5,
73        }
74    }
75}
76
77impl CanonicalId {
78    /// Create a new canonical ID from a PURL
79    #[must_use] 
80    pub fn from_purl(purl: &str) -> Self {
81        Self {
82            value: Self::normalize_purl(purl),
83            source: IdSource::Purl,
84            stable: true,
85        }
86    }
87
88    /// Create a new canonical ID from name and version
89    #[must_use] 
90    pub fn from_name_version(name: &str, version: Option<&str>) -> Self {
91        let value = version.map_or_else(|| name.to_lowercase(), |v| format!("{}@{}", name.to_lowercase(), v));
92        Self {
93            value,
94            source: IdSource::NameVersion,
95            stable: true,
96        }
97    }
98
99    /// Create a synthetic canonical ID from group, name, and version
100    ///
101    /// This provides a stable identifier when primary identifiers (PURL, CPE, SWID)
102    /// are not available. The format is: `group:name@version` or `name@version`.
103    #[must_use] 
104    pub fn synthetic(group: Option<&str>, name: &str, version: Option<&str>) -> Self {
105        let value = match (group, version) {
106            (Some(g), Some(v)) => format!("{}:{}@{}", g.to_lowercase(), name.to_lowercase(), v),
107            (Some(g), None) => format!("{}:{}", g.to_lowercase(), name.to_lowercase()),
108            (None, Some(v)) => format!("{}@{}", name.to_lowercase(), v),
109            (None, None) => name.to_lowercase(),
110        };
111        Self {
112            value,
113            source: IdSource::Synthetic,
114            stable: true,
115        }
116    }
117
118    /// Create a new canonical ID from a format-specific identifier
119    ///
120    /// **Warning**: Format-specific IDs (like bom-ref UUIDs) are often unstable
121    /// across SBOM regenerations. Use `synthetic()` or other methods when possible.
122    #[must_use] 
123    pub fn from_format_id(id: &str) -> Self {
124        // Check if this looks like a UUID (unstable)
125        let looks_like_uuid = id.len() == 36
126            && id.chars().filter(|c| *c == '-').count() == 4
127            && id.chars().all(|c| c.is_ascii_hexdigit() || c == '-');
128
129        Self {
130            value: id.to_string(),
131            source: IdSource::FormatSpecific,
132            stable: !looks_like_uuid,
133        }
134    }
135
136    /// Create from CPE
137    #[must_use] 
138    pub fn from_cpe(cpe: &str) -> Self {
139        Self {
140            value: cpe.to_lowercase(),
141            source: IdSource::Cpe,
142            stable: true,
143        }
144    }
145
146    /// Create from SWID tag
147    #[must_use] 
148    pub fn from_swid(swid: &str) -> Self {
149        Self {
150            value: swid.to_string(),
151            source: IdSource::Swid,
152            stable: true,
153        }
154    }
155
156    /// Get the canonical ID value
157    #[must_use] 
158    pub fn value(&self) -> &str {
159        &self.value
160    }
161
162    /// Get the source of this identifier
163    #[must_use] 
164    pub const fn source(&self) -> &IdSource {
165        &self.source
166    }
167
168    /// Returns true if this identifier is stable across SBOM regenerations
169    #[must_use] 
170    pub const fn is_stable(&self) -> bool {
171        self.stable
172    }
173
174    /// Normalize a PURL string for comparison
175    fn normalize_purl(purl: &str) -> String {
176        // Basic normalization - a full implementation would use the packageurl crate
177        let mut normalized = purl.to_lowercase();
178
179        // Handle common ecosystem-specific normalizations
180        if normalized.starts_with("pkg:pypi/") {
181            // PyPI: normalize underscores, hyphens, and dots to hyphens
182            normalized = normalized.replace(['_', '.'], "-");
183        } else if normalized.starts_with("pkg:npm/") {
184            // NPM: decode URL-encoded scope
185            normalized = normalized.replace("%40", "@");
186        }
187
188        normalized
189    }
190}
191
192impl PartialEq for CanonicalId {
193    fn eq(&self, other: &Self) -> bool {
194        self.value == other.value
195    }
196}
197
198impl Hash for CanonicalId {
199    fn hash<H: Hasher>(&self, state: &mut H) {
200        self.value.hash(state);
201    }
202}
203
204impl fmt::Display for CanonicalId {
205    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
206        write!(f, "{}", self.value)
207    }
208}
209
210/// Component identifiers from various sources
211#[derive(Debug, Clone, Default, Serialize, Deserialize)]
212pub struct ComponentIdentifiers {
213    /// Package URL (preferred identifier)
214    pub purl: Option<String>,
215    /// Common Platform Enumeration identifiers
216    pub cpe: Vec<String>,
217    /// Software Identification tag
218    pub swid: Option<String>,
219    /// Original format-specific identifier
220    pub format_id: String,
221    /// Known aliases for this component
222    pub aliases: Vec<String>,
223}
224
225/// Result of canonical ID generation, including stability information
226#[derive(Debug, Clone)]
227pub struct CanonicalIdResult {
228    /// The canonical ID
229    pub id: CanonicalId,
230    /// Warning message if fallback was used
231    pub warning: Option<String>,
232}
233
234impl ComponentIdentifiers {
235    /// Create a new empty set of identifiers
236    #[must_use] 
237    pub fn new(format_id: String) -> Self {
238        Self {
239            format_id,
240            ..Default::default()
241        }
242    }
243
244    /// Get the best available canonical ID (without component context)
245    ///
246    /// For better stability, prefer `canonical_id_with_context()` which can
247    /// generate synthetic IDs from component metadata.
248    #[must_use] 
249    pub fn canonical_id(&self) -> CanonicalId {
250        // Tiered fallback: PURL → CPE → SWID → format_id
251        self.purl.as_ref().map_or_else(
252            || {
253                self.cpe.first().map_or_else(
254                    || {
255                        self.swid.as_ref().map_or_else(
256                            || CanonicalId::from_format_id(&self.format_id),
257                            |swid| CanonicalId::from_swid(swid),
258                        )
259                    },
260                    |cpe| CanonicalId::from_cpe(cpe),
261                )
262            },
263            |purl| CanonicalId::from_purl(purl),
264        )
265    }
266
267    /// Get the best available canonical ID with component context for stable fallback
268    ///
269    /// This method uses a tiered fallback strategy:
270    /// 1. PURL (most reliable)
271    /// 2. CPE
272    /// 3. SWID
273    /// 4. Synthetic (group:name@version) - stable across regenerations
274    /// 5. Format-specific ID (least stable)
275    ///
276    /// Returns both the ID and any warnings about stability.
277    #[must_use] 
278    pub fn canonical_id_with_context(
279        &self,
280        name: &str,
281        version: Option<&str>,
282        group: Option<&str>,
283    ) -> CanonicalIdResult {
284        // Tier 1: PURL (best)
285        if let Some(purl) = &self.purl {
286            return CanonicalIdResult {
287                id: CanonicalId::from_purl(purl),
288                warning: None,
289            };
290        }
291
292        // Tier 2: CPE
293        if let Some(cpe) = self.cpe.first() {
294            return CanonicalIdResult {
295                id: CanonicalId::from_cpe(cpe),
296                warning: None,
297            };
298        }
299
300        // Tier 3: SWID
301        if let Some(swid) = &self.swid {
302            return CanonicalIdResult {
303                id: CanonicalId::from_swid(swid),
304                warning: None,
305            };
306        }
307
308        // Tier 4: Synthetic from name/version/group (stable)
309        // Only use if we have at least a name
310        if !name.is_empty() {
311            return CanonicalIdResult {
312                id: CanonicalId::synthetic(group, name, version),
313                warning: Some(format!(
314                    "Component '{name}' lacks PURL/CPE/SWID identifiers; using synthetic ID. \
315                     Consider enriching SBOM with package URLs for accurate diffing."
316                )),
317            };
318        }
319
320        // Tier 5: Format-specific (least stable - may be UUID)
321        let id = CanonicalId::from_format_id(&self.format_id);
322        let warning = if id.is_stable() {
323            Some(format!(
324                "Component uses format-specific ID '{}' without standard identifiers.",
325                self.format_id
326            ))
327        } else {
328            Some(format!(
329                "Component uses unstable format-specific ID '{}'. \
330                 This may cause inaccurate diff results across SBOM regenerations.",
331                self.format_id
332            ))
333        };
334
335        CanonicalIdResult { id, warning }
336    }
337
338    /// Check if this component has any stable identifiers
339    #[must_use] 
340    pub fn has_stable_id(&self) -> bool {
341        self.purl.is_some() || !self.cpe.is_empty() || self.swid.is_some()
342    }
343
344    /// Get the reliability level of available identifiers
345    #[must_use] 
346    pub fn id_reliability(&self) -> IdReliability {
347        if self.purl.is_some() {
348            IdReliability::High
349        } else if !self.cpe.is_empty() || self.swid.is_some() {
350            IdReliability::Medium
351        } else {
352            IdReliability::Low
353        }
354    }
355}
356
357/// Reliability level of component identification
358#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
359pub enum IdReliability {
360    /// High reliability (PURL available)
361    High,
362    /// Medium reliability (CPE or SWID available)
363    Medium,
364    /// Low reliability (synthetic or format-specific only)
365    Low,
366}
367
368impl fmt::Display for IdReliability {
369    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
370        match self {
371            Self::High => write!(f, "high"),
372            Self::Medium => write!(f, "medium"),
373            Self::Low => write!(f, "low"),
374        }
375    }
376}
377
378/// Ecosystem/package manager type
379#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
380#[non_exhaustive]
381pub enum Ecosystem {
382    Npm,
383    PyPi,
384    Cargo,
385    Maven,
386    Golang,
387    Nuget,
388    RubyGems,
389    Composer,
390    CocoaPods,
391    Swift,
392    Hex,
393    Pub,
394    Hackage,
395    Cpan,
396    Cran,
397    Conda,
398    Conan,
399    Deb,
400    Rpm,
401    Apk,
402    Generic,
403    Unknown(String),
404}
405
406impl Ecosystem {
407    /// Parse ecosystem from PURL type
408    #[must_use] 
409    pub fn from_purl_type(purl_type: &str) -> Self {
410        match purl_type.to_lowercase().as_str() {
411            "npm" => Self::Npm,
412            "pypi" => Self::PyPi,
413            "cargo" => Self::Cargo,
414            "maven" => Self::Maven,
415            "golang" | "go" => Self::Golang,
416            "nuget" => Self::Nuget,
417            "gem" => Self::RubyGems,
418            "composer" => Self::Composer,
419            "cocoapods" => Self::CocoaPods,
420            "swift" => Self::Swift,
421            "hex" => Self::Hex,
422            "pub" => Self::Pub,
423            "hackage" => Self::Hackage,
424            "cpan" => Self::Cpan,
425            "cran" => Self::Cran,
426            "conda" => Self::Conda,
427            "conan" => Self::Conan,
428            "deb" => Self::Deb,
429            "rpm" => Self::Rpm,
430            "apk" => Self::Apk,
431            "generic" => Self::Generic,
432            other => Self::Unknown(other.to_string()),
433        }
434    }
435}
436
437impl fmt::Display for Ecosystem {
438    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
439        match self {
440            Self::Npm => write!(f, "npm"),
441            Self::PyPi => write!(f, "pypi"),
442            Self::Cargo => write!(f, "cargo"),
443            Self::Maven => write!(f, "maven"),
444            Self::Golang => write!(f, "golang"),
445            Self::Nuget => write!(f, "nuget"),
446            Self::RubyGems => write!(f, "gem"),
447            Self::Composer => write!(f, "composer"),
448            Self::CocoaPods => write!(f, "cocoapods"),
449            Self::Swift => write!(f, "swift"),
450            Self::Hex => write!(f, "hex"),
451            Self::Pub => write!(f, "pub"),
452            Self::Hackage => write!(f, "hackage"),
453            Self::Cpan => write!(f, "cpan"),
454            Self::Cran => write!(f, "cran"),
455            Self::Conda => write!(f, "conda"),
456            Self::Conan => write!(f, "conan"),
457            Self::Deb => write!(f, "deb"),
458            Self::Rpm => write!(f, "rpm"),
459            Self::Apk => write!(f, "apk"),
460            Self::Generic => write!(f, "generic"),
461            Self::Unknown(s) => write!(f, "{s}"),
462        }
463    }
464}
465
// ============================================================================
// ComponentRef: Lightweight reference combining ID and display name
// ============================================================================
469
470/// A lightweight reference to a component, combining its stable ID with
471/// a human-readable display name.
472///
473/// This type is used throughout the diff system and TUI to:
474/// - Navigate and link by ID (stable, unique)
475/// - Display by name (human-readable)
476///
477/// # Example
478/// ```ignore
479/// let comp_ref = ComponentRef::new(component.canonical_id.clone(), &component.name);
480/// println!("Component: {} (ID: {})", comp_ref.name(), comp_ref.id());
481/// ```
482#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
483pub struct ComponentRef {
484    /// The stable canonical ID for linking and navigation
485    id: CanonicalId,
486    /// Human-readable name for display
487    name: String,
488    /// Optional version for display context
489    #[serde(skip_serializing_if = "Option::is_none")]
490    version: Option<String>,
491}
492
493impl ComponentRef {
494    /// Create a new component reference
495    pub fn new(id: CanonicalId, name: impl Into<String>) -> Self {
496        Self {
497            id,
498            name: name.into(),
499            version: None,
500        }
501    }
502
503    /// Create a component reference with version
504    pub fn with_version(id: CanonicalId, name: impl Into<String>, version: Option<String>) -> Self {
505        Self {
506            id,
507            name: name.into(),
508            version,
509        }
510    }
511
512    /// Create from a Component
513    #[must_use] 
514    pub fn from_component(component: &super::Component) -> Self {
515        Self {
516            id: component.canonical_id.clone(),
517            name: component.name.clone(),
518            version: component.version.clone(),
519        }
520    }
521
522    /// Get the canonical ID
523    #[must_use] 
524    pub const fn id(&self) -> &CanonicalId {
525        &self.id
526    }
527
528    /// Get the ID as a string
529    #[must_use] 
530    pub fn id_str(&self) -> &str {
531        self.id.value()
532    }
533
534    /// Get the display name
535    #[must_use] 
536    pub fn name(&self) -> &str {
537        &self.name
538    }
539
540    /// Get the version if available
541    #[must_use] 
542    pub fn version(&self) -> Option<&str> {
543        self.version.as_deref()
544    }
545
546    /// Get display string with version if available
547    #[must_use] 
548    pub fn display_with_version(&self) -> String {
549        self.version.as_ref().map_or_else(|| self.name.clone(), |v| format!("{}@{}", self.name, v))
550    }
551
552    /// Check if this ref matches a given ID
553    #[must_use] 
554    pub fn matches_id(&self, id: &CanonicalId) -> bool {
555        &self.id == id
556    }
557
558    /// Check if this ref matches a given ID string
559    #[must_use] 
560    pub fn matches_id_str(&self, id_str: &str) -> bool {
561        self.id.value() == id_str
562    }
563}
564
565impl fmt::Display for ComponentRef {
566    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
567        write!(f, "{}", self.name)
568    }
569}
570
571impl From<&super::Component> for ComponentRef {
572    fn from(component: &super::Component) -> Self {
573        Self::from_component(component)
574    }
575}
576
577/// A reference to a vulnerability with its associated component
578#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
579pub struct VulnerabilityRef2 {
580    /// Vulnerability ID (e.g., CVE-2021-44228)
581    pub vuln_id: String,
582    /// Reference to the affected component
583    pub component: ComponentRef,
584}
585
586impl VulnerabilityRef2 {
587    /// Create a new vulnerability reference
588    pub fn new(vuln_id: impl Into<String>, component: ComponentRef) -> Self {
589        Self {
590            vuln_id: vuln_id.into(),
591            component,
592        }
593    }
594
595    /// Get the component's canonical ID
596    #[must_use] 
597    pub const fn component_id(&self) -> &CanonicalId {
598        self.component.id()
599    }
600
601    /// Get the component name for display
602    #[must_use] 
603    pub fn component_name(&self) -> &str {
604        self.component.name()
605    }
606}