Skip to main content

sbom_tools/model/
identifiers.rs

//! Canonical identifiers for SBOM components.
//!
//! This module provides stable, comparable identifiers for components across
//! different SBOM formats. The identification strategy uses a tiered fallback:
//!
//! 1. **PURL** (Package URL) - Most reliable, globally unique
//! 2. **CPE** (Common Platform Enumeration) - Industry standard for vulnerability matching
//! 3. **SWID** (Software Identification) - ISO standard tag
//! 4. **Synthetic** - Generated from group:name@version (stable across regenerations)
//! 5. **`FormatSpecific`** - Original format ID (least stable, may be UUIDs)
11
12use serde::{Deserialize, Serialize};
13use std::fmt;
14use std::hash::{Hash, Hasher};
15
16/// Canonical identifier for a component.
17///
18/// This provides a stable, comparable identifier across different SBOM formats.
19/// The identifier is derived from the PURL when available, falling back through
20/// a tiered strategy to ensure stability.
21#[derive(Debug, Clone, Eq, Serialize, Deserialize)]
22pub struct CanonicalId {
23    /// The normalized identifier string
24    value: String,
25    /// Source of the identifier
26    source: IdSource,
27    /// Whether this ID is considered stable across SBOM regenerations
28    #[serde(default)]
29    stable: bool,
30}
31
32/// Source of the canonical identifier, ordered by reliability
33#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
34pub enum IdSource {
35    /// Derived from Package URL (most reliable)
36    Purl,
37    /// Derived from CPE
38    Cpe,
39    /// Derived from SWID tag
40    Swid,
41    /// Derived from name and version (stable)
42    NameVersion,
43    /// Synthetically generated from group:name@version
44    Synthetic,
45    /// Format-specific identifier (least stable - may be UUID)
46    FormatSpecific,
47}
48
49impl IdSource {
50    /// Returns true if this source produces stable identifiers
51    #[must_use]
52    pub const fn is_stable(&self) -> bool {
53        matches!(
54            self,
55            Self::Purl | Self::Cpe | Self::Swid | Self::NameVersion | Self::Synthetic
56        )
57    }
58
59    /// Returns the reliability rank (lower is better)
60    #[must_use]
61    pub const fn reliability_rank(&self) -> u8 {
62        match self {
63            Self::Purl => 0,
64            Self::Cpe => 1,
65            Self::Swid => 2,
66            Self::NameVersion => 3,
67            Self::Synthetic => 4,
68            Self::FormatSpecific => 5,
69        }
70    }
71}
72
73impl CanonicalId {
74    /// Create a new canonical ID from a PURL
75    #[must_use]
76    pub fn from_purl(purl: &str) -> Self {
77        Self {
78            value: Self::normalize_purl(purl),
79            source: IdSource::Purl,
80            stable: true,
81        }
82    }
83
84    /// Create a new canonical ID from name and version
85    #[must_use]
86    pub fn from_name_version(name: &str, version: Option<&str>) -> Self {
87        let value = version.map_or_else(
88            || name.to_lowercase(),
89            |v| format!("{}@{}", name.to_lowercase(), v),
90        );
91        Self {
92            value,
93            source: IdSource::NameVersion,
94            stable: true,
95        }
96    }
97
98    /// Create a synthetic canonical ID from group, name, and version
99    ///
100    /// This provides a stable identifier when primary identifiers (PURL, CPE, SWID)
101    /// are not available. The format is: `group:name@version` or `name@version`.
102    #[must_use]
103    pub fn synthetic(group: Option<&str>, name: &str, version: Option<&str>) -> Self {
104        let value = match (group, version) {
105            (Some(g), Some(v)) => format!("{}:{}@{}", g.to_lowercase(), name.to_lowercase(), v),
106            (Some(g), None) => format!("{}:{}", g.to_lowercase(), name.to_lowercase()),
107            (None, Some(v)) => format!("{}@{}", name.to_lowercase(), v),
108            (None, None) => name.to_lowercase(),
109        };
110        Self {
111            value,
112            source: IdSource::Synthetic,
113            stable: true,
114        }
115    }
116
117    /// Create a new canonical ID from a format-specific identifier
118    ///
119    /// **Warning**: Format-specific IDs (like bom-ref UUIDs) are often unstable
120    /// across SBOM regenerations. Use `synthetic()` or other methods when possible.
121    #[must_use]
122    pub fn from_format_id(id: &str) -> Self {
123        // Check if this looks like a UUID (unstable)
124        let looks_like_uuid = id.len() == 36
125            && id.chars().filter(|c| *c == '-').count() == 4
126            && id.chars().all(|c| c.is_ascii_hexdigit() || c == '-');
127
128        Self {
129            value: id.to_string(),
130            source: IdSource::FormatSpecific,
131            stable: !looks_like_uuid,
132        }
133    }
134
135    /// Create from CPE
136    #[must_use]
137    pub fn from_cpe(cpe: &str) -> Self {
138        Self {
139            value: cpe.to_lowercase(),
140            source: IdSource::Cpe,
141            stable: true,
142        }
143    }
144
145    /// Create from SWID tag
146    #[must_use]
147    pub fn from_swid(swid: &str) -> Self {
148        Self {
149            value: swid.to_string(),
150            source: IdSource::Swid,
151            stable: true,
152        }
153    }
154
155    /// Get the canonical ID value
156    #[must_use]
157    pub fn value(&self) -> &str {
158        &self.value
159    }
160
161    /// Get the source of this identifier
162    #[must_use]
163    pub const fn source(&self) -> &IdSource {
164        &self.source
165    }
166
167    /// Returns true if this identifier is stable across SBOM regenerations
168    #[must_use]
169    pub const fn is_stable(&self) -> bool {
170        self.stable
171    }
172
173    /// Normalize a PURL string for comparison
174    fn normalize_purl(purl: &str) -> String {
175        // Basic normalization - a full implementation would use the packageurl crate
176        let mut normalized = purl.to_lowercase();
177
178        // Handle common ecosystem-specific normalizations
179        if normalized.starts_with("pkg:pypi/") {
180            // PyPI: normalize underscores, hyphens, and dots to hyphens
181            normalized = normalized.replace(['_', '.'], "-");
182        } else if normalized.starts_with("pkg:npm/") {
183            // NPM: decode URL-encoded scope
184            normalized = normalized.replace("%40", "@");
185        }
186
187        normalized
188    }
189}
190
191impl PartialEq for CanonicalId {
192    fn eq(&self, other: &Self) -> bool {
193        self.value == other.value
194    }
195}
196
197impl Hash for CanonicalId {
198    fn hash<H: Hasher>(&self, state: &mut H) {
199        self.value.hash(state);
200    }
201}
202
203impl fmt::Display for CanonicalId {
204    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
205        write!(f, "{}", self.value)
206    }
207}
208
209/// Component identifiers from various sources
210#[derive(Debug, Clone, Default, Serialize, Deserialize)]
211pub struct ComponentIdentifiers {
212    /// Package URL (preferred identifier)
213    pub purl: Option<String>,
214    /// Common Platform Enumeration identifiers
215    pub cpe: Vec<String>,
216    /// Software Identification tag
217    pub swid: Option<String>,
218    /// Original format-specific identifier
219    pub format_id: String,
220    /// Known aliases for this component
221    pub aliases: Vec<String>,
222}
223
224/// Result of canonical ID generation, including stability information
225#[derive(Debug, Clone)]
226pub struct CanonicalIdResult {
227    /// The canonical ID
228    pub id: CanonicalId,
229    /// Warning message if fallback was used
230    pub warning: Option<String>,
231}
232
233impl ComponentIdentifiers {
234    /// Create a new empty set of identifiers
235    #[must_use]
236    pub fn new(format_id: String) -> Self {
237        Self {
238            format_id,
239            ..Default::default()
240        }
241    }
242
243    /// Get the best available canonical ID (without component context)
244    ///
245    /// For better stability, prefer `canonical_id_with_context()` which can
246    /// generate synthetic IDs from component metadata.
247    #[must_use]
248    pub fn canonical_id(&self) -> CanonicalId {
249        // Tiered fallback: PURL → CPE → SWID → format_id
250        self.purl.as_ref().map_or_else(
251            || {
252                self.cpe.first().map_or_else(
253                    || {
254                        self.swid.as_ref().map_or_else(
255                            || CanonicalId::from_format_id(&self.format_id),
256                            |swid| CanonicalId::from_swid(swid),
257                        )
258                    },
259                    |cpe| CanonicalId::from_cpe(cpe),
260                )
261            },
262            |purl| CanonicalId::from_purl(purl),
263        )
264    }
265
266    /// Get the best available canonical ID with component context for stable fallback
267    ///
268    /// This method uses a tiered fallback strategy:
269    /// 1. PURL (most reliable)
270    /// 2. CPE
271    /// 3. SWID
272    /// 4. Synthetic (group:name@version) - stable across regenerations
273    /// 5. Format-specific ID (least stable)
274    ///
275    /// Returns both the ID and any warnings about stability.
276    #[must_use]
277    pub fn canonical_id_with_context(
278        &self,
279        name: &str,
280        version: Option<&str>,
281        group: Option<&str>,
282    ) -> CanonicalIdResult {
283        // Tier 1: PURL (best)
284        if let Some(purl) = &self.purl {
285            return CanonicalIdResult {
286                id: CanonicalId::from_purl(purl),
287                warning: None,
288            };
289        }
290
291        // Tier 2: CPE
292        if let Some(cpe) = self.cpe.first() {
293            return CanonicalIdResult {
294                id: CanonicalId::from_cpe(cpe),
295                warning: None,
296            };
297        }
298
299        // Tier 3: SWID
300        if let Some(swid) = &self.swid {
301            return CanonicalIdResult {
302                id: CanonicalId::from_swid(swid),
303                warning: None,
304            };
305        }
306
307        // Tier 4: Synthetic from name/version/group (stable)
308        // Only use if we have at least a name
309        if !name.is_empty() {
310            return CanonicalIdResult {
311                id: CanonicalId::synthetic(group, name, version),
312                warning: Some(format!(
313                    "Component '{name}' lacks PURL/CPE/SWID identifiers; using synthetic ID. \
314                     Consider enriching SBOM with package URLs for accurate diffing."
315                )),
316            };
317        }
318
319        // Tier 5: Format-specific (least stable - may be UUID)
320        let id = CanonicalId::from_format_id(&self.format_id);
321        let warning = if id.is_stable() {
322            Some(format!(
323                "Component uses format-specific ID '{}' without standard identifiers.",
324                self.format_id
325            ))
326        } else {
327            Some(format!(
328                "Component uses unstable format-specific ID '{}'. \
329                 This may cause inaccurate diff results across SBOM regenerations.",
330                self.format_id
331            ))
332        };
333
334        CanonicalIdResult { id, warning }
335    }
336
337    /// Check if this component has any stable identifiers
338    #[must_use]
339    pub fn has_stable_id(&self) -> bool {
340        self.purl.is_some() || !self.cpe.is_empty() || self.swid.is_some()
341    }
342
343    /// Get the reliability level of available identifiers
344    #[must_use]
345    pub fn id_reliability(&self) -> IdReliability {
346        if self.purl.is_some() {
347            IdReliability::High
348        } else if !self.cpe.is_empty() || self.swid.is_some() {
349            IdReliability::Medium
350        } else {
351            IdReliability::Low
352        }
353    }
354}
355
356/// Reliability level of component identification
357#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
358pub enum IdReliability {
359    /// High reliability (PURL available)
360    High,
361    /// Medium reliability (CPE or SWID available)
362    Medium,
363    /// Low reliability (synthetic or format-specific only)
364    Low,
365}
366
367impl fmt::Display for IdReliability {
368    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
369        match self {
370            Self::High => write!(f, "high"),
371            Self::Medium => write!(f, "medium"),
372            Self::Low => write!(f, "low"),
373        }
374    }
375}
376
377/// Ecosystem/package manager type
378#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
379#[non_exhaustive]
380pub enum Ecosystem {
381    Npm,
382    PyPi,
383    Cargo,
384    Maven,
385    Golang,
386    Nuget,
387    RubyGems,
388    Composer,
389    CocoaPods,
390    Swift,
391    Hex,
392    Pub,
393    Hackage,
394    Cpan,
395    Cran,
396    Conda,
397    Conan,
398    Deb,
399    Rpm,
400    Apk,
401    Generic,
402    Unknown(String),
403}
404
405impl Ecosystem {
406    /// Parse ecosystem from PURL type
407    #[must_use]
408    pub fn from_purl_type(purl_type: &str) -> Self {
409        match purl_type.to_lowercase().as_str() {
410            "npm" => Self::Npm,
411            "pypi" => Self::PyPi,
412            "cargo" => Self::Cargo,
413            "maven" => Self::Maven,
414            "golang" | "go" => Self::Golang,
415            "nuget" => Self::Nuget,
416            "gem" => Self::RubyGems,
417            "composer" => Self::Composer,
418            "cocoapods" => Self::CocoaPods,
419            "swift" => Self::Swift,
420            "hex" => Self::Hex,
421            "pub" => Self::Pub,
422            "hackage" => Self::Hackage,
423            "cpan" => Self::Cpan,
424            "cran" => Self::Cran,
425            "conda" => Self::Conda,
426            "conan" => Self::Conan,
427            "deb" => Self::Deb,
428            "rpm" => Self::Rpm,
429            "apk" => Self::Apk,
430            "generic" => Self::Generic,
431            other => Self::Unknown(other.to_string()),
432        }
433    }
434}
435
436impl fmt::Display for Ecosystem {
437    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
438        match self {
439            Self::Npm => write!(f, "npm"),
440            Self::PyPi => write!(f, "pypi"),
441            Self::Cargo => write!(f, "cargo"),
442            Self::Maven => write!(f, "maven"),
443            Self::Golang => write!(f, "golang"),
444            Self::Nuget => write!(f, "nuget"),
445            Self::RubyGems => write!(f, "gem"),
446            Self::Composer => write!(f, "composer"),
447            Self::CocoaPods => write!(f, "cocoapods"),
448            Self::Swift => write!(f, "swift"),
449            Self::Hex => write!(f, "hex"),
450            Self::Pub => write!(f, "pub"),
451            Self::Hackage => write!(f, "hackage"),
452            Self::Cpan => write!(f, "cpan"),
453            Self::Cran => write!(f, "cran"),
454            Self::Conda => write!(f, "conda"),
455            Self::Conan => write!(f, "conan"),
456            Self::Deb => write!(f, "deb"),
457            Self::Rpm => write!(f, "rpm"),
458            Self::Apk => write!(f, "apk"),
459            Self::Generic => write!(f, "generic"),
460            Self::Unknown(s) => write!(f, "{s}"),
461        }
462    }
463}
464
465// ============================================================================
466// ComponentRef: Lightweight reference combining ID and display name
467// ============================================================================
468
469/// A lightweight reference to a component, combining its stable ID with
470/// a human-readable display name.
471///
472/// This type is used throughout the diff system and TUI to:
473/// - Navigate and link by ID (stable, unique)
474/// - Display by name (human-readable)
475///
476/// # Example
477/// ```ignore
478/// let comp_ref = ComponentRef::new(component.canonical_id.clone(), &component.name);
479/// println!("Component: {} (ID: {})", comp_ref.name(), comp_ref.id());
480/// ```
481#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
482pub struct ComponentRef {
483    /// The stable canonical ID for linking and navigation
484    id: CanonicalId,
485    /// Human-readable name for display
486    name: String,
487    /// Optional version for display context
488    #[serde(skip_serializing_if = "Option::is_none")]
489    version: Option<String>,
490}
491
492impl ComponentRef {
493    /// Create a new component reference
494    pub fn new(id: CanonicalId, name: impl Into<String>) -> Self {
495        Self {
496            id,
497            name: name.into(),
498            version: None,
499        }
500    }
501
502    /// Create a component reference with version
503    pub fn with_version(id: CanonicalId, name: impl Into<String>, version: Option<String>) -> Self {
504        Self {
505            id,
506            name: name.into(),
507            version,
508        }
509    }
510
511    /// Create from a Component
512    #[must_use]
513    pub fn from_component(component: &super::Component) -> Self {
514        Self {
515            id: component.canonical_id.clone(),
516            name: component.name.clone(),
517            version: component.version.clone(),
518        }
519    }
520
521    /// Get the canonical ID
522    #[must_use]
523    pub const fn id(&self) -> &CanonicalId {
524        &self.id
525    }
526
527    /// Get the ID as a string
528    #[must_use]
529    pub fn id_str(&self) -> &str {
530        self.id.value()
531    }
532
533    /// Get the display name
534    #[must_use]
535    pub fn name(&self) -> &str {
536        &self.name
537    }
538
539    /// Get the version if available
540    #[must_use]
541    pub fn version(&self) -> Option<&str> {
542        self.version.as_deref()
543    }
544
545    /// Get display string with version if available
546    #[must_use]
547    pub fn display_with_version(&self) -> String {
548        self.version
549            .as_ref()
550            .map_or_else(|| self.name.clone(), |v| format!("{}@{}", self.name, v))
551    }
552
553    /// Check if this ref matches a given ID
554    #[must_use]
555    pub fn matches_id(&self, id: &CanonicalId) -> bool {
556        &self.id == id
557    }
558
559    /// Check if this ref matches a given ID string
560    #[must_use]
561    pub fn matches_id_str(&self, id_str: &str) -> bool {
562        self.id.value() == id_str
563    }
564}
565
566impl fmt::Display for ComponentRef {
567    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
568        write!(f, "{}", self.name)
569    }
570}
571
572impl From<&super::Component> for ComponentRef {
573    fn from(component: &super::Component) -> Self {
574        Self::from_component(component)
575    }
576}
577
578/// A reference to a vulnerability with its associated component
579#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
580pub struct VulnerabilityRef2 {
581    /// Vulnerability ID (e.g., CVE-2021-44228)
582    pub vuln_id: String,
583    /// Reference to the affected component
584    pub component: ComponentRef,
585}
586
587impl VulnerabilityRef2 {
588    /// Create a new vulnerability reference
589    pub fn new(vuln_id: impl Into<String>, component: ComponentRef) -> Self {
590        Self {
591            vuln_id: vuln_id.into(),
592            component,
593        }
594    }
595
596    /// Get the component's canonical ID
597    #[must_use]
598    pub const fn component_id(&self) -> &CanonicalId {
599        self.component.id()
600    }
601
602    /// Get the component name for display
603    #[must_use]
604    pub fn component_name(&self) -> &str {
605        self.component.name()
606    }
607}