Skip to main content

sbom_tools/model/
identifiers.rs

1//! Canonical identifiers for SBOM components.
2//!
3//! This module provides stable, comparable identifiers for components across
4//! different SBOM formats. The identification strategy uses a tiered fallback:
5//!
6//! 1. **PURL** (Package URL) - Most reliable, globally unique
7//! 2. **CPE** (Common Platform Enumeration) - Industry standard for vulnerability matching
8//! 3. **SWID** (Software Identification) - ISO standard tag
9//! 4. **Synthetic** - Generated from group:name@version (stable across regenerations)
10//! 5. **FormatSpecific** - Original format ID (least stable, may be UUIDs)
11
12use serde::{Deserialize, Serialize};
13use std::fmt;
14use std::hash::{Hash, Hasher};
15
/// Canonical identifier for a component.
///
/// This provides a stable, comparable identifier across different SBOM formats.
/// The identifier is derived from the PURL when available, falling back through
/// a tiered strategy to ensure stability.
///
/// `PartialEq` and `Hash` are implemented by hand in this module and look only
/// at `value`, so two IDs with the same string but a different `source` or
/// `stable` flag compare equal and hash identically.
#[derive(Debug, Clone, Eq, Serialize, Deserialize)]
pub struct CanonicalId {
    /// The normalized identifier string (the only field used for equality,
    /// hashing and `Display`)
    value: String,
    /// Source of the identifier, i.e. which constructor/tier produced it
    source: IdSource,
    /// Whether this ID is considered stable across SBOM regenerations.
    ///
    /// `#[serde(default)]`: when the field is absent from serialized input it
    /// deserializes as `false`.
    #[serde(default)]
    stable: bool,
}
31
/// Source of the canonical identifier, ordered by reliability
///
/// Variants are declared from most to least reliable; `reliability_rank`
/// returns the matching numeric rank (0 for `Purl` up to 5 for
/// `FormatSpecific`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum IdSource {
    /// Derived from Package URL (most reliable)
    Purl,
    /// Derived from CPE
    Cpe,
    /// Derived from SWID tag
    Swid,
    /// Derived from name and version (stable)
    NameVersion,
    /// Synthetically generated from group:name@version
    Synthetic,
    /// Format-specific identifier (least stable - may be UUID).
    /// This is the only source that `is_stable` reports as unstable.
    FormatSpecific,
}
48
49impl IdSource {
50    /// Returns true if this source produces stable identifiers
51    pub fn is_stable(&self) -> bool {
52        matches!(
53            self,
54            IdSource::Purl
55                | IdSource::Cpe
56                | IdSource::Swid
57                | IdSource::NameVersion
58                | IdSource::Synthetic
59        )
60    }
61
62    /// Returns the reliability rank (lower is better)
63    pub fn reliability_rank(&self) -> u8 {
64        match self {
65            IdSource::Purl => 0,
66            IdSource::Cpe => 1,
67            IdSource::Swid => 2,
68            IdSource::NameVersion => 3,
69            IdSource::Synthetic => 4,
70            IdSource::FormatSpecific => 5,
71        }
72    }
73}
74
75impl CanonicalId {
76    /// Create a new canonical ID from a PURL
77    pub fn from_purl(purl: &str) -> Self {
78        Self {
79            value: Self::normalize_purl(purl),
80            source: IdSource::Purl,
81            stable: true,
82        }
83    }
84
85    /// Create a new canonical ID from name and version
86    pub fn from_name_version(name: &str, version: Option<&str>) -> Self {
87        let value = match version {
88            Some(v) => format!("{}@{}", name.to_lowercase(), v),
89            None => name.to_lowercase(),
90        };
91        Self {
92            value,
93            source: IdSource::NameVersion,
94            stable: true,
95        }
96    }
97
98    /// Create a synthetic canonical ID from group, name, and version
99    ///
100    /// This provides a stable identifier when primary identifiers (PURL, CPE, SWID)
101    /// are not available. The format is: `group:name@version` or `name@version`.
102    pub fn synthetic(group: Option<&str>, name: &str, version: Option<&str>) -> Self {
103        let value = match (group, version) {
104            (Some(g), Some(v)) => format!("{}:{}@{}", g.to_lowercase(), name.to_lowercase(), v),
105            (Some(g), None) => format!("{}:{}", g.to_lowercase(), name.to_lowercase()),
106            (None, Some(v)) => format!("{}@{}", name.to_lowercase(), v),
107            (None, None) => name.to_lowercase(),
108        };
109        Self {
110            value,
111            source: IdSource::Synthetic,
112            stable: true,
113        }
114    }
115
116    /// Create a new canonical ID from a format-specific identifier
117    ///
118    /// **Warning**: Format-specific IDs (like bom-ref UUIDs) are often unstable
119    /// across SBOM regenerations. Use `synthetic()` or other methods when possible.
120    pub fn from_format_id(id: &str) -> Self {
121        // Check if this looks like a UUID (unstable)
122        let looks_like_uuid = id.len() == 36
123            && id.chars().filter(|c| *c == '-').count() == 4
124            && id.chars().all(|c| c.is_ascii_hexdigit() || c == '-');
125
126        Self {
127            value: id.to_string(),
128            source: IdSource::FormatSpecific,
129            stable: !looks_like_uuid,
130        }
131    }
132
133    /// Create from CPE
134    pub fn from_cpe(cpe: &str) -> Self {
135        Self {
136            value: cpe.to_lowercase(),
137            source: IdSource::Cpe,
138            stable: true,
139        }
140    }
141
142    /// Create from SWID tag
143    pub fn from_swid(swid: &str) -> Self {
144        Self {
145            value: swid.to_string(),
146            source: IdSource::Swid,
147            stable: true,
148        }
149    }
150
151    /// Get the canonical ID value
152    pub fn value(&self) -> &str {
153        &self.value
154    }
155
156    /// Get the source of this identifier
157    pub fn source(&self) -> &IdSource {
158        &self.source
159    }
160
161    /// Returns true if this identifier is stable across SBOM regenerations
162    pub fn is_stable(&self) -> bool {
163        self.stable
164    }
165
166    /// Normalize a PURL string for comparison
167    fn normalize_purl(purl: &str) -> String {
168        // Basic normalization - a full implementation would use the packageurl crate
169        let mut normalized = purl.to_lowercase();
170
171        // Handle common ecosystem-specific normalizations
172        if normalized.starts_with("pkg:pypi/") {
173            // PyPI: normalize underscores, hyphens, and dots to hyphens
174            normalized = normalized.replace(['_', '.'], "-");
175        } else if normalized.starts_with("pkg:npm/") {
176            // NPM: decode URL-encoded scope
177            normalized = normalized.replace("%40", "@");
178        }
179
180        normalized
181    }
182}
183
184impl PartialEq for CanonicalId {
185    fn eq(&self, other: &Self) -> bool {
186        self.value == other.value
187    }
188}
189
190impl Hash for CanonicalId {
191    fn hash<H: Hasher>(&self, state: &mut H) {
192        self.value.hash(state);
193    }
194}
195
196impl fmt::Display for CanonicalId {
197    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
198        write!(f, "{}", self.value)
199    }
200}
201
/// Component identifiers from various sources
///
/// Holds every raw identifier found for one component. The canonical-ID
/// derivation in this module consults `purl`, then the first `cpe` entry,
/// then `swid`, and only then falls back to other data.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ComponentIdentifiers {
    /// Package URL (preferred identifier)
    pub purl: Option<String>,
    /// Common Platform Enumeration identifiers.
    /// Only the first entry is used when deriving a canonical ID.
    pub cpe: Vec<String>,
    /// Software Identification tag
    pub swid: Option<String>,
    /// Original format-specific identifier (empty string when built via `Default`)
    pub format_id: String,
    /// Known aliases for this component.
    /// Not read by the canonical-ID derivation visible in this file.
    pub aliases: Vec<String>,
}
216
/// Result of canonical ID generation, including stability information
///
/// Returned by `ComponentIdentifiers::canonical_id_with_context`.
#[derive(Debug, Clone)]
pub struct CanonicalIdResult {
    /// The canonical ID
    pub id: CanonicalId,
    /// Warning message if fallback was used.
    /// `None` when the ID came from a PURL, CPE or SWID; `Some` for synthetic
    /// and format-specific fallbacks.
    pub warning: Option<String>,
}
225
226impl ComponentIdentifiers {
227    /// Create a new empty set of identifiers
228    pub fn new(format_id: String) -> Self {
229        Self {
230            format_id,
231            ..Default::default()
232        }
233    }
234
235    /// Get the best available canonical ID (without component context)
236    ///
237    /// For better stability, prefer `canonical_id_with_context()` which can
238    /// generate synthetic IDs from component metadata.
239    pub fn canonical_id(&self) -> CanonicalId {
240        // Tiered fallback: PURL → CPE → SWID → format_id
241        if let Some(purl) = &self.purl {
242            CanonicalId::from_purl(purl)
243        } else if let Some(cpe) = self.cpe.first() {
244            CanonicalId::from_cpe(cpe)
245        } else if let Some(swid) = &self.swid {
246            CanonicalId::from_swid(swid)
247        } else {
248            CanonicalId::from_format_id(&self.format_id)
249        }
250    }
251
252    /// Get the best available canonical ID with component context for stable fallback
253    ///
254    /// This method uses a tiered fallback strategy:
255    /// 1. PURL (most reliable)
256    /// 2. CPE
257    /// 3. SWID
258    /// 4. Synthetic (group:name@version) - stable across regenerations
259    /// 5. Format-specific ID (least stable)
260    ///
261    /// Returns both the ID and any warnings about stability.
262    pub fn canonical_id_with_context(
263        &self,
264        name: &str,
265        version: Option<&str>,
266        group: Option<&str>,
267    ) -> CanonicalIdResult {
268        // Tier 1: PURL (best)
269        if let Some(purl) = &self.purl {
270            return CanonicalIdResult {
271                id: CanonicalId::from_purl(purl),
272                warning: None,
273            };
274        }
275
276        // Tier 2: CPE
277        if let Some(cpe) = self.cpe.first() {
278            return CanonicalIdResult {
279                id: CanonicalId::from_cpe(cpe),
280                warning: None,
281            };
282        }
283
284        // Tier 3: SWID
285        if let Some(swid) = &self.swid {
286            return CanonicalIdResult {
287                id: CanonicalId::from_swid(swid),
288                warning: None,
289            };
290        }
291
292        // Tier 4: Synthetic from name/version/group (stable)
293        // Only use if we have at least a name
294        if !name.is_empty() {
295            return CanonicalIdResult {
296                id: CanonicalId::synthetic(group, name, version),
297                warning: Some(format!(
298                    "Component '{}' lacks PURL/CPE/SWID identifiers; using synthetic ID. \
299                     Consider enriching SBOM with package URLs for accurate diffing.",
300                    name
301                )),
302            };
303        }
304
305        // Tier 5: Format-specific (least stable - may be UUID)
306        let id = CanonicalId::from_format_id(&self.format_id);
307        let warning = if !id.is_stable() {
308            Some(format!(
309                "Component uses unstable format-specific ID '{}'. \
310                 This may cause inaccurate diff results across SBOM regenerations.",
311                self.format_id
312            ))
313        } else {
314            Some(format!(
315                "Component uses format-specific ID '{}' without standard identifiers.",
316                self.format_id
317            ))
318        };
319
320        CanonicalIdResult { id, warning }
321    }
322
323    /// Check if this component has any stable identifiers
324    pub fn has_stable_id(&self) -> bool {
325        self.purl.is_some() || !self.cpe.is_empty() || self.swid.is_some()
326    }
327
328    /// Get the reliability level of available identifiers
329    pub fn id_reliability(&self) -> IdReliability {
330        if self.purl.is_some() {
331            IdReliability::High
332        } else if !self.cpe.is_empty() || self.swid.is_some() {
333            IdReliability::Medium
334        } else {
335            IdReliability::Low
336        }
337    }
338}
339
/// Reliability level of component identification
///
/// The derived `Ord` follows declaration order, so `High < Medium < Low`:
/// a *smaller* value means a *more* reliable identifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum IdReliability {
    /// High reliability (PURL available)
    High,
    /// Medium reliability (CPE or SWID available)
    Medium,
    /// Low reliability (synthetic or format-specific only)
    Low,
}
350
351impl fmt::Display for IdReliability {
352    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
353        match self {
354            IdReliability::High => write!(f, "high"),
355            IdReliability::Medium => write!(f, "medium"),
356            IdReliability::Low => write!(f, "low"),
357        }
358    }
359}
360
/// Ecosystem/package manager type
///
/// Each unit variant corresponds to one PURL type string (see
/// `from_purl_type` for parsing and the `Display` impl for the reverse
/// mapping); anything unrecognized is carried in `Unknown`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Ecosystem {
    Npm,
    PyPi,
    Cargo,
    Maven,
    // Parsed from both "golang" and "go"; displayed as "golang".
    Golang,
    Nuget,
    // PURL type string is "gem".
    RubyGems,
    Composer,
    CocoaPods,
    Swift,
    Hex,
    Pub,
    Hackage,
    Cpan,
    Cran,
    Conda,
    Conan,
    Deb,
    Rpm,
    Apk,
    Generic,
    // Unrecognized PURL type; `from_purl_type` stores it lowercased.
    Unknown(String),
}
387
388impl Ecosystem {
389    /// Parse ecosystem from PURL type
390    pub fn from_purl_type(purl_type: &str) -> Self {
391        match purl_type.to_lowercase().as_str() {
392            "npm" => Ecosystem::Npm,
393            "pypi" => Ecosystem::PyPi,
394            "cargo" => Ecosystem::Cargo,
395            "maven" => Ecosystem::Maven,
396            "golang" | "go" => Ecosystem::Golang,
397            "nuget" => Ecosystem::Nuget,
398            "gem" => Ecosystem::RubyGems,
399            "composer" => Ecosystem::Composer,
400            "cocoapods" => Ecosystem::CocoaPods,
401            "swift" => Ecosystem::Swift,
402            "hex" => Ecosystem::Hex,
403            "pub" => Ecosystem::Pub,
404            "hackage" => Ecosystem::Hackage,
405            "cpan" => Ecosystem::Cpan,
406            "cran" => Ecosystem::Cran,
407            "conda" => Ecosystem::Conda,
408            "conan" => Ecosystem::Conan,
409            "deb" => Ecosystem::Deb,
410            "rpm" => Ecosystem::Rpm,
411            "apk" => Ecosystem::Apk,
412            "generic" => Ecosystem::Generic,
413            other => Ecosystem::Unknown(other.to_string()),
414        }
415    }
416}
417
418impl fmt::Display for Ecosystem {
419    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
420        match self {
421            Ecosystem::Npm => write!(f, "npm"),
422            Ecosystem::PyPi => write!(f, "pypi"),
423            Ecosystem::Cargo => write!(f, "cargo"),
424            Ecosystem::Maven => write!(f, "maven"),
425            Ecosystem::Golang => write!(f, "golang"),
426            Ecosystem::Nuget => write!(f, "nuget"),
427            Ecosystem::RubyGems => write!(f, "gem"),
428            Ecosystem::Composer => write!(f, "composer"),
429            Ecosystem::CocoaPods => write!(f, "cocoapods"),
430            Ecosystem::Swift => write!(f, "swift"),
431            Ecosystem::Hex => write!(f, "hex"),
432            Ecosystem::Pub => write!(f, "pub"),
433            Ecosystem::Hackage => write!(f, "hackage"),
434            Ecosystem::Cpan => write!(f, "cpan"),
435            Ecosystem::Cran => write!(f, "cran"),
436            Ecosystem::Conda => write!(f, "conda"),
437            Ecosystem::Conan => write!(f, "conan"),
438            Ecosystem::Deb => write!(f, "deb"),
439            Ecosystem::Rpm => write!(f, "rpm"),
440            Ecosystem::Apk => write!(f, "apk"),
441            Ecosystem::Generic => write!(f, "generic"),
442            Ecosystem::Unknown(s) => write!(f, "{}", s),
443        }
444    }
445}
446
447// ============================================================================
448// ComponentRef: Lightweight reference combining ID and display name
449// ============================================================================
450
/// A lightweight reference to a component, combining its stable ID with
/// a human-readable display name.
///
/// This type is used throughout the diff system and TUI to:
/// - Navigate and link by ID (stable, unique)
/// - Display by name (human-readable)
///
/// The derived `PartialEq`/`Hash` cover all three fields; the `id` field in
/// turn compares by its string value only.
///
/// # Example
/// ```ignore
/// let comp_ref = ComponentRef::new(component.canonical_id.clone(), &component.name);
/// println!("Component: {} (ID: {})", comp_ref.name(), comp_ref.id());
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ComponentRef {
    /// The stable canonical ID for linking and navigation
    id: CanonicalId,
    /// Human-readable name for display (this is what `Display` prints)
    name: String,
    /// Optional version for display context; omitted from serialized output
    /// when `None`
    #[serde(skip_serializing_if = "Option::is_none")]
    version: Option<String>,
}
473
474impl ComponentRef {
475    /// Create a new component reference
476    pub fn new(id: CanonicalId, name: impl Into<String>) -> Self {
477        Self {
478            id,
479            name: name.into(),
480            version: None,
481        }
482    }
483
484    /// Create a component reference with version
485    pub fn with_version(id: CanonicalId, name: impl Into<String>, version: Option<String>) -> Self {
486        Self {
487            id,
488            name: name.into(),
489            version,
490        }
491    }
492
493    /// Create from a Component
494    pub fn from_component(component: &super::Component) -> Self {
495        Self {
496            id: component.canonical_id.clone(),
497            name: component.name.clone(),
498            version: component.version.clone(),
499        }
500    }
501
502    /// Get the canonical ID
503    pub fn id(&self) -> &CanonicalId {
504        &self.id
505    }
506
507    /// Get the ID as a string
508    pub fn id_str(&self) -> &str {
509        self.id.value()
510    }
511
512    /// Get the display name
513    pub fn name(&self) -> &str {
514        &self.name
515    }
516
517    /// Get the version if available
518    pub fn version(&self) -> Option<&str> {
519        self.version.as_deref()
520    }
521
522    /// Get display string with version if available
523    pub fn display_with_version(&self) -> String {
524        match &self.version {
525            Some(v) => format!("{}@{}", self.name, v),
526            None => self.name.clone(),
527        }
528    }
529
530    /// Check if this ref matches a given ID
531    pub fn matches_id(&self, id: &CanonicalId) -> bool {
532        &self.id == id
533    }
534
535    /// Check if this ref matches a given ID string
536    pub fn matches_id_str(&self, id_str: &str) -> bool {
537        self.id.value() == id_str
538    }
539}
540
541impl fmt::Display for ComponentRef {
542    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
543        write!(f, "{}", self.name)
544    }
545}
546
547impl From<&super::Component> for ComponentRef {
548    fn from(component: &super::Component) -> Self {
549        Self::from_component(component)
550    }
551}
552
/// A reference to a vulnerability with its associated component
///
/// Pairs a vulnerability identifier with the `ComponentRef` of the component
/// it affects. Equality and hashing are derived over both fields.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct VulnerabilityRef2 {
    /// Vulnerability ID (e.g., CVE-2021-44228)
    pub vuln_id: String,
    /// Reference to the affected component
    pub component: ComponentRef,
}
561
562impl VulnerabilityRef2 {
563    /// Create a new vulnerability reference
564    pub fn new(vuln_id: impl Into<String>, component: ComponentRef) -> Self {
565        Self {
566            vuln_id: vuln_id.into(),
567            component,
568        }
569    }
570
571    /// Get the component's canonical ID
572    pub fn component_id(&self) -> &CanonicalId {
573        self.component.id()
574    }
575
576    /// Get the component name for display
577    pub fn component_name(&self) -> &str {
578        self.component.name()
579    }
580}