Skip to main content

sbom_tools/model/
identifiers.rs

1//! Canonical identifiers for SBOM components.
2//!
3//! This module provides stable, comparable identifiers for components across
4//! different SBOM formats. The identification strategy uses a tiered fallback:
5//!
6//! 1. **PURL** (Package URL) - Most reliable, globally unique
7//! 2. **CPE** (Common Platform Enumeration) - Industry standard for vulnerability matching
8//! 3. **SWHID** (Software Heritage persistent ID) - Content-addressed, ISO/IEC 18670
9//! 4. **SWID** (Software Identification) - ISO standard tag
10//! 5. **Synthetic** - Generated from group:name@version (stable across regenerations)
11//! 6. **`FormatSpecific`** - Original format ID (least stable, may be UUIDs)
12//!
13//! SWHID is one of the three identifier types named by CRA prEN 40000-1-3
14//! `[PRE-7-RQ-07]` (alongside PURL and CPE).
15
16use serde::{Deserialize, Serialize};
17use std::fmt;
18use std::hash::{Hash, Hasher};
19
/// Canonical identifier for a component.
///
/// This provides a stable, comparable identifier across different SBOM formats.
/// The identifier is derived from the PURL when available, falling back through
/// a tiered strategy to ensure stability.
///
/// Equality and hashing are implemented by hand in this module and look only
/// at `value`; `source` and `stable` do not take part in comparisons.
#[derive(Debug, Clone, Eq, Serialize, Deserialize)]
pub struct CanonicalId {
    /// The normalized identifier string
    value: String,
    /// Source of the identifier
    source: IdSource,
    /// Whether this ID is considered stable across SBOM regenerations.
    /// Deserializes to `false` when the field is absent from the input.
    #[serde(default)]
    stable: bool,
}
35
/// Source of the canonical identifier, ordered by reliability
///
/// Variants are declared from most to least reliable; the same order is
/// reported numerically by `IdSource::reliability_rank` (lower is better).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum IdSource {
    /// Derived from Package URL (most reliable)
    Purl,
    /// Derived from CPE
    Cpe,
    /// Derived from Software Heritage persistent identifier (content-addressed)
    Swhid,
    /// Derived from SWID tag
    Swid,
    /// Derived from name and version (stable)
    NameVersion,
    /// Synthetically generated from group:name@version
    Synthetic,
    /// Format-specific identifier (least stable - may be UUID)
    FormatSpecific,
}
55
56impl IdSource {
57    /// Returns true if this source produces stable identifiers
58    #[must_use]
59    pub const fn is_stable(&self) -> bool {
60        matches!(
61            self,
62            Self::Purl | Self::Cpe | Self::Swhid | Self::Swid | Self::NameVersion | Self::Synthetic
63        )
64    }
65
66    /// Returns the reliability rank (lower is better)
67    #[must_use]
68    pub const fn reliability_rank(&self) -> u8 {
69        match self {
70            Self::Purl => 0,
71            Self::Cpe => 1,
72            Self::Swhid => 2,
73            Self::Swid => 3,
74            Self::NameVersion => 4,
75            Self::Synthetic => 5,
76            Self::FormatSpecific => 6,
77        }
78    }
79}
80
81impl CanonicalId {
82    /// Create a new canonical ID from a PURL
83    #[must_use]
84    pub fn from_purl(purl: &str) -> Self {
85        Self {
86            value: Self::normalize_purl(purl),
87            source: IdSource::Purl,
88            stable: true,
89        }
90    }
91
92    /// Create a new canonical ID from name and version
93    #[must_use]
94    pub fn from_name_version(name: &str, version: Option<&str>) -> Self {
95        let value = version.map_or_else(
96            || name.to_lowercase(),
97            |v| format!("{}@{}", name.to_lowercase(), v),
98        );
99        Self {
100            value,
101            source: IdSource::NameVersion,
102            stable: true,
103        }
104    }
105
106    /// Create a synthetic canonical ID from group, name, and version
107    ///
108    /// This provides a stable identifier when primary identifiers (PURL, CPE, SWID)
109    /// are not available. The format is: `group:name@version` or `name@version`.
110    #[must_use]
111    pub fn synthetic(group: Option<&str>, name: &str, version: Option<&str>) -> Self {
112        let value = match (group, version) {
113            (Some(g), Some(v)) => format!("{}:{}@{}", g.to_lowercase(), name.to_lowercase(), v),
114            (Some(g), None) => format!("{}:{}", g.to_lowercase(), name.to_lowercase()),
115            (None, Some(v)) => format!("{}@{}", name.to_lowercase(), v),
116            (None, None) => name.to_lowercase(),
117        };
118        Self {
119            value,
120            source: IdSource::Synthetic,
121            stable: true,
122        }
123    }
124
125    /// Create a new canonical ID from a format-specific identifier
126    ///
127    /// **Warning**: Format-specific IDs (like bom-ref UUIDs) are often unstable
128    /// across SBOM regenerations. Use `synthetic()` or other methods when possible.
129    #[must_use]
130    pub fn from_format_id(id: &str) -> Self {
131        // Check if this looks like a UUID (unstable)
132        let looks_like_uuid = id.len() == 36
133            && id.chars().filter(|c| *c == '-').count() == 4
134            && id.chars().all(|c| c.is_ascii_hexdigit() || c == '-');
135
136        Self {
137            value: id.to_string(),
138            source: IdSource::FormatSpecific,
139            stable: !looks_like_uuid,
140        }
141    }
142
143    /// Create from CPE
144    #[must_use]
145    pub fn from_cpe(cpe: &str) -> Self {
146        Self {
147            value: cpe.to_lowercase(),
148            source: IdSource::Cpe,
149            stable: true,
150        }
151    }
152
153    /// Create from SWID tag
154    #[must_use]
155    pub fn from_swid(swid: &str) -> Self {
156        Self {
157            value: swid.to_string(),
158            source: IdSource::Swid,
159            stable: true,
160        }
161    }
162
163    /// Create from a Software Heritage persistent identifier (SWHID).
164    ///
165    /// SWHIDs are content-addressed identifiers of the form
166    /// `swh:1:<kind>:<sha1-hex>[;<qualifier>=<value>...]`.
167    /// Named explicitly by CRA prEN 40000-1-3 `[PRE-7-RQ-07]` alongside PURL/CPE.
168    ///
169    /// Falls back to a `FormatSpecific` identifier (marked unstable) if the
170    /// input does not look like a valid SWHID.
171    #[must_use]
172    pub fn from_swhid(swhid: &str) -> Self {
173        match SwhidObject::parse(swhid) {
174            Ok(obj) => Self {
175                // Display reconstitutes the canonical lowercase form with qualifiers
176                value: obj.to_string(),
177                source: IdSource::Swhid,
178                stable: true,
179            },
180            Err(_) => Self {
181                value: swhid.to_string(),
182                source: IdSource::FormatSpecific,
183                stable: false,
184            },
185        }
186    }
187
188    /// Create from a structured `SwhidObject` (preferred internal path).
189    #[must_use]
190    pub fn from_swhid_object(obj: &SwhidObject) -> Self {
191        Self {
192            value: obj.to_string(),
193            source: IdSource::Swhid,
194            stable: true,
195        }
196    }
197
198    /// Get the canonical ID value
199    #[must_use]
200    pub fn value(&self) -> &str {
201        &self.value
202    }
203
204    /// Get the source of this identifier
205    #[must_use]
206    pub const fn source(&self) -> &IdSource {
207        &self.source
208    }
209
210    /// Returns true if this identifier is stable across SBOM regenerations
211    #[must_use]
212    pub const fn is_stable(&self) -> bool {
213        self.stable
214    }
215
216    /// Normalize a PURL string for comparison
217    fn normalize_purl(purl: &str) -> String {
218        // Basic normalization - a full implementation would use the packageurl crate
219        let mut normalized = purl.to_lowercase();
220
221        // Handle common ecosystem-specific normalizations
222        if normalized.starts_with("pkg:pypi/") {
223            // PyPI: normalize underscores, hyphens, and dots to hyphens
224            normalized = normalized.replace(['_', '.'], "-");
225        } else if normalized.starts_with("pkg:npm/") {
226            // NPM: decode URL-encoded scope
227            normalized = normalized.replace("%40", "@");
228        }
229
230        normalized
231    }
232}
233
/// Software Heritage persistent identifier kind.
///
/// Per the SWHID spec (<https://www.swhid.org/>), every SWHID identifies one of
/// five object kinds in the Software Heritage archive.
///
/// Each variant corresponds to the three-letter tag used in the textual
/// SWHID (`cnt`, `dir`, `rev`, `rel`, `snp`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SwhidKind {
    /// File content (blob)
    Cnt,
    /// Directory (tree)
    Dir,
    /// Revision (commit)
    Rev,
    /// Release (tag)
    Rel,
    /// Snapshot (repository state)
    Snp,
}
251
252impl SwhidKind {
253    fn as_str(self) -> &'static str {
254        match self {
255            Self::Cnt => "cnt",
256            Self::Dir => "dir",
257            Self::Rev => "rev",
258            Self::Rel => "rel",
259            Self::Snp => "snp",
260        }
261    }
262
263    fn parse(s: &str) -> Option<Self> {
264        match s.to_ascii_lowercase().as_str() {
265            "cnt" => Some(Self::Cnt),
266            "dir" => Some(Self::Dir),
267            "rev" => Some(Self::Rev),
268            "rel" => Some(Self::Rel),
269            "snp" => Some(Self::Snp),
270            _ => None,
271        }
272    }
273}
274
275impl fmt::Display for SwhidKind {
276    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
277        f.write_str(self.as_str())
278    }
279}
280
/// A structured Software Heritage persistent identifier.
///
/// Format: `swh:1:<kind>:<sha1-hex-40>[;<qualifier>=<value>...]`. Recognised
/// by CRA prEN 40000-1-3 `[PRE-7-RQ-07]` as one of the three named identifier
/// types (alongside PURL and CPE).
///
/// Serialised as a plain string in JSON to match CycloneDX/SPDX wire formats
/// (`["swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", ...]`).
/// `Serialize`/`Deserialize` are therefore implemented by hand in this module
/// rather than derived.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SwhidObject {
    /// Object kind (cnt/dir/rev/rel/snp)
    pub kind: SwhidKind,
    /// 20-byte SHA-1 of the canonical object representation
    pub hash: [u8; 20],
    /// Optional contextual qualifiers (origin, visit, anchor, path, lines),
    /// kept as `(key, value)` pairs in the order they appeared in the input.
    pub qualifiers: Vec<(String, String)>,
}
298
/// Reasons a string can be rejected when parsed as a SWHID.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SwhidParseError {
    /// The core part did not split into the four fields `swh:1:<kind>:<hash>`
    BadShape,
    /// The leading fields were not `swh:1:`
    BadPrefix,
    /// The kind field was not one of cnt/dir/rev/rel/snp
    BadKind,
    /// The hash field was not 40 hexadecimal characters
    BadHash,
    /// A qualifier lacked the `key=value` shape
    BadQualifier,
}

impl fmt::Display for SwhidParseError {
    /// Writes a fixed, human-readable description of the failure.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::BadShape => "SWHID does not have shape swh:1:<kind>:<hash>",
            Self::BadPrefix => "SWHID prefix is not 'swh:1:'",
            Self::BadKind => "SWHID kind must be one of cnt/dir/rev/rel/snp",
            Self::BadHash => "SWHID hash must be 40 hexadecimal characters",
            Self::BadQualifier => "SWHID qualifier missing '=' separator",
        };
        f.write_str(message)
    }
}

impl std::error::Error for SwhidParseError {}
327
328impl SwhidObject {
329    /// Parse a SWHID string into structured form.
330    ///
331    /// Validation is case-insensitive on the prefix, kind, and hash; the
332    /// canonical form (returned by `Display`) is lowercase. Qualifier values
333    /// are preserved verbatim — the SWHID spec does not mandate a case
334    /// convention for qualifier values (e.g., URLs in `origin=`).
335    pub fn parse(s: &str) -> Result<Self, SwhidParseError> {
336        let (core, qualifier_str) = s.split_once(';').unwrap_or((s, ""));
337        let parts: Vec<&str> = core.split(':').collect();
338        if parts.len() != 4 {
339            return Err(SwhidParseError::BadShape);
340        }
341        if !parts[0].eq_ignore_ascii_case("swh") || parts[1] != "1" {
342            return Err(SwhidParseError::BadPrefix);
343        }
344        let kind = SwhidKind::parse(parts[2]).ok_or(SwhidParseError::BadKind)?;
345
346        if parts[3].len() != 40 || !parts[3].chars().all(|c| c.is_ascii_hexdigit()) {
347            return Err(SwhidParseError::BadHash);
348        }
349        let mut hash = [0u8; 20];
350        let bytes = parts[3].as_bytes();
351        for (i, byte) in hash.iter_mut().enumerate() {
352            let high = hex_digit(bytes[i * 2]).ok_or(SwhidParseError::BadHash)?;
353            let low = hex_digit(bytes[i * 2 + 1]).ok_or(SwhidParseError::BadHash)?;
354            *byte = (high << 4) | low;
355        }
356
357        let mut qualifiers = Vec::new();
358        if !qualifier_str.is_empty() {
359            for q in qualifier_str.split(';') {
360                if q.is_empty() {
361                    continue;
362                }
363                let (k, v) = q.split_once('=').ok_or(SwhidParseError::BadQualifier)?;
364                qualifiers.push((k.to_string(), v.to_string()));
365            }
366        }
367
368        Ok(Self {
369            kind,
370            hash,
371            qualifiers,
372        })
373    }
374
375    /// Canonical lowercase hex representation of the SHA-1 hash.
376    #[must_use]
377    pub fn hash_hex(&self) -> String {
378        let mut s = String::with_capacity(40);
379        for b in &self.hash {
380            s.push(hex_char(b >> 4));
381            s.push(hex_char(b & 0xf));
382        }
383        s
384    }
385}
386
/// Maps a nibble (`0..=15`) to its lowercase hexadecimal character.
///
/// Any value above 15 yields `'?'`.
const fn hex_char(n: u8) -> char {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    if n < 16 {
        DIGITS[n as usize] as char
    } else {
        '?'
    }
}
394
/// Decodes one ASCII hexadecimal digit (either case) to its value `0..=15`.
///
/// Returns `None` for any byte that is not `0-9`, `a-f` or `A-F`.
const fn hex_digit(c: u8) -> Option<u8> {
    // Folding to lowercase only touches `A-Z`, so one letter range suffices.
    let folded = c.to_ascii_lowercase();
    match folded {
        b'0'..=b'9' => Some(folded - b'0'),
        b'a'..=b'f' => Some(10 + (folded - b'a')),
        _ => None,
    }
}
403
404impl fmt::Display for SwhidObject {
405    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
406        write!(f, "swh:1:{}:{}", self.kind, self.hash_hex())?;
407        for (k, v) in &self.qualifiers {
408            write!(f, ";{k}={v}")?;
409        }
410        Ok(())
411    }
412}
413
414// Keep the wire format as a plain string so CycloneDX/SPDX I/O stays unchanged.
415impl Serialize for SwhidObject {
416    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
417        serializer.serialize_str(&self.to_string())
418    }
419}
420
421impl<'de> Deserialize<'de> for SwhidObject {
422    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
423        let s = String::deserialize(deserializer)?;
424        Self::parse(&s).map_err(serde::de::Error::custom)
425    }
426}
427
428/// Validate a SWHID string (convenience predicate over `SwhidObject::parse`).
429#[must_use]
430pub fn is_valid_swhid(s: &str) -> bool {
431    SwhidObject::parse(s).is_ok()
432}
433
434impl PartialEq for CanonicalId {
435    fn eq(&self, other: &Self) -> bool {
436        self.value == other.value
437    }
438}
439
440impl Hash for CanonicalId {
441    fn hash<H: Hasher>(&self, state: &mut H) {
442        self.value.hash(state);
443    }
444}
445
446impl fmt::Display for CanonicalId {
447    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
448        write!(f, "{}", self.value)
449    }
450}
451
/// Component identifiers from various sources
///
/// Holds every identifier known for one component; the methods on this type
/// pick the best one (PURL, then CPE, SWHID, SWID) when a canonical ID is
/// requested.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ComponentIdentifiers {
    /// Package URL (preferred identifier)
    pub purl: Option<String>,
    /// Common Platform Enumeration identifiers.
    /// Only the first entry is consulted when deriving a canonical ID.
    pub cpe: Vec<String>,
    /// Software Heritage persistent identifiers (SWHIDs).
    ///
    /// Multiple values supported because a component may be expressible by
    /// several SWHID kinds (e.g., one `cnt` per archive entry plus a `dir`
    /// for the unpacked tree). CRA prEN 40000-1-3 `[PRE-7-RQ-07]` accepts
    /// SWHIDs as one of three named identifier types.
    ///
    /// Stored as structured `SwhidObject` for downstream consumers; on the
    /// wire (JSON), each element serialises as a plain string to match the
    /// CycloneDX / SPDX 3.0 `swhid` array shape.
    ///
    /// Omitted from serialized output when empty and defaulted to empty when
    /// missing from the input.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub swhid: Vec<SwhidObject>,
    /// Software Identification tag
    pub swid: Option<String>,
    /// Original format-specific identifier (last-resort fallback for the
    /// canonical ID)
    pub format_id: String,
    /// Known aliases for this component
    pub aliases: Vec<String>,
}
478
/// Result of canonical ID generation, including stability information
///
/// Returned by `ComponentIdentifiers::canonical_id_with_context`.
#[derive(Debug, Clone)]
pub struct CanonicalIdResult {
    /// The canonical ID
    pub id: CanonicalId,
    /// Warning message if fallback was used; `None` when the ID came from a
    /// PURL, CPE, SWHID or SWID
    pub warning: Option<String>,
}
487
488impl ComponentIdentifiers {
489    /// Create a new empty set of identifiers
490    #[must_use]
491    pub fn new(format_id: String) -> Self {
492        Self {
493            format_id,
494            ..Default::default()
495        }
496    }
497
498    /// Get the best available canonical ID (without component context)
499    ///
500    /// For better stability, prefer `canonical_id_with_context()` which can
501    /// generate synthetic IDs from component metadata.
502    #[must_use]
503    pub fn canonical_id(&self) -> CanonicalId {
504        // Tiered fallback: PURL → CPE → SWHID → SWID → format_id
505        if let Some(purl) = &self.purl {
506            return CanonicalId::from_purl(purl);
507        }
508        if let Some(cpe) = self.cpe.first() {
509            return CanonicalId::from_cpe(cpe);
510        }
511        if let Some(swhid) = self.swhid.first() {
512            return CanonicalId::from_swhid_object(swhid);
513        }
514        if let Some(swid) = &self.swid {
515            return CanonicalId::from_swid(swid);
516        }
517        CanonicalId::from_format_id(&self.format_id)
518    }
519
520    /// Get the best available canonical ID with component context for stable fallback
521    ///
522    /// This method uses a tiered fallback strategy:
523    /// 1. PURL (most reliable)
524    /// 2. CPE
525    /// 3. SWHID (content-addressed, CRA prEN 40000-1-3 named)
526    /// 4. SWID
527    /// 5. Synthetic (group:name@version) - stable across regenerations
528    /// 6. Format-specific ID (least stable)
529    ///
530    /// Returns both the ID and any warnings about stability.
531    #[must_use]
532    pub fn canonical_id_with_context(
533        &self,
534        name: &str,
535        version: Option<&str>,
536        group: Option<&str>,
537    ) -> CanonicalIdResult {
538        // Tier 1: PURL (best)
539        if let Some(purl) = &self.purl {
540            return CanonicalIdResult {
541                id: CanonicalId::from_purl(purl),
542                warning: None,
543            };
544        }
545
546        // Tier 2: CPE
547        if let Some(cpe) = self.cpe.first() {
548            return CanonicalIdResult {
549                id: CanonicalId::from_cpe(cpe),
550                warning: None,
551            };
552        }
553
554        // Tier 3: SWHID (content-addressed)
555        if let Some(swhid) = self.swhid.first() {
556            return CanonicalIdResult {
557                id: CanonicalId::from_swhid_object(swhid),
558                warning: None,
559            };
560        }
561
562        // Tier 4: SWID
563        if let Some(swid) = &self.swid {
564            return CanonicalIdResult {
565                id: CanonicalId::from_swid(swid),
566                warning: None,
567            };
568        }
569
570        // Tier 5: Synthetic from name/version/group (stable)
571        // Only use if we have at least a name
572        if !name.is_empty() {
573            return CanonicalIdResult {
574                id: CanonicalId::synthetic(group, name, version),
575                warning: Some(format!(
576                    "Component '{name}' lacks PURL/CPE/SWHID/SWID identifiers; using synthetic ID. \
577                     Consider enriching SBOM with package URLs for accurate diffing."
578                )),
579            };
580        }
581
582        // Tier 6: Format-specific (least stable - may be UUID)
583        let id = CanonicalId::from_format_id(&self.format_id);
584        let warning = if id.is_stable() {
585            Some(format!(
586                "Component uses format-specific ID '{}' without standard identifiers.",
587                self.format_id
588            ))
589        } else {
590            Some(format!(
591                "Component uses unstable format-specific ID '{}'. \
592                 This may cause inaccurate diff results across SBOM regenerations.",
593                self.format_id
594            ))
595        };
596
597        CanonicalIdResult { id, warning }
598    }
599
600    /// Check if this component has any stable identifiers
601    #[must_use]
602    pub fn has_stable_id(&self) -> bool {
603        self.purl.is_some() || !self.cpe.is_empty() || !self.swhid.is_empty() || self.swid.is_some()
604    }
605
606    /// Get the reliability level of available identifiers
607    #[must_use]
608    pub fn id_reliability(&self) -> IdReliability {
609        if self.purl.is_some() {
610            IdReliability::High
611        } else if !self.cpe.is_empty() || !self.swhid.is_empty() || self.swid.is_some() {
612            IdReliability::Medium
613        } else {
614            IdReliability::Low
615        }
616    }
617
618    /// Returns true if this component has any of the CRA-named identifier
619    /// types (PURL, CPE, SWHID, or SWID), satisfying CRA Annex I Part II
620    /// identifier-traceability and prEN 40000-1-3 `[PRE-7-RQ-07]`.
621    #[must_use]
622    pub fn has_cra_identifier(&self) -> bool {
623        self.purl.is_some() || !self.cpe.is_empty() || !self.swhid.is_empty() || self.swid.is_some()
624    }
625}
626
/// Reliability level of component identification
///
/// The derived ordering follows declaration order, so
/// `High < Medium < Low` when values are compared or sorted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum IdReliability {
    /// High reliability (PURL available)
    High,
    /// Medium reliability (CPE, SWHID, or SWID available)
    Medium,
    /// Low reliability (synthetic or format-specific only)
    Low,
}
637
638impl fmt::Display for IdReliability {
639    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
640        match self {
641            Self::High => write!(f, "high"),
642            Self::Medium => write!(f, "medium"),
643            Self::Low => write!(f, "low"),
644        }
645    }
646}
647
/// Ecosystem/package manager type
///
/// Each named variant corresponds to one PURL type string (see
/// `Ecosystem::from_purl_type` and the `Display` impl); anything else is
/// carried in `Unknown`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum Ecosystem {
    /// PURL type `npm`
    Npm,
    /// PURL type `pypi`
    PyPi,
    /// PURL type `cargo`
    Cargo,
    /// PURL type `maven`
    Maven,
    /// PURL type `golang` (`go` is also accepted when parsing)
    Golang,
    /// PURL type `nuget`
    Nuget,
    /// PURL type `gem`
    RubyGems,
    /// PURL type `composer`
    Composer,
    /// PURL type `cocoapods`
    CocoaPods,
    /// PURL type `swift`
    Swift,
    /// PURL type `hex`
    Hex,
    /// PURL type `pub`
    Pub,
    /// PURL type `hackage`
    Hackage,
    /// PURL type `cpan`
    Cpan,
    /// PURL type `cran`
    Cran,
    /// PURL type `conda`
    Conda,
    /// PURL type `conan`
    Conan,
    /// PURL type `deb`
    Deb,
    /// PURL type `rpm`
    Rpm,
    /// PURL type `apk`
    Apk,
    /// PURL type `generic`
    Generic,
    /// Any other type string; `from_purl_type` stores it lowercased
    Unknown(String),
}
675
676impl Ecosystem {
677    /// Parse ecosystem from PURL type
678    #[must_use]
679    pub fn from_purl_type(purl_type: &str) -> Self {
680        match purl_type.to_lowercase().as_str() {
681            "npm" => Self::Npm,
682            "pypi" => Self::PyPi,
683            "cargo" => Self::Cargo,
684            "maven" => Self::Maven,
685            "golang" | "go" => Self::Golang,
686            "nuget" => Self::Nuget,
687            "gem" => Self::RubyGems,
688            "composer" => Self::Composer,
689            "cocoapods" => Self::CocoaPods,
690            "swift" => Self::Swift,
691            "hex" => Self::Hex,
692            "pub" => Self::Pub,
693            "hackage" => Self::Hackage,
694            "cpan" => Self::Cpan,
695            "cran" => Self::Cran,
696            "conda" => Self::Conda,
697            "conan" => Self::Conan,
698            "deb" => Self::Deb,
699            "rpm" => Self::Rpm,
700            "apk" => Self::Apk,
701            "generic" => Self::Generic,
702            other => Self::Unknown(other.to_string()),
703        }
704    }
705}
706
707impl fmt::Display for Ecosystem {
708    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
709        match self {
710            Self::Npm => write!(f, "npm"),
711            Self::PyPi => write!(f, "pypi"),
712            Self::Cargo => write!(f, "cargo"),
713            Self::Maven => write!(f, "maven"),
714            Self::Golang => write!(f, "golang"),
715            Self::Nuget => write!(f, "nuget"),
716            Self::RubyGems => write!(f, "gem"),
717            Self::Composer => write!(f, "composer"),
718            Self::CocoaPods => write!(f, "cocoapods"),
719            Self::Swift => write!(f, "swift"),
720            Self::Hex => write!(f, "hex"),
721            Self::Pub => write!(f, "pub"),
722            Self::Hackage => write!(f, "hackage"),
723            Self::Cpan => write!(f, "cpan"),
724            Self::Cran => write!(f, "cran"),
725            Self::Conda => write!(f, "conda"),
726            Self::Conan => write!(f, "conan"),
727            Self::Deb => write!(f, "deb"),
728            Self::Rpm => write!(f, "rpm"),
729            Self::Apk => write!(f, "apk"),
730            Self::Generic => write!(f, "generic"),
731            Self::Unknown(s) => write!(f, "{s}"),
732        }
733    }
734}
735
736// ============================================================================
737// ComponentRef: Lightweight reference combining ID and display name
738// ============================================================================
739
/// A lightweight reference to a component, combining its stable ID with
/// a human-readable display name.
///
/// This type is used throughout the diff system and TUI to:
/// - Navigate and link by ID (stable, unique)
/// - Display by name (human-readable)
///
/// The derived equality and hash cover all three fields (`id`, `name`,
/// `version`); `Display` prints only the name.
///
/// # Example
/// ```ignore
/// let comp_ref = ComponentRef::new(component.canonical_id.clone(), &component.name);
/// println!("Component: {} (ID: {})", comp_ref.name(), comp_ref.id());
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ComponentRef {
    /// The stable canonical ID for linking and navigation
    id: CanonicalId,
    /// Human-readable name for display
    name: String,
    /// Optional version for display context; left out of serialized output
    /// when `None`
    #[serde(skip_serializing_if = "Option::is_none")]
    version: Option<String>,
}
762
763impl ComponentRef {
764    /// Create a new component reference
765    pub fn new(id: CanonicalId, name: impl Into<String>) -> Self {
766        Self {
767            id,
768            name: name.into(),
769            version: None,
770        }
771    }
772
773    /// Create a component reference with version
774    pub fn with_version(id: CanonicalId, name: impl Into<String>, version: Option<String>) -> Self {
775        Self {
776            id,
777            name: name.into(),
778            version,
779        }
780    }
781
782    /// Create from a Component
783    #[must_use]
784    pub fn from_component(component: &super::Component) -> Self {
785        Self {
786            id: component.canonical_id.clone(),
787            name: component.name.clone(),
788            version: component.version.clone(),
789        }
790    }
791
792    /// Get the canonical ID
793    #[must_use]
794    pub const fn id(&self) -> &CanonicalId {
795        &self.id
796    }
797
798    /// Get the ID as a string
799    #[must_use]
800    pub fn id_str(&self) -> &str {
801        self.id.value()
802    }
803
804    /// Get the display name
805    #[must_use]
806    pub fn name(&self) -> &str {
807        &self.name
808    }
809
810    /// Get the version if available
811    #[must_use]
812    pub fn version(&self) -> Option<&str> {
813        self.version.as_deref()
814    }
815
816    /// Get display string with version if available
817    #[must_use]
818    pub fn display_with_version(&self) -> String {
819        self.version
820            .as_ref()
821            .map_or_else(|| self.name.clone(), |v| format!("{}@{}", self.name, v))
822    }
823
824    /// Check if this ref matches a given ID
825    #[must_use]
826    pub fn matches_id(&self, id: &CanonicalId) -> bool {
827        &self.id == id
828    }
829
830    /// Check if this ref matches a given ID string
831    #[must_use]
832    pub fn matches_id_str(&self, id_str: &str) -> bool {
833        self.id.value() == id_str
834    }
835}
836
837impl fmt::Display for ComponentRef {
838    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
839        write!(f, "{}", self.name)
840    }
841}
842
843impl From<&super::Component> for ComponentRef {
844    fn from(component: &super::Component) -> Self {
845        Self::from_component(component)
846    }
847}
848
/// A reference to a vulnerability with its associated component
///
/// Pairs a vulnerability identifier with the `ComponentRef` it affects, so
/// the vulnerability can be displayed and linked alongside that component.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct VulnerabilityRef2 {
    /// Vulnerability ID (e.g., CVE-2021-44228)
    pub vuln_id: String,
    /// Reference to the affected component
    pub component: ComponentRef,
}
857
858impl VulnerabilityRef2 {
859    /// Create a new vulnerability reference
860    pub fn new(vuln_id: impl Into<String>, component: ComponentRef) -> Self {
861        Self {
862            vuln_id: vuln_id.into(),
863            component,
864        }
865    }
866
867    /// Get the component's canonical ID
868    #[must_use]
869    pub const fn component_id(&self) -> &CanonicalId {
870        self.component.id()
871    }
872
873    /// Get the component name for display
874    #[must_use]
875    pub fn component_name(&self) -> &str {
876        self.component.name()
877    }
878}
879
880#[cfg(test)]
881mod swhid_tests {
882    use super::*;
883
884    #[test]
885    fn valid_swhid_content() {
886        assert!(is_valid_swhid(
887            "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"
888        ));
889    }
890
891    #[test]
892    fn valid_swhid_all_kinds() {
893        for kind in ["cnt", "dir", "rev", "rel", "snp"] {
894            let s = format!("swh:1:{kind}:94a9ed024d3859793618152ea559a168bbcbb5e2");
895            assert!(is_valid_swhid(&s), "kind {kind} should be valid");
896        }
897    }
898
899    #[test]
900    fn valid_swhid_with_qualifier() {
901        let swhid =
902            "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d;origin=https://github.com/x/y";
903        assert!(is_valid_swhid(swhid));
904    }
905
906    #[test]
907    fn invalid_swhid_wrong_prefix() {
908        assert!(!is_valid_swhid(
909            "swhid:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"
910        ));
911    }
912
913    #[test]
914    fn invalid_swhid_unknown_kind() {
915        assert!(!is_valid_swhid(
916            "swh:1:foo:94a9ed024d3859793618152ea559a168bbcbb5e2"
917        ));
918    }
919
920    #[test]
921    fn invalid_swhid_short_hash() {
922        assert!(!is_valid_swhid("swh:1:cnt:94a9ed024d"));
923    }
924
925    #[test]
926    fn invalid_swhid_non_hex() {
927        assert!(!is_valid_swhid(
928            "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbZZZZ"
929        ));
930    }
931
932    #[test]
933    fn invalid_swhid_falls_back_to_format_specific() {
934        let id = CanonicalId::from_swhid("swh:1:foo:bad");
935        assert_eq!(id.source(), &IdSource::FormatSpecific);
936        assert!(!id.is_stable());
937    }
938
939    #[test]
940    fn valid_swhid_construction_and_round_trip() {
941        let raw = "swh:1:cnt:94A9ED024D3859793618152EA559A168BBCBB5E2";
942        let id = CanonicalId::from_swhid(raw);
943        assert_eq!(id.source(), &IdSource::Swhid);
944        assert!(id.is_stable());
945        assert_eq!(
946            id.value(),
947            "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"
948        );
949    }
950
951    #[test]
952    fn swhid_qualifier_preserved_after_normalization() {
953        let raw = "swh:1:REV:309CF2674EE7A0749978CF8265AB91A60AEA0F7D;origin=Https://X.Y";
954        let id = CanonicalId::from_swhid(raw);
955        assert_eq!(
956            id.value(),
957            "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d;origin=Https://X.Y"
958        );
959    }
960
961    #[test]
962    fn component_identifiers_canonical_id_prefers_purl() {
963        let mut ids = ComponentIdentifiers::new("synthetic-1".to_string());
964        ids.purl = Some("pkg:cargo/serde@1.0.0".to_string());
965        ids.swhid.push(
966            SwhidObject::parse("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2").unwrap(),
967        );
968        assert_eq!(ids.canonical_id().source(), &IdSource::Purl);
969    }
970
971    #[test]
972    fn component_identifiers_canonical_id_uses_swhid_when_purl_absent() {
973        let mut ids = ComponentIdentifiers::new("synthetic-1".to_string());
974        ids.swhid.push(
975            SwhidObject::parse("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2").unwrap(),
976        );
977        let id = ids.canonical_id();
978        assert_eq!(id.source(), &IdSource::Swhid);
979    }
980
981    #[test]
982    fn has_cra_identifier_recognizes_swhid_only() {
983        let mut ids = ComponentIdentifiers::new("synthetic-1".to_string());
984        assert!(!ids.has_cra_identifier());
985        ids.swhid.push(
986            SwhidObject::parse("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2").unwrap(),
987        );
988        assert!(ids.has_cra_identifier());
989    }
990
991    #[test]
992    fn swhid_object_round_trip_via_display() {
993        let raw = "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2";
994        let obj = SwhidObject::parse(raw).unwrap();
995        assert_eq!(obj.kind, SwhidKind::Cnt);
996        assert_eq!(obj.qualifiers.len(), 0);
997        assert_eq!(obj.to_string(), raw);
998    }
999
1000    #[test]
1001    fn swhid_object_preserves_qualifiers_in_order() {
1002        let raw = "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d;origin=https://github.com/x/y;path=/src";
1003        let obj = SwhidObject::parse(raw).unwrap();
1004        assert_eq!(obj.kind, SwhidKind::Rev);
1005        assert_eq!(obj.qualifiers.len(), 2);
1006        assert_eq!(
1007            obj.qualifiers[0],
1008            ("origin".to_string(), "https://github.com/x/y".to_string())
1009        );
1010        assert_eq!(obj.qualifiers[1], ("path".to_string(), "/src".to_string()));
1011        assert_eq!(obj.to_string(), raw);
1012    }
1013
1014    #[test]
1015    fn swhid_object_lowercases_uppercase_input() {
1016        let raw = "SWH:1:CNT:94A9ED024D3859793618152EA559A168BBCBB5E2";
1017        let obj = SwhidObject::parse(raw).unwrap();
1018        assert_eq!(
1019            obj.to_string(),
1020            "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"
1021        );
1022    }
1023
1024    #[test]
1025    fn swhid_object_serde_round_trip_as_string() {
1026        let obj = SwhidObject::parse("swh:1:dir:309cf2674ee7a0749978cf8265ab91a60aea0f7d").unwrap();
1027        let json = serde_json::to_string(&obj).unwrap();
1028        assert_eq!(
1029            json,
1030            "\"swh:1:dir:309cf2674ee7a0749978cf8265ab91a60aea0f7d\""
1031        );
1032        let back: SwhidObject = serde_json::from_str(&json).unwrap();
1033        assert_eq!(back, obj);
1034    }
1035
1036    #[test]
1037    fn swhid_object_parse_errors() {
1038        assert_eq!(
1039            SwhidObject::parse("not-a-swhid").unwrap_err(),
1040            SwhidParseError::BadShape
1041        );
1042        assert_eq!(
1043            SwhidObject::parse("swh:2:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2").unwrap_err(),
1044            SwhidParseError::BadPrefix
1045        );
1046        assert_eq!(
1047            SwhidObject::parse("swh:1:foo:94a9ed024d3859793618152ea559a168bbcbb5e2").unwrap_err(),
1048            SwhidParseError::BadKind
1049        );
1050        assert_eq!(
1051            SwhidObject::parse("swh:1:cnt:not-hex").unwrap_err(),
1052            SwhidParseError::BadHash
1053        );
1054        assert_eq!(
1055            SwhidObject::parse("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2;malformed",)
1056                .unwrap_err(),
1057            SwhidParseError::BadQualifier
1058        );
1059    }
1060
1061    #[test]
1062    fn swhid_object_serializes_within_component_identifiers_as_array_of_strings() {
1063        let mut ids = ComponentIdentifiers::new("synthetic-1".to_string());
1064        ids.swhid.push(
1065            SwhidObject::parse("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2").unwrap(),
1066        );
1067        ids.swhid.push(
1068            SwhidObject::parse("swh:1:dir:309cf2674ee7a0749978cf8265ab91a60aea0f7d").unwrap(),
1069        );
1070        let json = serde_json::to_value(&ids).unwrap();
1071        let arr = json
1072            .get("swhid")
1073            .and_then(|v| v.as_array())
1074            .expect("swhid serialises as array");
1075        assert_eq!(arr.len(), 2);
1076        assert!(arr.iter().all(serde_json::Value::is_string));
1077        // Round-trip via deserialize keeps structure intact
1078        let parsed: ComponentIdentifiers = serde_json::from_value(json).unwrap();
1079        assert_eq!(parsed.swhid.len(), 2);
1080        assert_eq!(parsed.swhid[0].kind, SwhidKind::Cnt);
1081        assert_eq!(parsed.swhid[1].kind, SwhidKind::Dir);
1082    }
1083}