Skip to main content

sbom_tools/model/
identifiers.rs

1//! Canonical identifiers for SBOM components.
2//!
3//! This module provides stable, comparable identifiers for components across
4//! different SBOM formats. The identification strategy uses a tiered fallback:
5//!
6//! 1. **PURL** (Package URL) - Most reliable, globally unique
7//! 2. **CPE** (Common Platform Enumeration) - Industry standard for vulnerability matching
8//! 3. **SWHID** (Software Heritage persistent ID) - Content-addressed, ISO/IEC 18670
9//! 4. **SWID** (Software Identification) - ISO standard tag
10//! 5. **Synthetic** - Generated from group:name@version (stable across regenerations)
11//! 6. **`FormatSpecific`** - Original format ID (least stable, may be UUIDs)
12//!
13//! SWHID is one of the three identifier types named by CRA prEN 40000-1-3
14//! `[PRE-7-RQ-07]` (alongside PURL and CPE).
15
16use serde::{Deserialize, Serialize};
17use std::fmt;
18use std::hash::{Hash, Hasher};
19
20/// Canonical identifier for a component.
21///
22/// This provides a stable, comparable identifier across different SBOM formats.
23/// The identifier is derived from the PURL when available, falling back through
24/// a tiered strategy to ensure stability.
25#[derive(Debug, Clone, Eq, Serialize, Deserialize)]
26pub struct CanonicalId {
27    /// The normalized identifier string
28    value: String,
29    /// Source of the identifier
30    source: IdSource,
31    /// Whether this ID is considered stable across SBOM regenerations
32    #[serde(default)]
33    stable: bool,
34}
35
36/// Source of the canonical identifier, ordered by reliability
37#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
38#[non_exhaustive]
39pub enum IdSource {
40    /// Derived from Package URL (most reliable)
41    Purl,
42    /// Derived from CPE
43    Cpe,
44    /// Derived from Software Heritage persistent identifier (content-addressed)
45    Swhid,
46    /// Derived from SWID tag
47    Swid,
48    /// Derived from name and version (stable)
49    NameVersion,
50    /// Synthetically generated from group:name@version
51    Synthetic,
52    /// Format-specific identifier (least stable - may be UUID)
53    FormatSpecific,
54}
55
56impl IdSource {
57    /// Returns true if this source produces stable identifiers
58    #[must_use]
59    pub const fn is_stable(&self) -> bool {
60        matches!(
61            self,
62            Self::Purl | Self::Cpe | Self::Swhid | Self::Swid | Self::NameVersion | Self::Synthetic
63        )
64    }
65
66    /// Returns the reliability rank (lower is better)
67    #[must_use]
68    pub const fn reliability_rank(&self) -> u8 {
69        match self {
70            Self::Purl => 0,
71            Self::Cpe => 1,
72            Self::Swhid => 2,
73            Self::Swid => 3,
74            Self::NameVersion => 4,
75            Self::Synthetic => 5,
76            Self::FormatSpecific => 6,
77        }
78    }
79}
80
81impl CanonicalId {
82    /// Create a new canonical ID from a PURL
83    #[must_use]
84    pub fn from_purl(purl: &str) -> Self {
85        Self {
86            value: Self::normalize_purl(purl),
87            source: IdSource::Purl,
88            stable: true,
89        }
90    }
91
92    /// Create a new canonical ID from name and version
93    #[must_use]
94    pub fn from_name_version(name: &str, version: Option<&str>) -> Self {
95        let value = version.map_or_else(
96            || name.to_lowercase(),
97            |v| format!("{}@{}", name.to_lowercase(), v),
98        );
99        Self {
100            value,
101            source: IdSource::NameVersion,
102            stable: true,
103        }
104    }
105
106    /// Create a synthetic canonical ID from group, name, and version
107    ///
108    /// This provides a stable identifier when primary identifiers (PURL, CPE, SWID)
109    /// are not available. The format is: `group:name@version` or `name@version`.
110    #[must_use]
111    pub fn synthetic(group: Option<&str>, name: &str, version: Option<&str>) -> Self {
112        let value = match (group, version) {
113            (Some(g), Some(v)) => format!("{}:{}@{}", g.to_lowercase(), name.to_lowercase(), v),
114            (Some(g), None) => format!("{}:{}", g.to_lowercase(), name.to_lowercase()),
115            (None, Some(v)) => format!("{}@{}", name.to_lowercase(), v),
116            (None, None) => name.to_lowercase(),
117        };
118        Self {
119            value,
120            source: IdSource::Synthetic,
121            stable: true,
122        }
123    }
124
125    /// Create a new canonical ID from a format-specific identifier
126    ///
127    /// **Warning**: Format-specific IDs (like bom-ref UUIDs) are often unstable
128    /// across SBOM regenerations. Use `synthetic()` or other methods when possible.
129    #[must_use]
130    pub fn from_format_id(id: &str) -> Self {
131        // Check if this looks like a UUID (unstable)
132        let looks_like_uuid = id.len() == 36
133            && id.chars().filter(|c| *c == '-').count() == 4
134            && id.chars().all(|c| c.is_ascii_hexdigit() || c == '-');
135
136        Self {
137            value: id.to_string(),
138            source: IdSource::FormatSpecific,
139            stable: !looks_like_uuid,
140        }
141    }
142
143    /// Create from CPE
144    #[must_use]
145    pub fn from_cpe(cpe: &str) -> Self {
146        Self {
147            value: cpe.to_lowercase(),
148            source: IdSource::Cpe,
149            stable: true,
150        }
151    }
152
153    /// Create from SWID tag
154    #[must_use]
155    pub fn from_swid(swid: &str) -> Self {
156        Self {
157            value: swid.to_string(),
158            source: IdSource::Swid,
159            stable: true,
160        }
161    }
162
163    /// Create from a Software Heritage persistent identifier (SWHID).
164    ///
165    /// SWHIDs are content-addressed identifiers of the form
166    /// `swh:1:<kind>:<sha1-hex>[;<qualifier>=<value>...]`.
167    /// Named explicitly by CRA prEN 40000-1-3 `[PRE-7-RQ-07]` alongside PURL/CPE.
168    ///
169    /// Falls back to a `FormatSpecific` identifier (marked unstable) if the
170    /// input does not look like a valid SWHID.
171    #[must_use]
172    pub fn from_swhid(swhid: &str) -> Self {
173        match SwhidObject::parse(swhid) {
174            Ok(obj) => Self {
175                // Display reconstitutes the canonical lowercase form with qualifiers
176                value: obj.to_string(),
177                source: IdSource::Swhid,
178                stable: true,
179            },
180            Err(_) => Self {
181                value: swhid.to_string(),
182                source: IdSource::FormatSpecific,
183                stable: false,
184            },
185        }
186    }
187
188    /// Create from a structured `SwhidObject` (preferred internal path).
189    #[must_use]
190    pub fn from_swhid_object(obj: &SwhidObject) -> Self {
191        Self {
192            value: obj.to_string(),
193            source: IdSource::Swhid,
194            stable: true,
195        }
196    }
197
198    /// Get the canonical ID value
199    #[must_use]
200    pub fn value(&self) -> &str {
201        &self.value
202    }
203
204    /// Get the source of this identifier
205    #[must_use]
206    pub const fn source(&self) -> &IdSource {
207        &self.source
208    }
209
210    /// Returns true if this identifier is stable across SBOM regenerations
211    #[must_use]
212    pub const fn is_stable(&self) -> bool {
213        self.stable
214    }
215
216    /// Normalize a PURL string for comparison
217    fn normalize_purl(purl: &str) -> String {
218        // Basic normalization - a full implementation would use the packageurl crate
219        let mut normalized = purl.to_lowercase();
220
221        // Handle common ecosystem-specific normalizations
222        if normalized.starts_with("pkg:pypi/") {
223            // PyPI: normalize underscores, hyphens, and dots to hyphens
224            normalized = normalized.replace(['_', '.'], "-");
225        } else if normalized.starts_with("pkg:npm/") {
226            // NPM: decode URL-encoded scope
227            normalized = normalized.replace("%40", "@");
228        }
229
230        normalized
231    }
232}
233
234/// Software Heritage persistent identifier kind.
235///
236/// Per the SWHID spec (<https://www.swhid.org/>), every SWHID identifies one of
237/// five object kinds in the Software Heritage archive.
238#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
239pub enum SwhidKind {
240    /// File content (blob)
241    Cnt,
242    /// Directory (tree)
243    Dir,
244    /// Revision (commit)
245    Rev,
246    /// Release (tag)
247    Rel,
248    /// Snapshot (repository state)
249    Snp,
250}
251
252impl SwhidKind {
253    fn as_str(self) -> &'static str {
254        match self {
255            Self::Cnt => "cnt",
256            Self::Dir => "dir",
257            Self::Rev => "rev",
258            Self::Rel => "rel",
259            Self::Snp => "snp",
260        }
261    }
262
263    fn parse(s: &str) -> Option<Self> {
264        match s.to_ascii_lowercase().as_str() {
265            "cnt" => Some(Self::Cnt),
266            "dir" => Some(Self::Dir),
267            "rev" => Some(Self::Rev),
268            "rel" => Some(Self::Rel),
269            "snp" => Some(Self::Snp),
270            _ => None,
271        }
272    }
273}
274
275impl fmt::Display for SwhidKind {
276    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
277        f.write_str(self.as_str())
278    }
279}
280
281/// A structured Software Heritage persistent identifier.
282///
283/// Format: `swh:1:<kind>:<sha1-hex-40>[;<qualifier>=<value>...]`. Recognised
284/// by CRA prEN 40000-1-3 `[PRE-7-RQ-07]` as one of the three named identifier
285/// types (alongside PURL and CPE).
286///
287/// Serialised as a plain string in JSON to match CycloneDX/SPDX wire formats
288/// (`["swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", ...]`).
289#[derive(Debug, Clone, PartialEq, Eq, Hash)]
290pub struct SwhidObject {
291    /// Object kind (cnt/dir/rev/rel/snp)
292    pub kind: SwhidKind,
293    /// 20-byte SHA-1 of the canonical object representation
294    pub hash: [u8; 20],
295    /// Optional contextual qualifiers (origin, visit, anchor, path, lines)
296    pub qualifiers: Vec<(String, String)>,
297}
298
/// Reasons a string can be rejected when parsed as a SWHID.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SwhidParseError {
    /// The input lacked the four-part `swh:1:<kind>:<hash>` shape.
    BadShape,
    /// The input did not start with `swh:1:`.
    BadPrefix,
    /// The kind was not one of cnt/dir/rev/rel/snp.
    BadKind,
    /// The hash was not 40 hexadecimal characters.
    BadHash,
    /// A qualifier lacked the `key=value` shape.
    BadQualifier,
}
313
314impl fmt::Display for SwhidParseError {
315    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
316        match self {
317            Self::BadShape => f.write_str("SWHID does not have shape swh:1:<kind>:<hash>"),
318            Self::BadPrefix => f.write_str("SWHID prefix is not 'swh:1:'"),
319            Self::BadKind => f.write_str("SWHID kind must be one of cnt/dir/rev/rel/snp"),
320            Self::BadHash => f.write_str("SWHID hash must be 40 hexadecimal characters"),
321            Self::BadQualifier => f.write_str("SWHID qualifier missing '=' separator"),
322        }
323    }
324}
325
326impl std::error::Error for SwhidParseError {}
327
328impl SwhidObject {
329    /// Parse a SWHID string into structured form.
330    ///
331    /// Validation is case-insensitive on the prefix, kind, and hash; the
332    /// canonical form (returned by `Display`) is lowercase. Qualifier values
333    /// are preserved verbatim — the SWHID spec does not mandate a case
334    /// convention for qualifier values (e.g., URLs in `origin=`).
335    pub fn parse(s: &str) -> Result<Self, SwhidParseError> {
336        let (core, qualifier_str) = s.split_once(';').unwrap_or((s, ""));
337        let parts: Vec<&str> = core.split(':').collect();
338        if parts.len() != 4 {
339            return Err(SwhidParseError::BadShape);
340        }
341        if !parts[0].eq_ignore_ascii_case("swh") || parts[1] != "1" {
342            return Err(SwhidParseError::BadPrefix);
343        }
344        let kind = SwhidKind::parse(parts[2]).ok_or(SwhidParseError::BadKind)?;
345
346        if parts[3].len() != 40 || !parts[3].chars().all(|c| c.is_ascii_hexdigit()) {
347            return Err(SwhidParseError::BadHash);
348        }
349        let mut hash = [0u8; 20];
350        let bytes = parts[3].as_bytes();
351        for (i, byte) in hash.iter_mut().enumerate() {
352            let high = hex_digit(bytes[i * 2]).ok_or(SwhidParseError::BadHash)?;
353            let low = hex_digit(bytes[i * 2 + 1]).ok_or(SwhidParseError::BadHash)?;
354            *byte = (high << 4) | low;
355        }
356
357        let mut qualifiers = Vec::new();
358        if !qualifier_str.is_empty() {
359            for q in qualifier_str.split(';') {
360                if q.is_empty() {
361                    continue;
362                }
363                let (k, v) = q.split_once('=').ok_or(SwhidParseError::BadQualifier)?;
364                qualifiers.push((k.to_string(), v.to_string()));
365            }
366        }
367
368        Ok(Self {
369            kind,
370            hash,
371            qualifiers,
372        })
373    }
374
375    /// Canonical lowercase hex representation of the SHA-1 hash.
376    #[must_use]
377    pub fn hash_hex(&self) -> String {
378        let mut s = String::with_capacity(40);
379        for b in &self.hash {
380            s.push(hex_char(b >> 4));
381            s.push(hex_char(b & 0xf));
382        }
383        s
384    }
385}
386
/// Maps a nibble (`0..=15`) to its lowercase hexadecimal character.
///
/// Any value above 15 yields `'?'`.
const fn hex_char(n: u8) -> char {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    if n < 16 {
        DIGITS[n as usize] as char
    } else {
        '?'
    }
}
394
/// Decodes one ASCII hexadecimal digit (either case) into its value `0..=15`.
///
/// Returns `None` for any byte that is not `0-9`, `a-f`, or `A-F`.
const fn hex_digit(c: u8) -> Option<u8> {
    // Folding case first lets a single letter range cover both spellings.
    match c.to_ascii_lowercase() {
        digit @ b'0'..=b'9' => Some(digit - b'0'),
        letter @ b'a'..=b'f' => Some(letter - b'a' + 10),
        _ => None,
    }
}
403
404impl fmt::Display for SwhidObject {
405    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
406        write!(f, "swh:1:{}:{}", self.kind, self.hash_hex())?;
407        for (k, v) in &self.qualifiers {
408            write!(f, ";{k}={v}")?;
409        }
410        Ok(())
411    }
412}
413
414// Keep the wire format as a plain string so CycloneDX/SPDX I/O stays unchanged.
415impl Serialize for SwhidObject {
416    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
417        serializer.serialize_str(&self.to_string())
418    }
419}
420
421impl<'de> Deserialize<'de> for SwhidObject {
422    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
423        let s = String::deserialize(deserializer)?;
424        Self::parse(&s).map_err(serde::de::Error::custom)
425    }
426}
427
428/// Validate a SWHID string (convenience predicate over `SwhidObject::parse`).
429#[must_use]
430pub fn is_valid_swhid(s: &str) -> bool {
431    SwhidObject::parse(s).is_ok()
432}
433
434impl PartialEq for CanonicalId {
435    fn eq(&self, other: &Self) -> bool {
436        self.value == other.value
437    }
438}
439
440impl Hash for CanonicalId {
441    fn hash<H: Hasher>(&self, state: &mut H) {
442        self.value.hash(state);
443    }
444}
445
446impl fmt::Display for CanonicalId {
447    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
448        write!(f, "{}", self.value)
449    }
450}
451
452/// Component identifiers from various sources
453#[derive(Debug, Clone, Default, Serialize, Deserialize)]
454pub struct ComponentIdentifiers {
455    /// Package URL (preferred identifier)
456    pub purl: Option<String>,
457    /// Common Platform Enumeration identifiers
458    pub cpe: Vec<String>,
459    /// Software Heritage persistent identifiers (SWHIDs).
460    ///
461    /// Multiple values supported because a component may be expressible by
462    /// several SWHID kinds (e.g., one `cnt` per archive entry plus a `dir`
463    /// for the unpacked tree). CRA prEN 40000-1-3 `[PRE-7-RQ-07]` accepts
464    /// SWHIDs as one of three named identifier types.
465    ///
466    /// Stored as structured `SwhidObject` for downstream consumers; on the
467    /// wire (JSON), each element serialises as a plain string to match the
468    /// CycloneDX / SPDX 3.0 `swhid` array shape.
469    #[serde(default, skip_serializing_if = "Vec::is_empty")]
470    pub swhid: Vec<SwhidObject>,
471    /// Software Identification tag
472    pub swid: Option<String>,
473    /// Original format-specific identifier
474    pub format_id: String,
475    /// Known aliases for this component
476    pub aliases: Vec<String>,
477}
478
479/// Result of canonical ID generation, including stability information
480#[derive(Debug, Clone)]
481pub struct CanonicalIdResult {
482    /// The canonical ID
483    pub id: CanonicalId,
484    /// Warning message if fallback was used
485    pub warning: Option<String>,
486}
487
488impl ComponentIdentifiers {
489    /// Create a new empty set of identifiers
490    #[must_use]
491    pub fn new(format_id: String) -> Self {
492        Self {
493            format_id,
494            ..Default::default()
495        }
496    }
497
498    /// Get the best available canonical ID (without component context)
499    ///
500    /// For better stability, prefer `canonical_id_with_context()` which can
501    /// generate synthetic IDs from component metadata.
502    #[must_use]
503    pub fn canonical_id(&self) -> CanonicalId {
504        // Tiered fallback: PURL → CPE → SWHID → SWID → format_id
505        if let Some(purl) = &self.purl {
506            return CanonicalId::from_purl(purl);
507        }
508        if let Some(cpe) = self.cpe.first() {
509            return CanonicalId::from_cpe(cpe);
510        }
511        if let Some(swhid) = self.swhid.first() {
512            return CanonicalId::from_swhid_object(swhid);
513        }
514        if let Some(swid) = &self.swid {
515            return CanonicalId::from_swid(swid);
516        }
517        CanonicalId::from_format_id(&self.format_id)
518    }
519
520    /// Get the best available canonical ID with component context for stable fallback
521    ///
522    /// This method uses a tiered fallback strategy:
523    /// 1. PURL (most reliable)
524    /// 2. CPE
525    /// 3. SWHID (content-addressed, CRA prEN 40000-1-3 named)
526    /// 4. SWID
527    /// 5. Synthetic (group:name@version) - stable across regenerations
528    /// 6. Format-specific ID (least stable)
529    ///
530    /// Returns both the ID and any warnings about stability.
531    #[must_use]
532    pub fn canonical_id_with_context(
533        &self,
534        name: &str,
535        version: Option<&str>,
536        group: Option<&str>,
537    ) -> CanonicalIdResult {
538        // Tier 1: PURL (best)
539        if let Some(purl) = &self.purl {
540            return CanonicalIdResult {
541                id: CanonicalId::from_purl(purl),
542                warning: None,
543            };
544        }
545
546        // Tier 2: CPE
547        if let Some(cpe) = self.cpe.first() {
548            return CanonicalIdResult {
549                id: CanonicalId::from_cpe(cpe),
550                warning: None,
551            };
552        }
553
554        // Tier 3: SWHID (content-addressed)
555        if let Some(swhid) = self.swhid.first() {
556            return CanonicalIdResult {
557                id: CanonicalId::from_swhid_object(swhid),
558                warning: None,
559            };
560        }
561
562        // Tier 4: SWID
563        if let Some(swid) = &self.swid {
564            return CanonicalIdResult {
565                id: CanonicalId::from_swid(swid),
566                warning: None,
567            };
568        }
569
570        // Tier 5: Synthetic from name/version/group (stable)
571        // Only use if we have at least a name
572        if !name.is_empty() {
573            return CanonicalIdResult {
574                id: CanonicalId::synthetic(group, name, version),
575                warning: Some(format!(
576                    "Component '{name}' lacks PURL/CPE/SWHID/SWID identifiers; using synthetic ID. \
577                     Consider enriching SBOM with package URLs for accurate diffing."
578                )),
579            };
580        }
581
582        // Tier 6: Format-specific (least stable - may be UUID)
583        let id = CanonicalId::from_format_id(&self.format_id);
584        let warning = if id.is_stable() {
585            Some(format!(
586                "Component uses format-specific ID '{}' without standard identifiers.",
587                self.format_id
588            ))
589        } else {
590            Some(format!(
591                "Component uses unstable format-specific ID '{}'. \
592                 This may cause inaccurate diff results across SBOM regenerations.",
593                self.format_id
594            ))
595        };
596
597        CanonicalIdResult { id, warning }
598    }
599
600    /// Check if this component has any stable identifiers
601    #[must_use]
602    pub fn has_stable_id(&self) -> bool {
603        self.purl.is_some() || !self.cpe.is_empty() || !self.swhid.is_empty() || self.swid.is_some()
604    }
605
606    /// Get the reliability level of available identifiers
607    #[must_use]
608    pub fn id_reliability(&self) -> IdReliability {
609        if self.purl.is_some() {
610            IdReliability::High
611        } else if !self.cpe.is_empty() || !self.swhid.is_empty() || self.swid.is_some() {
612            IdReliability::Medium
613        } else {
614            IdReliability::Low
615        }
616    }
617
618    /// Returns true if this component has any of the CRA-named identifier
619    /// types (PURL, CPE, SWHID, or SWID), satisfying CRA Annex I Part II
620    /// identifier-traceability and prEN 40000-1-3 `[PRE-7-RQ-07]`.
621    #[must_use]
622    pub fn has_cra_identifier(&self) -> bool {
623        self.purl.is_some() || !self.cpe.is_empty() || !self.swhid.is_empty() || self.swid.is_some()
624    }
625}
626
627/// Reliability level of component identification
628#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
629pub enum IdReliability {
630    /// High reliability (PURL available)
631    High,
632    /// Medium reliability (CPE or SWID available)
633    Medium,
634    /// Low reliability (synthetic or format-specific only)
635    Low,
636}
637
638impl fmt::Display for IdReliability {
639    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
640        match self {
641            Self::High => write!(f, "high"),
642            Self::Medium => write!(f, "medium"),
643            Self::Low => write!(f, "low"),
644        }
645    }
646}
647
648/// Ecosystem/package manager type
649#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
650#[non_exhaustive]
651pub enum Ecosystem {
652    Npm,
653    PyPi,
654    Cargo,
655    Maven,
656    Golang,
657    Nuget,
658    RubyGems,
659    Composer,
660    CocoaPods,
661    Swift,
662    Hex,
663    Pub,
664    Hackage,
665    Cpan,
666    Cran,
667    Conda,
668    Conan,
669    Deb,
670    Rpm,
671    Apk,
672    /// HuggingFace Hub ML model (`pkg:huggingface/...`). Not a classical
673    /// package ecosystem; surfaced so ML components are routed through the
674    /// vulnerability/exploitability enrichment stack rather than silently
675    /// treated as `Unknown`.
676    HuggingFace,
677    Generic,
678    Unknown(String),
679}
680
681impl Ecosystem {
682    /// Parse ecosystem from PURL type
683    #[must_use]
684    pub fn from_purl_type(purl_type: &str) -> Self {
685        match purl_type.to_lowercase().as_str() {
686            "npm" => Self::Npm,
687            "pypi" => Self::PyPi,
688            "cargo" => Self::Cargo,
689            "maven" => Self::Maven,
690            "golang" | "go" => Self::Golang,
691            "nuget" => Self::Nuget,
692            "gem" => Self::RubyGems,
693            "composer" => Self::Composer,
694            "cocoapods" => Self::CocoaPods,
695            "swift" => Self::Swift,
696            "hex" => Self::Hex,
697            "pub" => Self::Pub,
698            "hackage" => Self::Hackage,
699            "cpan" => Self::Cpan,
700            "cran" => Self::Cran,
701            "conda" => Self::Conda,
702            "conan" => Self::Conan,
703            "deb" => Self::Deb,
704            "rpm" => Self::Rpm,
705            "apk" => Self::Apk,
706            "huggingface" => Self::HuggingFace,
707            "generic" => Self::Generic,
708            other => Self::Unknown(other.to_string()),
709        }
710    }
711}
712
713impl fmt::Display for Ecosystem {
714    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
715        match self {
716            Self::Npm => write!(f, "npm"),
717            Self::PyPi => write!(f, "pypi"),
718            Self::Cargo => write!(f, "cargo"),
719            Self::Maven => write!(f, "maven"),
720            Self::Golang => write!(f, "golang"),
721            Self::Nuget => write!(f, "nuget"),
722            Self::RubyGems => write!(f, "gem"),
723            Self::Composer => write!(f, "composer"),
724            Self::CocoaPods => write!(f, "cocoapods"),
725            Self::Swift => write!(f, "swift"),
726            Self::Hex => write!(f, "hex"),
727            Self::Pub => write!(f, "pub"),
728            Self::Hackage => write!(f, "hackage"),
729            Self::Cpan => write!(f, "cpan"),
730            Self::Cran => write!(f, "cran"),
731            Self::Conda => write!(f, "conda"),
732            Self::Conan => write!(f, "conan"),
733            Self::Deb => write!(f, "deb"),
734            Self::Rpm => write!(f, "rpm"),
735            Self::Apk => write!(f, "apk"),
736            Self::HuggingFace => write!(f, "huggingface"),
737            Self::Generic => write!(f, "generic"),
738            Self::Unknown(s) => write!(f, "{s}"),
739        }
740    }
741}
742
743// ============================================================================
744// ComponentRef: Lightweight reference combining ID and display name
745// ============================================================================
746
747/// A lightweight reference to a component, combining its stable ID with
748/// a human-readable display name.
749///
750/// This type is used throughout the diff system and TUI to:
751/// - Navigate and link by ID (stable, unique)
752/// - Display by name (human-readable)
753///
754/// # Example
755/// ```ignore
756/// let comp_ref = ComponentRef::new(component.canonical_id.clone(), &component.name);
757/// println!("Component: {} (ID: {})", comp_ref.name(), comp_ref.id());
758/// ```
759#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
760pub struct ComponentRef {
761    /// The stable canonical ID for linking and navigation
762    id: CanonicalId,
763    /// Human-readable name for display
764    name: String,
765    /// Optional version for display context
766    #[serde(skip_serializing_if = "Option::is_none")]
767    version: Option<String>,
768}
769
770impl ComponentRef {
771    /// Create a new component reference
772    pub fn new(id: CanonicalId, name: impl Into<String>) -> Self {
773        Self {
774            id,
775            name: name.into(),
776            version: None,
777        }
778    }
779
780    /// Create a component reference with version
781    pub fn with_version(id: CanonicalId, name: impl Into<String>, version: Option<String>) -> Self {
782        Self {
783            id,
784            name: name.into(),
785            version,
786        }
787    }
788
789    /// Create from a Component
790    #[must_use]
791    pub fn from_component(component: &super::Component) -> Self {
792        Self {
793            id: component.canonical_id.clone(),
794            name: component.name.clone(),
795            version: component.version.clone(),
796        }
797    }
798
799    /// Get the canonical ID
800    #[must_use]
801    pub const fn id(&self) -> &CanonicalId {
802        &self.id
803    }
804
805    /// Get the ID as a string
806    #[must_use]
807    pub fn id_str(&self) -> &str {
808        self.id.value()
809    }
810
811    /// Get the display name
812    #[must_use]
813    pub fn name(&self) -> &str {
814        &self.name
815    }
816
817    /// Get the version if available
818    #[must_use]
819    pub fn version(&self) -> Option<&str> {
820        self.version.as_deref()
821    }
822
823    /// Get display string with version if available
824    #[must_use]
825    pub fn display_with_version(&self) -> String {
826        self.version
827            .as_ref()
828            .map_or_else(|| self.name.clone(), |v| format!("{}@{}", self.name, v))
829    }
830
831    /// Check if this ref matches a given ID
832    #[must_use]
833    pub fn matches_id(&self, id: &CanonicalId) -> bool {
834        &self.id == id
835    }
836
837    /// Check if this ref matches a given ID string
838    #[must_use]
839    pub fn matches_id_str(&self, id_str: &str) -> bool {
840        self.id.value() == id_str
841    }
842}
843
844impl fmt::Display for ComponentRef {
845    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
846        write!(f, "{}", self.name)
847    }
848}
849
850impl From<&super::Component> for ComponentRef {
851    fn from(component: &super::Component) -> Self {
852        Self::from_component(component)
853    }
854}
855
856/// A reference to a vulnerability with its associated component
857#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
858pub struct VulnerabilityRef2 {
859    /// Vulnerability ID (e.g., CVE-2021-44228)
860    pub vuln_id: String,
861    /// Reference to the affected component
862    pub component: ComponentRef,
863}
864
865impl VulnerabilityRef2 {
866    /// Create a new vulnerability reference
867    pub fn new(vuln_id: impl Into<String>, component: ComponentRef) -> Self {
868        Self {
869            vuln_id: vuln_id.into(),
870            component,
871        }
872    }
873
874    /// Get the component's canonical ID
875    #[must_use]
876    pub const fn component_id(&self) -> &CanonicalId {
877        self.component.id()
878    }
879
880    /// Get the component name for display
881    #[must_use]
882    pub fn component_name(&self) -> &str {
883        self.component.name()
884    }
885}
886
#[cfg(test)]
mod swhid_tests {
    use super::*;

    /// Well-formed content (`cnt`) SWHID shared by most tests.
    const CONTENT_SWHID: &str = "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2";
    /// Well-formed directory (`dir`) SWHID used by the serde tests.
    const DIRECTORY_SWHID: &str = "swh:1:dir:309cf2674ee7a0749978cf8265ab91a60aea0f7d";

    /// Fresh identifier set carrying only the format-specific ID.
    fn fresh_identifiers() -> ComponentIdentifiers {
        ComponentIdentifiers::new(String::from("synthetic-1"))
    }

    /// Parse a SWHID that the test expects to be valid.
    fn parsed(raw: &str) -> SwhidObject {
        SwhidObject::parse(raw).unwrap()
    }

    /// A plain content SWHID passes validation.
    #[test]
    fn valid_swhid_content() {
        let accepted = is_valid_swhid(CONTENT_SWHID);
        assert!(accepted);
    }

    /// Each of the five object kinds is accepted.
    #[test]
    fn valid_swhid_all_kinds() {
        const KINDS: [&str; 5] = ["cnt", "dir", "rev", "rel", "snp"];
        for kind in KINDS {
            let candidate = format!("swh:1:{kind}:94a9ed024d3859793618152ea559a168bbcbb5e2");
            let accepted = is_valid_swhid(&candidate);
            assert!(accepted, "kind {kind} should be valid");
        }
    }

    /// A trailing `;origin=...` qualifier does not invalidate the SWHID.
    #[test]
    fn valid_swhid_with_qualifier() {
        assert!(is_valid_swhid(
            "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d;origin=https://github.com/x/y"
        ));
    }

    /// The scheme must be `swh`, not `swhid`.
    #[test]
    fn invalid_swhid_wrong_prefix() {
        let candidate = "swhid:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2";
        assert!(!is_valid_swhid(candidate));
    }

    /// An object kind outside the known set is rejected.
    #[test]
    fn invalid_swhid_unknown_kind() {
        let candidate = "swh:1:foo:94a9ed024d3859793618152ea559a168bbcbb5e2";
        assert!(!is_valid_swhid(candidate));
    }

    /// A truncated hash is rejected.
    #[test]
    fn invalid_swhid_short_hash() {
        let candidate = "swh:1:cnt:94a9ed024d";
        assert!(!is_valid_swhid(candidate));
    }

    /// A hash of the right length containing non-hex characters is rejected.
    #[test]
    fn invalid_swhid_non_hex() {
        let candidate = "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbZZZZ";
        assert!(!is_valid_swhid(candidate));
    }

    /// An invalid SWHID yields an unstable, format-specific canonical ID.
    #[test]
    fn invalid_swhid_falls_back_to_format_specific() {
        let fallback = CanonicalId::from_swhid("swh:1:foo:bad");
        assert_eq!(fallback.source(), &IdSource::FormatSpecific);
        assert!(!fallback.is_stable());
    }

    /// A valid SWHID with an upper-case hash yields a stable `Swhid`-sourced
    /// ID whose value is lower-cased.
    #[test]
    fn valid_swhid_construction_and_round_trip() {
        let canonical = CanonicalId::from_swhid("swh:1:cnt:94A9ED024D3859793618152EA559A168BBCBB5E2");
        assert_eq!(canonical.source(), &IdSource::Swhid);
        assert!(canonical.is_stable());
        assert_eq!(canonical.value(), CONTENT_SWHID);
    }

    /// Core part is lower-cased while the qualifier text keeps its case.
    #[test]
    fn swhid_qualifier_preserved_after_normalization() {
        let expected = "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d;origin=Https://X.Y";
        let canonical = CanonicalId::from_swhid(
            "swh:1:REV:309CF2674EE7A0749978CF8265AB91A60AEA0F7D;origin=Https://X.Y",
        );
        assert_eq!(canonical.value(), expected);
    }

    /// With both a PURL and a SWHID present, the canonical ID comes from the PURL.
    #[test]
    fn component_identifiers_canonical_id_prefers_purl() {
        let mut identifiers = fresh_identifiers();
        identifiers.purl = Some(String::from("pkg:cargo/serde@1.0.0"));
        identifiers.swhid.push(parsed(CONTENT_SWHID));
        let canonical = identifiers.canonical_id();
        assert_eq!(canonical.source(), &IdSource::Purl);
    }

    /// With only a SWHID present, the canonical ID comes from the SWHID.
    #[test]
    fn component_identifiers_canonical_id_uses_swhid_when_purl_absent() {
        let mut identifiers = fresh_identifiers();
        identifiers.swhid.push(parsed(CONTENT_SWHID));
        let canonical = identifiers.canonical_id();
        assert_eq!(canonical.source(), &IdSource::Swhid);
    }

    /// `has_cra_identifier` is false for a fresh set and true once a SWHID is added.
    #[test]
    fn has_cra_identifier_recognizes_swhid_only() {
        let mut identifiers = fresh_identifiers();
        assert!(!identifiers.has_cra_identifier());

        identifiers.swhid.push(parsed(CONTENT_SWHID));
        assert!(identifiers.has_cra_identifier());
    }

    /// Parsing then displaying a qualifier-free SWHID reproduces the input.
    #[test]
    fn swhid_object_round_trip_via_display() {
        let object = parsed(CONTENT_SWHID);
        assert_eq!(object.kind, SwhidKind::Cnt);
        assert_eq!(object.qualifiers.len(), 0);
        assert_eq!(object.to_string(), CONTENT_SWHID);
    }

    /// Qualifiers are kept as ordered key/value pairs and survive display.
    #[test]
    fn swhid_object_preserves_qualifiers_in_order() {
        let input = "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d;origin=https://github.com/x/y;path=/src";
        let object = parsed(input);
        let expected_origin = (
            String::from("origin"),
            String::from("https://github.com/x/y"),
        );
        let expected_path = (String::from("path"), String::from("/src"));

        assert_eq!(object.kind, SwhidKind::Rev);
        assert_eq!(object.qualifiers.len(), 2);
        assert_eq!(object.qualifiers[0], expected_origin);
        assert_eq!(object.qualifiers[1], expected_path);
        assert_eq!(object.to_string(), input);
    }

    /// Fully upper-case input is displayed in lower case after parsing.
    #[test]
    fn swhid_object_lowercases_uppercase_input() {
        let object = parsed("SWH:1:CNT:94A9ED024D3859793618152EA559A168BBCBB5E2");
        let rendered = object.to_string();
        assert_eq!(rendered, CONTENT_SWHID);
    }

    /// A `SwhidObject` serialises to a bare JSON string and deserialises back.
    #[test]
    fn swhid_object_serde_round_trip_as_string() {
        let original = parsed(DIRECTORY_SWHID);
        let encoded = serde_json::to_string(&original).unwrap();
        assert_eq!(
            encoded,
            "\"swh:1:dir:309cf2674ee7a0749978cf8265ab91a60aea0f7d\""
        );

        let decoded = serde_json::from_str::<SwhidObject>(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    /// Each malformed input maps to its specific `SwhidParseError` variant.
    #[test]
    fn swhid_object_parse_errors() {
        let parse_failure = |raw: &str| SwhidObject::parse(raw).unwrap_err();

        assert_eq!(parse_failure("not-a-swhid"), SwhidParseError::BadShape);
        assert_eq!(
            parse_failure("swh:2:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"),
            SwhidParseError::BadPrefix
        );
        assert_eq!(
            parse_failure("swh:1:foo:94a9ed024d3859793618152ea559a168bbcbb5e2"),
            SwhidParseError::BadKind
        );
        assert_eq!(
            parse_failure("swh:1:cnt:not-hex"),
            SwhidParseError::BadHash
        );
        assert_eq!(
            parse_failure("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2;malformed"),
            SwhidParseError::BadQualifier
        );
    }

    /// Inside `ComponentIdentifiers`, SWHIDs serialise as a JSON array of
    /// strings and deserialise back with their kinds intact.
    #[test]
    fn swhid_object_serializes_within_component_identifiers_as_array_of_strings() {
        let mut identifiers = fresh_identifiers();
        for raw in [CONTENT_SWHID, DIRECTORY_SWHID] {
            identifiers.swhid.push(parsed(raw));
        }

        let encoded = serde_json::to_value(&identifiers).unwrap();
        let entries = encoded
            .get("swhid")
            .and_then(serde_json::Value::as_array)
            .expect("swhid serialises as array");
        assert_eq!(entries.len(), 2);
        assert!(entries.iter().all(|entry| entry.is_string()));

        // Deserialising the same JSON value must rebuild both objects in order.
        let decoded = serde_json::from_value::<ComponentIdentifiers>(encoded).unwrap();
        assert_eq!(decoded.swhid.len(), 2);
        assert_eq!(decoded.swhid[0].kind, SwhidKind::Cnt);
        assert_eq!(decoded.swhid[1].kind, SwhidKind::Dir);
    }
}