Skip to main content

vulnera_advisor/
purl.rs

1//! Package URL (PURL) builder and parser.
2//!
3//! Provides a convenient way to construct and parse Package URLs
4//! following the [PURL specification](https://github.com/package-url/purl-spec).
5//!
6//! # Example
7//!
8//! ```rust
9//! use vulnera_advisor::Purl;
10//!
11//! // Simple PURL
12//! let purl = Purl::new("npm", "lodash")
13//!     .with_version("4.17.20")
14//!     .to_string();
15//! assert_eq!(purl, "pkg:npm/lodash@4.17.20");
16//!
17//! // Maven with namespace (groupId)
18//! let purl = Purl::new("maven", "spring-core")
19//!     .with_namespace("org.springframework")
20//!     .with_version("5.3.9")
21//!     .to_string();
22//! assert_eq!(purl, "pkg:maven/org.springframework/spring-core@5.3.9");
23//! ```
24
25use std::collections::hash_map::DefaultHasher;
26use std::fmt;
27use std::hash::{Hash, Hasher};
28
29use crate::ecosystem::canonicalize_ecosystem;
30
/// PURL `type` values that this module accepts as valid ecosystems.
///
/// Every entry is lowercase and the list is kept in alphabetical order.
/// It is intended to cover the ecosystems supported by OSS Index and
/// other vulnerability databases.
pub const KNOWN_ECOSYSTEMS: &[&str] = &[
    "cargo",     // Rust (crates)
    "cocoapods", // CocoaPods for iOS/macOS
    "composer",  // Composer for PHP
    "conan",     // Conan for C/C++
    "conda",     // Conda
    "cran",      // R
    "deb",       // Debian
    "gem",       // Ruby
    "generic",   // Anything without a dedicated type
    "github",    // Repositories hosted on GitHub
    "golang",    // Go modules
    "hex",       // Hex for Erlang/Elixir
    "maven",     // Maven for Java
    "npm",       // npm for Node.js
    "nuget",     // NuGet for .NET
    "pub",       // pub for Dart/Flutter
    "pypi",      // PyPI for Python
    "rpm",       // RPM
    "swift",     // Swift
];
56
/// Pairs of `(common ecosystem name, PURL type)`.
///
/// Several registries are commonly known by a name that differs from the
/// PURL type used for them. The table is scanned front to back and the first
/// match wins, both when mapping a common name to a type and when mapping a
/// type back to its common name, so the order of the rows matters.
const ECOSYSTEM_MAPPINGS: &[(&str, &str)] = &[
    // (common name, PURL type)
    ("crates.io", "cargo"),
    ("PyPI", "pypi"),
    ("RubyGems", "gem"),
    ("Go", "golang"),
    ("Packagist", "composer"),
    ("NuGet", "nuget"),
    ("Hex", "hex"),
    ("Pub", "pub"),
];
69
70/// Error returned when PURL validation fails.
71#[derive(Debug, Clone, thiserror::Error)]
72pub enum PurlError {
73    /// The ecosystem/type is not recognized.
74    #[error("Unknown ecosystem '{0}'. Known ecosystems: cargo, npm, pypi, maven, etc.")]
75    UnknownEcosystem(String),
76
77    /// The PURL string format is invalid.
78    #[error("Invalid PURL format: {0}")]
79    InvalidFormat(String),
80
81    /// The package name is empty or invalid.
82    #[error("Invalid package name: {0}")]
83    InvalidName(String),
84}
85
/// A Package URL (PURL): a standardized identifier for a software package
/// that works across ecosystems.
///
/// Values are normally assembled builder-style, starting from [`Purl::new`]
/// and chaining `with_*` calls, and rendered with `to_string()`.
///
/// # Format
///
/// ```text
/// pkg:type/namespace/name@version?qualifiers#subpath
/// ```
///
/// - **type** (required): the package ecosystem (npm, maven, pypi, ...)
/// - **namespace** (optional): a scope or group (e.g. Maven groupId, npm scope)
/// - **name** (required): the package name
/// - **version** (optional): a specific version
///
/// Qualifiers and subpath are part of the format but are not stored by this
/// struct.
///
/// # Example
///
/// ```rust
/// use vulnera_advisor::Purl;
///
/// // Scoped npm package
/// let purl = Purl::new("npm", "core")
///     .with_namespace("@angular")
///     .with_version("12.0.0")
///     .to_string();
/// assert_eq!(purl, "pkg:npm/%40angular/core@12.0.0");
/// ```
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Purl {
    /// The PURL type, i.e. the package ecosystem.
    pub purl_type: String,
    /// The namespace, when there is one (e.g. Maven groupId, npm scope).
    pub namespace: Option<String>,
    /// The name of the package.
    pub name: String,
    /// The version, when one has been set.
    pub version: Option<String>,
}
126
127impl Purl {
128    /// Create a new PURL with the given ecosystem and package name.
129    ///
130    /// The ecosystem is automatically mapped to the correct PURL type
131    /// (e.g., "crates.io" → "cargo", "PyPI" → "pypi").
132    ///
133    /// # Arguments
134    ///
135    /// * `ecosystem` - The package ecosystem (e.g., "npm", "crates.io", "PyPI")
136    /// * `name` - The package name
137    ///
138    /// # Example
139    ///
140    /// ```rust
141    /// use vulnera_advisor::Purl;
142    ///
143    /// let purl = Purl::new("crates.io", "serde");
144    /// assert_eq!(purl.purl_type, "cargo");
145    /// ```
146    pub fn new(ecosystem: impl Into<String>, name: impl Into<String>) -> Self {
147        let eco = ecosystem.into();
148        let purl_type = Self::map_ecosystem(&eco);
149
150        Self {
151            purl_type,
152            namespace: None,
153            name: name.into(),
154            version: None,
155        }
156    }
157
158    /// Create a new PURL with validation.
159    ///
160    /// Returns an error if the ecosystem is not in the known list.
161    ///
162    /// # Example
163    ///
164    /// ```rust
165    /// use vulnera_advisor::Purl;
166    ///
167    /// // Valid ecosystem
168    /// let purl = Purl::new_validated("npm", "lodash").unwrap();
169    ///
170    /// // Invalid ecosystem
171    /// let result = Purl::new_validated("invalid", "package");
172    /// assert!(result.is_err());
173    /// ```
174    pub fn new_validated(
175        ecosystem: impl Into<String>,
176        name: impl Into<String>,
177    ) -> Result<Self, PurlError> {
178        let eco = ecosystem.into();
179        let name = name.into();
180
181        if name.is_empty() {
182            return Err(PurlError::InvalidName(
183                "Package name cannot be empty".into(),
184            ));
185        }
186
187        let purl_type = Self::map_ecosystem(&eco);
188
189        if !Self::is_known_ecosystem(&purl_type) {
190            return Err(PurlError::UnknownEcosystem(eco));
191        }
192
193        Ok(Self {
194            purl_type,
195            namespace: None,
196            name,
197            version: None,
198        })
199    }
200
201    /// Check if an ecosystem type is in the known list.
202    pub fn is_known_ecosystem(purl_type: &str) -> bool {
203        KNOWN_ECOSYSTEMS.contains(&purl_type.to_lowercase().as_str())
204    }
205
206    /// Add a namespace (e.g., Maven groupId, npm scope like "@angular").
207    pub fn with_namespace(mut self, namespace: impl Into<String>) -> Self {
208        self.namespace = Some(namespace.into());
209        self
210    }
211
212    /// Add a version.
213    pub fn with_version(mut self, version: impl Into<String>) -> Self {
214        self.version = Some(version.into());
215        self
216    }
217
218    /// Map common ecosystem names to PURL types.
219    fn map_ecosystem(ecosystem: &str) -> String {
220        if let Some(canonical) = canonicalize_ecosystem(ecosystem) {
221            return match canonical {
222                "cargo" => "cargo".to_string(),
223                "go" => "golang".to_string(),
224                "packagist" => "composer".to_string(),
225                "rubygems" => "gem".to_string(),
226                other => other.to_string(),
227            };
228        }
229
230        for (from, to) in ECOSYSTEM_MAPPINGS {
231            if ecosystem.eq_ignore_ascii_case(from) {
232                return to.to_string();
233            }
234        }
235        ecosystem.to_lowercase()
236    }
237
238    /// URL-encode special characters in PURL components.
239    fn encode_component(s: &str) -> String {
240        s.replace('@', "%40")
241            .replace('/', "%2F")
242            .replace('?', "%3F")
243            .replace('#', "%23")
244    }
245
246    /// URL-decode PURL components.
247    fn decode_component(s: &str) -> String {
248        s.replace("%40", "@")
249            .replace("%2F", "/")
250            .replace("%3F", "?")
251            .replace("%23", "#")
252    }
253
254    /// Parse a PURL string into a Purl struct.
255    ///
256    /// # Example
257    ///
258    /// ```rust
259    /// use vulnera_advisor::Purl;
260    ///
261    /// let purl = Purl::parse("pkg:npm/lodash@4.17.20").unwrap();
262    /// assert_eq!(purl.purl_type, "npm");
263    /// assert_eq!(purl.name, "lodash");
264    /// assert_eq!(purl.version, Some("4.17.20".to_string()));
265    /// ```
266    pub fn parse(s: &str) -> Result<Self, PurlError> {
267        let s = s
268            .strip_prefix("pkg:")
269            .ok_or_else(|| PurlError::InvalidFormat("PURL must start with 'pkg:'".into()))?;
270
271        // Split type from rest
272        let (purl_type, rest) = s
273            .split_once('/')
274            .ok_or_else(|| PurlError::InvalidFormat("Missing '/' after type".into()))?;
275
276        if purl_type.is_empty() {
277            return Err(PurlError::InvalidFormat("Empty PURL type".into()));
278        }
279
280        // Remove qualifiers and subpath for now (everything after ? or #)
281        let rest = rest.split('?').next().unwrap_or(rest);
282        let rest = rest.split('#').next().unwrap_or(rest);
283
284        // Handle version
285        let (path, version) = if let Some((p, v)) = rest.split_once('@') {
286            (p, Some(v.to_string()))
287        } else {
288            (rest, None)
289        };
290
291        // Handle namespace
292        let (namespace, name) = if let Some((ns, n)) = path.rsplit_once('/') {
293            (Some(Self::decode_component(ns)), Self::decode_component(n))
294        } else {
295            (None, Self::decode_component(path))
296        };
297
298        if name.is_empty() {
299            return Err(PurlError::InvalidName(
300                "Package name cannot be empty".into(),
301            ));
302        }
303
304        Ok(Self {
305            purl_type: purl_type.to_string(),
306            namespace,
307            name,
308            version,
309        })
310    }
311
312    /// Get the ecosystem name (reverse mapping from PURL type).
313    ///
314    /// Returns the common ecosystem name for known mappings,
315    /// or the PURL type itself if no mapping exists.
316    pub fn ecosystem(&self) -> String {
317        // Reverse lookup for common mappings
318        for (eco, purl) in ECOSYSTEM_MAPPINGS {
319            if self.purl_type.eq_ignore_ascii_case(purl) {
320                return eco.to_string();
321            }
322        }
323        self.purl_type.clone()
324    }
325
326    /// Generate a hash suitable for use as a cache key.
327    ///
328    /// This creates a deterministic hash of the PURL for use in
329    /// Redis cache keys.
330    pub fn cache_key(&self) -> String {
331        let mut hasher = DefaultHasher::new();
332        self.hash(&mut hasher);
333        format!("{:x}", hasher.finish())
334    }
335
336    /// Generate a cache key from a PURL string.
337    pub fn cache_key_from_str(purl: &str) -> String {
338        let mut hasher = DefaultHasher::new();
339        purl.hash(&mut hasher);
340        format!("{:x}", hasher.finish())
341    }
342}
343
344impl fmt::Display for Purl {
345    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
346        write!(f, "pkg:{}/", self.purl_type)?;
347
348        if let Some(ns) = &self.namespace {
349            write!(f, "{}/", Self::encode_component(ns))?;
350        }
351
352        write!(f, "{}", self.name)?;
353
354        if let Some(v) = &self.version {
355            write!(f, "@{}", v)?;
356        }
357
358        Ok(())
359    }
360}
361
362/// Create a PURL from ecosystem, name, and version.
363///
364/// This is a convenience function for creating PURLs without importing
365/// the `Purl` struct directly.
366///
367/// # Example
368///
369/// ```rust
370/// use vulnera_advisor::purl::purl;
371///
372/// let p = purl("npm", "lodash", "4.17.20");
373/// assert_eq!(p.to_string(), "pkg:npm/lodash@4.17.20");
374/// ```
375pub fn purl(ecosystem: &str, name: &str, version: &str) -> Purl {
376    Purl::new(ecosystem, name).with_version(version)
377}
378
379/// Create multiple PURLs from a list of (ecosystem, name, version) tuples.
380///
381/// # Example
382///
383/// ```rust
384/// use vulnera_advisor::purl::purls_from_packages;
385///
386/// let purls = purls_from_packages(&[
387///     ("npm", "lodash", "4.17.20"),
388///     ("cargo", "serde", "1.0.130"),
389/// ]);
390/// assert_eq!(purls.len(), 2);
391/// ```
392pub fn purls_from_packages(packages: &[(&str, &str, &str)]) -> Vec<Purl> {
393    packages
394        .iter()
395        .map(|(eco, name, ver)| Purl::new(*eco, *name).with_version(*ver))
396        .collect()
397}
398
399/// Convert a list of PURLs to a vector of string references.
400///
401/// Useful for passing to OSS Index queries.
402pub fn purls_to_strings(purls: &[Purl]) -> Vec<String> {
403    purls.iter().map(|p| p.to_string()).collect()
404}
405
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an owned `Some(String)` in expected values.
    fn some(text: &str) -> Option<String> {
        Some(text.to_owned())
    }

    // Rendering: type, name and version only.
    #[test]
    fn test_simple_purl() {
        let rendered = Purl::new("npm", "lodash")
            .with_version("4.17.20")
            .to_string();
        assert_eq!(rendered, "pkg:npm/lodash@4.17.20");
    }

    // Common ecosystem names are translated to PURL types.
    #[test]
    fn test_ecosystem_mapping() {
        let versioned = Purl::new("crates.io", "serde").with_version("1.0.130");
        assert_eq!(versioned.to_string(), "pkg:cargo/serde@1.0.130");

        let unversioned = [
            ("PyPI", "requests", "pkg:pypi/requests"),
            ("RubyGems", "rails", "pkg:gem/rails"),
        ];
        for &(ecosystem, name, expected) in unversioned.iter() {
            assert_eq!(Purl::new(ecosystem, name).to_string(), expected);
        }
    }

    // Rendering with a Maven groupId as namespace.
    #[test]
    fn test_maven_with_namespace() {
        let rendered = Purl::new("maven", "spring-core")
            .with_namespace("org.springframework")
            .with_version("5.3.9")
            .to_string();
        assert_eq!(rendered, "pkg:maven/org.springframework/spring-core@5.3.9");
    }

    // The '@' of an npm scope is percent-encoded in the namespace.
    #[test]
    fn test_npm_scoped() {
        let rendered = Purl::new("npm", "core")
            .with_namespace("@angular")
            .with_version("12.0.0")
            .to_string();
        assert_eq!(rendered, "pkg:npm/%40angular/core@12.0.0");
    }

    // Parsing without a namespace.
    #[test]
    fn test_parse_simple() {
        let parsed = Purl::parse("pkg:npm/lodash@4.17.20").unwrap();
        let expected = Purl {
            purl_type: "npm".to_owned(),
            namespace: None,
            name: "lodash".to_owned(),
            version: some("4.17.20"),
        };
        assert_eq!(parsed, expected);
    }

    // Parsing with a namespace.
    #[test]
    fn test_parse_with_namespace() {
        let parsed = Purl::parse("pkg:maven/org.springframework/spring-core@5.3.9").unwrap();
        let expected = Purl {
            purl_type: "maven".to_owned(),
            namespace: some("org.springframework"),
            name: "spring-core".to_owned(),
            version: some("5.3.9"),
        };
        assert_eq!(parsed, expected);
    }

    // An encoded npm scope is decoded while parsing.
    #[test]
    fn test_parse_scoped_npm() {
        let parsed = Purl::parse("pkg:npm/%40angular/core@12.0.0").unwrap();
        assert_eq!(parsed.namespace, some("@angular"));
        assert_eq!(parsed.name, "core");
    }

    // parse followed by to_string reproduces the input.
    #[test]
    fn test_roundtrip() {
        let inputs = [
            "pkg:npm/lodash@4.17.20",
            "pkg:maven/org.springframework/spring-core@5.3.9",
        ];
        for original in inputs.iter() {
            let reparsed = Purl::parse(original).unwrap();
            assert_eq!(reparsed.to_string(), *original);
        }
    }

    #[test]
    fn test_validation() {
        // Valid ecosystem
        let accepted = [("npm", "lodash"), ("crates.io", "serde"), ("cargo", "serde")];
        for &(ecosystem, name) in accepted.iter() {
            assert!(Purl::new_validated(ecosystem, name).is_ok());
        }

        // Invalid ecosystem
        assert!(Purl::new_validated("invalid_eco", "package").is_err());

        // Empty name
        assert!(Purl::new_validated("npm", "").is_err());
    }

    // PURL types map back to their common ecosystem names.
    #[test]
    fn test_ecosystem_reverse_mapping() {
        let cases = [
            ("cargo", "serde", "crates.io"),
            ("pypi", "requests", "PyPI"),
        ];
        for &(purl_type, name, common_name) in cases.iter() {
            assert_eq!(Purl::new(purl_type, name).ecosystem(), common_name);
        }
    }

    // Equal PURLs share a key; a different version gets a different key.
    #[test]
    fn test_cache_key() {
        let lodash = |version: &str| Purl::new("npm", "lodash").with_version(version);
        let first = lodash("4.17.20");
        let same = lodash("4.17.20");
        let newer = lodash("4.17.21");

        assert_eq!(first.cache_key(), same.cache_key());
        assert_ne!(first.cache_key(), newer.cache_key());
    }

    #[test]
    fn test_purls_from_packages() {
        let packages = [("npm", "lodash", "4.17.20"), ("cargo", "serde", "1.0.130")];
        let purls = purls_from_packages(&packages);

        assert_eq!(purls.len(), 2);
        let rendered: Vec<String> = purls.iter().map(|p| p.to_string()).collect();
        assert_eq!(rendered[0], "pkg:npm/lodash@4.17.20");
        assert_eq!(rendered[1], "pkg:cargo/serde@1.0.130");
    }

    #[test]
    fn test_known_ecosystems() {
        // "NPM" checks that the lookup is case-insensitive.
        for known in ["npm", "cargo", "pypi", "NPM"].iter() {
            assert!(Purl::is_known_ecosystem(known));
        }
        assert!(!Purl::is_known_ecosystem("unknown"));
    }
}