Skip to main content

sbom_tools/matching/
purl.rs

1//! PURL normalization utilities.
2
3use crate::model::Ecosystem;
4
/// PURL normalizer for consistent comparison.
///
/// The type is a stateless unit struct: every normalizer behaves identically,
/// so values are freely copyable and always compare equal to each other.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PurlNormalizer;
7
8impl PurlNormalizer {
9    /// Create a new PURL normalizer
10    #[must_use]
11    pub const fn new() -> Self {
12        Self
13    }
14
15    /// Normalize a PURL for comparison
16    #[must_use]
17    pub fn normalize(&self, purl: &str) -> String {
18        self.normalize_internal(purl)
19    }
20
21    fn normalize_internal(&self, purl: &str) -> String {
22        // Detect ecosystem from PURL
23        self.detect_ecosystem(purl).map_or_else(
24            || purl.to_lowercase(),
25            |ecosystem| match ecosystem {
26                Ecosystem::PyPi => self.normalize_pypi(purl),
27                Ecosystem::Npm => self.normalize_npm(purl),
28                Ecosystem::Cargo => self.normalize_cargo(purl),
29                Ecosystem::Maven => self.normalize_maven(purl),
30                Ecosystem::Golang => self.normalize_golang(purl),
31                Ecosystem::Nuget => self.normalize_nuget(purl),
32                _ => purl.to_lowercase(),
33            },
34        )
35    }
36
37    /// Detect ecosystem from PURL
38    fn detect_ecosystem(&self, purl: &str) -> Option<Ecosystem> {
39        let purl_type = purl.strip_prefix("pkg:")?.split('/').next()?;
40
41        Some(Ecosystem::from_purl_type(purl_type))
42    }
43
44    /// Normalize `PyPI` PURL
45    /// `PyPI` names are case-insensitive and treat `_`, `-`, `.` as equivalent
46    fn normalize_pypi(&self, purl: &str) -> String {
47        let lower = purl.to_lowercase();
48        // Replace underscores and dots with hyphens
49        lower.replace(['_', '.'], "-")
50    }
51
52    /// Normalize npm PURL
53    /// npm names are lowercase, scopes use @ prefix
54    fn normalize_npm(&self, purl: &str) -> String {
55        let mut normalized = purl.to_lowercase();
56        // Decode URL-encoded @ for scopes
57        normalized = normalized.replace("%40", "@");
58        normalized
59    }
60
61    /// Normalize Cargo PURL
62    /// Cargo treats hyphens and underscores as equivalent (but prefers underscores)
63    fn normalize_cargo(&self, purl: &str) -> String {
64        let lower = purl.to_lowercase();
65        // Normalize to underscores (Cargo's canonical form)
66        lower.replace('-', "_")
67    }
68
69    /// Normalize Maven PURL
70    /// Maven is case-sensitive for groupId and artifactId
71    fn normalize_maven(&self, purl: &str) -> String {
72        // Maven PURLs should preserve case
73        purl.to_string()
74    }
75
76    /// Normalize Go PURL
77    /// Go modules are case-sensitive and may have /v2 suffixes
78    fn normalize_golang(&self, purl: &str) -> String {
79        // Go PURLs should preserve case
80        purl.to_string()
81    }
82
83    /// Normalize `NuGet` PURL
84    /// `NuGet` package IDs are case-insensitive
85    fn normalize_nuget(&self, purl: &str) -> String {
86        purl.to_lowercase()
87    }
88
89    /// Extract package name from PURL
90    #[must_use]
91    pub fn extract_name(&self, purl: &str) -> Option<String> {
92        let without_pkg = purl.strip_prefix("pkg:")?;
93        let parts: Vec<&str> = without_pkg.split('/').collect();
94
95        let name_part = if parts.len() >= 2 {
96            // Handle namespace/name format
97            parts.last()?
98        } else {
99            return None;
100        };
101
102        // Remove version, qualifiers, subpath
103        let name = name_part
104            .split('@')
105            .next()?
106            .split('?')
107            .next()?
108            .split('#')
109            .next()?;
110
111        Some(name.to_string())
112    }
113
114    /// Extract version from PURL
115    #[must_use]
116    pub fn extract_version(&self, purl: &str) -> Option<String> {
117        let at_pos = purl.find('@')?;
118        let version_part = &purl[at_pos + 1..];
119
120        // Remove qualifiers and subpath
121        let version = version_part.split('?').next()?.split('#').next()?;
122
123        Some(version.to_string())
124    }
125
126    /// Extract ecosystem type from PURL
127    #[must_use]
128    pub fn extract_type(&self, purl: &str) -> Option<String> {
129        let without_pkg = purl.strip_prefix("pkg:")?;
130        let purl_type = without_pkg.split('/').next()?;
131        Some(purl_type.to_string())
132    }
133
134    /// Check if two PURLs refer to the same package (ignoring version)
135    #[must_use]
136    pub fn same_package(&self, purl_a: &str, purl_b: &str) -> bool {
137        let norm_a = self.normalize(purl_a);
138        let norm_b = self.normalize(purl_b);
139
140        // Remove version for comparison
141        let base_a = norm_a.split('@').next().unwrap_or(&norm_a);
142        let base_b = norm_b.split('@').next().unwrap_or(&norm_b);
143
144        base_a == base_b
145    }
146}
147
148impl Default for PurlNormalizer {
149    fn default() -> Self {
150        Self::new()
151    }
152}
153
#[cfg(test)]
mod tests {
    use super::*;

    /// `PyPI` names compare equal regardless of letter case.
    #[test]
    fn test_pypi_normalization() {
        let normalizer = PurlNormalizer::new();

        let purl1 = "pkg:pypi/Pillow@9.0.0";
        let purl2 = "pkg:pypi/pillow@9.0.0";

        assert_eq!(normalizer.normalize(purl1), normalizer.normalize(purl2));
    }

    /// `PyPI` names compare equal regardless of `-` / `_` spelling.
    #[test]
    fn test_pypi_separator_normalization() {
        let normalizer = PurlNormalizer::new();

        let purl1 = "pkg:pypi/python-dateutil@2.8.2";
        let purl2 = "pkg:pypi/python_dateutil@2.8.2";

        assert_eq!(normalizer.normalize(purl1), normalizer.normalize(purl2));
    }

    /// An encoded npm scope (`%40`) matches the literal `@` form.
    #[test]
    fn test_npm_scope_normalization() {
        let normalizer = PurlNormalizer::new();

        let purl1 = "pkg:npm/%40angular/core@15.0.0";
        let purl2 = "pkg:npm/@angular/core@15.0.0";

        assert_eq!(normalizer.normalize(purl1), normalizer.normalize(purl2));
    }

    /// The name is the last path segment, with or without a namespace.
    #[test]
    fn test_extract_name() {
        let normalizer = PurlNormalizer::new();

        assert_eq!(
            normalizer.extract_name("pkg:npm/lodash@4.17.21"),
            Some("lodash".to_string())
        );
        assert_eq!(
            normalizer.extract_name("pkg:maven/org.apache.commons/commons-lang3@3.12.0"),
            Some("commons-lang3".to_string())
        );
    }

    /// Inputs without the `pkg:` prefix or without a name segment yield `None`.
    #[test]
    fn test_extract_name_rejects_incomplete_purls() {
        let normalizer = PurlNormalizer::new();

        assert_eq!(normalizer.extract_name("lodash@4.17.21"), None);
        assert_eq!(normalizer.extract_name("pkg:npm"), None);
    }

    /// The version excludes qualifiers and subpath; no `@` means no version.
    #[test]
    fn test_extract_version() {
        let normalizer = PurlNormalizer::new();

        assert_eq!(
            normalizer.extract_version("pkg:npm/lodash@4.17.21"),
            Some("4.17.21".to_string())
        );
        assert_eq!(
            normalizer
                .extract_version("pkg:maven/org.apache.commons/commons-lang3@3.12.0?type=jar#src"),
            Some("3.12.0".to_string())
        );
        assert_eq!(normalizer.extract_version("pkg:npm/lodash"), None);
    }

    /// The type is the segment right after `pkg:`; the prefix is mandatory.
    #[test]
    fn test_extract_type() {
        let normalizer = PurlNormalizer::new();

        assert_eq!(
            normalizer.extract_type("pkg:npm/lodash@4.17.21"),
            Some("npm".to_string())
        );
        assert_eq!(
            normalizer.extract_type("pkg:maven/org.apache.commons/commons-lang3@3.12.0"),
            Some("maven".to_string())
        );
        assert_eq!(normalizer.extract_type("npm/lodash"), None);
    }

    /// Versions are ignored when comparing packages; names are not.
    #[test]
    fn test_same_package() {
        let normalizer = PurlNormalizer::new();

        assert!(normalizer.same_package("pkg:npm/lodash@4.17.20", "pkg:npm/lodash@4.17.21"));
        assert!(!normalizer.same_package("pkg:npm/lodash@4.17.21", "pkg:npm/underscore@1.13.0"));
    }

    /// `Default` and `new` produce normalizers that behave the same.
    #[test]
    fn test_default_matches_new() {
        let purl = "pkg:npm/Lodash@4.17.21";

        assert_eq!(
            PurlNormalizer::default().normalize(purl),
            PurlNormalizer::new().normalize(purl)
        );
    }
}