Skip to main content

sbom_tools/matching/
purl.rs

1//! PURL normalization utilities.
2
3use crate::model::Ecosystem;
4
/// PURL normalizer for consistent comparison.
///
/// This is a field-less unit type: it carries no configuration or state, so
/// values are free to create, copy and share. `Debug`, `Clone` and `Copy`
/// are derived so the normalizer can be embedded in other derived types and
/// passed by value.
#[derive(Debug, Clone, Copy)]
pub struct PurlNormalizer;
7
8impl PurlNormalizer {
9    /// Create a new PURL normalizer
10    pub fn new() -> Self {
11        Self
12    }
13
14    /// Normalize a PURL for comparison
15    pub fn normalize(&self, purl: &str) -> String {
16        self.normalize_internal(purl)
17    }
18
19    fn normalize_internal(&self, purl: &str) -> String {
20        // Detect ecosystem from PURL
21        if let Some(ecosystem) = self.detect_ecosystem(purl) {
22            match ecosystem {
23                Ecosystem::PyPi => self.normalize_pypi(purl),
24                Ecosystem::Npm => self.normalize_npm(purl),
25                Ecosystem::Cargo => self.normalize_cargo(purl),
26                Ecosystem::Maven => self.normalize_maven(purl),
27                Ecosystem::Golang => self.normalize_golang(purl),
28                Ecosystem::Nuget => self.normalize_nuget(purl),
29                _ => purl.to_lowercase(),
30            }
31        } else {
32            purl.to_lowercase()
33        }
34    }
35
36    /// Detect ecosystem from PURL
37    fn detect_ecosystem(&self, purl: &str) -> Option<Ecosystem> {
38        let purl_type = purl.strip_prefix("pkg:")?.split('/').next()?;
39
40        Some(Ecosystem::from_purl_type(purl_type))
41    }
42
43    /// Normalize PyPI PURL
44    /// PyPI names are case-insensitive and treat `_`, `-`, `.` as equivalent
45    fn normalize_pypi(&self, purl: &str) -> String {
46        let lower = purl.to_lowercase();
47        // Replace underscores and dots with hyphens
48        lower.replace(['_', '.'], "-")
49    }
50
51    /// Normalize npm PURL
52    /// npm names are lowercase, scopes use @ prefix
53    fn normalize_npm(&self, purl: &str) -> String {
54        let mut normalized = purl.to_lowercase();
55        // Decode URL-encoded @ for scopes
56        normalized = normalized.replace("%40", "@");
57        normalized
58    }
59
60    /// Normalize Cargo PURL
61    /// Cargo treats hyphens and underscores as equivalent (but prefers underscores)
62    fn normalize_cargo(&self, purl: &str) -> String {
63        let lower = purl.to_lowercase();
64        // Normalize to underscores (Cargo's canonical form)
65        lower.replace('-', "_")
66    }
67
68    /// Normalize Maven PURL
69    /// Maven is case-sensitive for groupId and artifactId
70    fn normalize_maven(&self, purl: &str) -> String {
71        // Maven PURLs should preserve case
72        purl.to_string()
73    }
74
75    /// Normalize Go PURL
76    /// Go modules are case-sensitive and may have /v2 suffixes
77    fn normalize_golang(&self, purl: &str) -> String {
78        // Go PURLs should preserve case
79        purl.to_string()
80    }
81
82    /// Normalize NuGet PURL
83    /// NuGet package IDs are case-insensitive
84    fn normalize_nuget(&self, purl: &str) -> String {
85        purl.to_lowercase()
86    }
87
88    /// Extract package name from PURL
89    pub fn extract_name(&self, purl: &str) -> Option<String> {
90        let without_pkg = purl.strip_prefix("pkg:")?;
91        let parts: Vec<&str> = without_pkg.split('/').collect();
92
93        let name_part = if parts.len() >= 2 {
94            // Handle namespace/name format
95            parts.last()?
96        } else {
97            return None;
98        };
99
100        // Remove version, qualifiers, subpath
101        let name = name_part
102            .split('@')
103            .next()?
104            .split('?')
105            .next()?
106            .split('#')
107            .next()?;
108
109        Some(name.to_string())
110    }
111
112    /// Extract version from PURL
113    pub fn extract_version(&self, purl: &str) -> Option<String> {
114        let at_pos = purl.find('@')?;
115        let version_part = &purl[at_pos + 1..];
116
117        // Remove qualifiers and subpath
118        let version = version_part.split('?').next()?.split('#').next()?;
119
120        Some(version.to_string())
121    }
122
123    /// Extract ecosystem type from PURL
124    pub fn extract_type(&self, purl: &str) -> Option<String> {
125        let without_pkg = purl.strip_prefix("pkg:")?;
126        let purl_type = without_pkg.split('/').next()?;
127        Some(purl_type.to_string())
128    }
129
130    /// Check if two PURLs refer to the same package (ignoring version)
131    pub fn same_package(&self, purl_a: &str, purl_b: &str) -> bool {
132        let norm_a = self.normalize(purl_a);
133        let norm_b = self.normalize(purl_b);
134
135        // Remove version for comparison
136        let base_a = norm_a.split('@').next().unwrap_or(&norm_a);
137        let base_b = norm_b.split('@').next().unwrap_or(&norm_b);
138
139        base_a == base_b
140    }
141}
142
143impl Default for PurlNormalizer {
144    fn default() -> Self {
145        Self::new()
146    }
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that both PURLs normalize to the same string.
    fn assert_same_normal_form(left: &str, right: &str) {
        let normalizer = PurlNormalizer::new();
        assert_eq!(normalizer.normalize(left), normalizer.normalize(right));
    }

    /// PyPI names must compare equal regardless of letter case.
    #[test]
    fn test_pypi_normalization() {
        assert_same_normal_form("pkg:pypi/Pillow@9.0.0", "pkg:pypi/pillow@9.0.0");
    }

    /// Hyphen and underscore are interchangeable in PyPI names.
    #[test]
    fn test_pypi_separator_normalization() {
        assert_same_normal_form(
            "pkg:pypi/python-dateutil@2.8.2",
            "pkg:pypi/python_dateutil@2.8.2",
        );
    }

    /// A URL-encoded npm scope marker is equivalent to a literal `@`.
    #[test]
    fn test_npm_scope_normalization() {
        assert_same_normal_form(
            "pkg:npm/%40angular/core@15.0.0",
            "pkg:npm/@angular/core@15.0.0",
        );
    }

    /// The name is the last path segment without the version.
    #[test]
    fn test_extract_name() {
        let normalizer = PurlNormalizer::new();
        let cases = [
            ("pkg:npm/lodash@4.17.21", "lodash"),
            (
                "pkg:maven/org.apache.commons/commons-lang3@3.12.0",
                "commons-lang3",
            ),
        ];

        for (purl, expected) in cases.iter().copied() {
            assert_eq!(normalizer.extract_name(purl), Some(expected.to_string()));
        }
    }

    /// Versions are ignored when deciding whether two PURLs name one package.
    #[test]
    fn test_same_package() {
        let normalizer = PurlNormalizer::new();

        let same_name = normalizer.same_package("pkg:npm/lodash@4.17.20", "pkg:npm/lodash@4.17.21");
        let other_name =
            normalizer.same_package("pkg:npm/lodash@4.17.21", "pkg:npm/underscore@1.13.0");

        assert!(same_name);
        assert!(!other_name);
    }
}