Skip to main content

provenant/parsers/
utils.rs

//! Shared utility functions for package parsers.
//!
//! This module provides common file I/O and parsing utilities
//! used across multiple parser implementations.
5use std::fs::File;
6use std::io::Read;
7use std::path::Path;
8
9use anyhow::Result;
10use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64_STANDARD};
11use packageurl::PackageUrl;
12
13/// Reads a file's entire contents into a String.
14///
15/// # Arguments
16///
17/// * `path` - Path to the file to read
18///
19/// # Returns
20///
21/// * `Ok(String)` - File contents as UTF-8 string
22/// * `Err` - I/O error or UTF-8 decoding error
23///
24/// # Examples
25///
26/// ```no_run
27/// use std::path::Path;
28/// use provenant::parsers::utils::read_file_to_string;
29///
30/// let content = read_file_to_string(Path::new("path/to/file.txt"))?;
31/// # Ok::<(), anyhow::Error>(())
32/// ```
33pub fn read_file_to_string(path: &Path) -> Result<String> {
34    let mut file = File::open(path)?;
35    let mut content = String::new();
36    file.read_to_string(&mut content)?;
37    Ok(content)
38}
39
40/// Creates a correctly-formatted npm Package URL for scoped or regular packages.
41///
42/// Handles namespace encoding for scoped packages (e.g., `@babel/core`) and ensures
43/// the slash between namespace and package name is NOT encoded as `%2F`.
44pub fn npm_purl(full_name: &str, version: Option<&str>) -> Option<String> {
45    let (namespace, name) = if full_name.starts_with('@') {
46        let parts: Vec<&str> = full_name.splitn(2, '/').collect();
47        if parts.len() == 2 {
48            (Some(parts[0]), parts[1])
49        } else {
50            (None, full_name)
51        }
52    } else {
53        (None, full_name)
54    };
55
56    let mut purl = PackageUrl::new("npm", name).ok()?;
57
58    if let Some(ns) = namespace {
59        purl.with_namespace(ns).ok()?;
60    }
61
62    if let Some(ver) = version {
63        purl.with_version(ver).ok()?;
64    }
65
66    Some(purl.to_string())
67}
68
69/// Parses Subresource Integrity (SRI) format and returns hash as hex string.
70///
71/// SRI format: "algorithm-base64string" (e.g., "sha512-9NET910DNaIPng...")
72///
73/// Returns the algorithm name and hex-encoded hash digest.
74pub fn parse_sri(integrity: &str) -> Option<(String, String)> {
75    let parts: Vec<&str> = integrity.splitn(2, '-').collect();
76    if parts.len() != 2 {
77        return None;
78    }
79
80    let algorithm = parts[0];
81    let base64_str = parts[1];
82
83    let bytes = BASE64_STANDARD.decode(base64_str).ok()?;
84
85    let hex_string = bytes
86        .iter()
87        .map(|b| format!("{:02x}", b))
88        .collect::<String>();
89
90    Some((algorithm.to_string(), hex_string))
91}
92
/// Parses `Name <email@domain.com>` format into separate components.
///
/// This utility handles common author/maintainer strings found in package manifests
/// where the format combines a human-readable name with an email address in angle brackets.
///
/// The email is the text between the first `<` and the first `>` that follows it.
/// A `>` that appears before the opening bracket is treated as part of the name.
///
/// # Arguments
///
/// * `s` - A string potentially containing name and email in `Name <email>` format
///
/// # Returns
///
/// A tuple of `(Option<String>, Option<String>)` representing `(name, email)`:
/// - If an `<email>` pattern is found: name (trimmed, or `None` if empty) and the
///   email exactly as written between the brackets (not trimmed)
/// - If no pattern is found: trimmed input as name (possibly empty), `None` for email
///
/// # Examples
///
/// ```
/// use provenant::parsers::utils::split_name_email;
///
/// // Full format
/// let (name, email) = split_name_email("John Doe <john@example.com>");
/// assert_eq!(name, Some("John Doe".to_string()));
/// assert_eq!(email, Some("john@example.com".to_string()));
///
/// // Email only in angle brackets
/// let (name, email) = split_name_email("<john@example.com>");
/// assert_eq!(name, None);
/// assert_eq!(email, Some("john@example.com".to_string()));
///
/// // Name only (no angle brackets)
/// let (name, email) = split_name_email("John Doe");
/// assert_eq!(name, Some("John Doe".to_string()));
/// assert_eq!(email, None);
///
/// // A stray `>` in the name does not hide the email
/// let (name, email) = split_name_email("Foo -> Bar <foo@bar.com>");
/// assert_eq!(name, Some("Foo -> Bar".to_string()));
/// assert_eq!(email, Some("foo@bar.com".to_string()));
/// ```
pub fn split_name_email(s: &str) -> (Option<String>, Option<String>) {
    // Look for the closing bracket only *after* the opening one, so that an
    // earlier '>' in the name part cannot cause the email to be missed.
    let bracketed = s.split_once('<').and_then(|(before, after)| {
        after
            .split_once('>')
            .map(|(email, _rest)| (before.trim(), email))
    });

    match bracketed {
        Some((name, email)) => (
            (!name.is_empty()).then(|| name.to_string()),
            Some(email.to_string()),
        ),
        // No well-formed `<...>` pair: the whole (trimmed) input is the name.
        None => (Some(s.trim().to_string()), None),
    }
}
147
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    // ---- read_file_to_string -------------------------------------------

    #[test]
    fn test_read_file_to_string_success() {
        let tmp = tempdir().unwrap();
        let target = tmp.path().join("test.txt");
        std::fs::write(&target, b"test content").unwrap();

        assert_eq!(read_file_to_string(&target).unwrap(), "test content");
    }

    #[test]
    fn test_read_file_to_string_nonexistent() {
        assert!(read_file_to_string(Path::new("/nonexistent/file.txt")).is_err());
    }

    #[test]
    fn test_read_file_to_string_empty() {
        let tmp = tempdir().unwrap();
        let target = tmp.path().join("empty.txt");
        // Creating the file without writing leaves it zero-length.
        File::create(&target).unwrap();

        assert_eq!(read_file_to_string(&target).unwrap(), "");
    }

    // ---- npm_purl --------------------------------------------------------

    #[test]
    fn test_npm_purl_scoped_with_version() {
        assert_eq!(
            npm_purl("@babel/core", Some("7.0.0")).as_deref(),
            Some("pkg:npm/%40babel/core@7.0.0")
        );
    }

    #[test]
    fn test_npm_purl_scoped_without_version() {
        assert_eq!(
            npm_purl("@babel/core", None).as_deref(),
            Some("pkg:npm/%40babel/core")
        );
    }

    #[test]
    fn test_npm_purl_unscoped_with_version() {
        assert_eq!(
            npm_purl("lodash", Some("4.17.21")).as_deref(),
            Some("pkg:npm/lodash@4.17.21")
        );
    }

    #[test]
    fn test_npm_purl_unscoped_without_version() {
        assert_eq!(npm_purl("lodash", None).as_deref(), Some("pkg:npm/lodash"));
    }

    #[test]
    fn test_npm_purl_scoped_slash_not_encoded() {
        let rendered = npm_purl("@types/node", Some("18.0.0")).unwrap();
        assert!(rendered.contains("/%40types/node"));
        assert!(!rendered.contains("%2F"));
    }

    // ---- parse_sri -------------------------------------------------------

    #[test]
    fn test_parse_sri_sha512() {
        let parsed = parse_sri("sha512-9NET910DNaIPngYnLLPeg+Ogzqsi9uM4mSboU5y6p8S5DzMTVEsJZrawi+BoDNUVBa2DhJqQYUFvMDfgU062LQ==").unwrap();
        assert_eq!(parsed.0, "sha512");
        // 64 digest bytes -> 128 hex characters
        assert_eq!(parsed.1.len(), 128);
    }

    #[test]
    fn test_parse_sri_sha1() {
        let parsed = parse_sri("sha1-w7M6te42DYbg5ijwRorn7yfWVN8=").unwrap();
        assert_eq!(parsed.0, "sha1");
        // 20 digest bytes -> 40 hex characters
        assert_eq!(parsed.1.len(), 40);
    }

    #[test]
    fn test_parse_sri_sha256() {
        let parsed = parse_sri("sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=").unwrap();
        assert_eq!(parsed.0, "sha256");
        // 32 digest bytes -> 64 hex characters
        assert_eq!(parsed.1.len(), 64);
    }

    #[test]
    fn test_parse_sri_invalid_format() {
        // None of these contain the '-' separator.
        for input in ["invalid", "sha512", ""] {
            assert!(parse_sri(input).is_none());
        }
    }

    #[test]
    fn test_parse_sri_invalid_base64() {
        assert!(parse_sri("sha512-!!!invalid!!!").is_none());
    }

    // ---- split_name_email ------------------------------------------------

    #[test]
    fn test_split_name_email_full_format() {
        assert_eq!(
            split_name_email("John Doe <john@example.com>"),
            (
                Some("John Doe".to_string()),
                Some("john@example.com".to_string())
            )
        );
    }

    #[test]
    fn test_split_name_email_name_only() {
        assert_eq!(
            split_name_email("John Doe"),
            (Some("John Doe".to_string()), None)
        );
    }

    #[test]
    fn test_split_name_email_email_only_plain() {
        // Without angle brackets the address is reported as the name.
        assert_eq!(
            split_name_email("john@example.com"),
            (Some("john@example.com".to_string()), None)
        );
    }

    #[test]
    fn test_split_name_email_email_only_brackets() {
        assert_eq!(
            split_name_email("<john@example.com>"),
            (None, Some("john@example.com".to_string()))
        );
    }

    #[test]
    fn test_split_name_email_whitespace_trimming() {
        // The name is trimmed; the email keeps its surrounding whitespace.
        assert_eq!(
            split_name_email("  John Doe  <  john@example.com  >  "),
            (
                Some("John Doe".to_string()),
                Some("  john@example.com  ".to_string())
            )
        );
    }

    #[test]
    fn test_split_name_email_empty_string() {
        assert_eq!(split_name_email(""), (Some("".to_string()), None));
    }

    #[test]
    fn test_split_name_email_whitespace_only() {
        assert_eq!(split_name_email("   "), (Some("".to_string()), None));
    }

    #[test]
    fn test_split_name_email_invalid_bracket_order() {
        assert_eq!(
            split_name_email("John >email< Doe"),
            (Some("John >email< Doe".to_string()), None)
        );
    }

    #[test]
    fn test_split_name_email_missing_close_bracket() {
        assert_eq!(
            split_name_email("John Doe <email@example.com"),
            (Some("John Doe <email@example.com".to_string()), None)
        );
    }

    #[test]
    fn test_split_name_email_missing_open_bracket() {
        assert_eq!(
            split_name_email("John Doe email@example.com>"),
            (Some("John Doe email@example.com>".to_string()), None)
        );
    }
}