provenant/parsers/
utils.rs1use std::fs::File;
6use std::io::Read;
7use std::path::Path;
8
9use anyhow::Result;
10use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64_STANDARD};
11use packageurl::PackageUrl;
12
13pub fn read_file_to_string(path: &Path) -> Result<String> {
34 let mut file = File::open(path)?;
35 let mut content = String::new();
36 file.read_to_string(&mut content)?;
37 Ok(content)
38}
39
40pub fn npm_purl(full_name: &str, version: Option<&str>) -> Option<String> {
45 let (namespace, name) = if full_name.starts_with('@') {
46 let parts: Vec<&str> = full_name.splitn(2, '/').collect();
47 if parts.len() == 2 {
48 (Some(parts[0]), parts[1])
49 } else {
50 (None, full_name)
51 }
52 } else {
53 (None, full_name)
54 };
55
56 let mut purl = PackageUrl::new("npm", name).ok()?;
57
58 if let Some(ns) = namespace {
59 purl.with_namespace(ns).ok()?;
60 }
61
62 if let Some(ver) = version {
63 purl.with_version(ver).ok()?;
64 }
65
66 Some(purl.to_string())
67}
68
69pub fn parse_sri(integrity: &str) -> Option<(String, String)> {
75 let parts: Vec<&str> = integrity.splitn(2, '-').collect();
76 if parts.len() != 2 {
77 return None;
78 }
79
80 let algorithm = parts[0];
81 let base64_str = parts[1];
82
83 let bytes = BASE64_STANDARD.decode(base64_str).ok()?;
84
85 let hex_string = bytes
86 .iter()
87 .map(|b| format!("{:02x}", b))
88 .collect::<String>();
89
90 Some((algorithm.to_string(), hex_string))
91}
92
/// Splits a `Name <email>` style string into its name and email parts.
///
/// The email is the text between the first `<` and the first `>` that follows
/// it. The name is whatever precedes that `<`, trimmed of surrounding
/// whitespace.
///
/// # Returns
///
/// * `(name, Some(email))` when a `<...>` pair is found. `name` is `None` if
///   only whitespace (or nothing) precedes the `<`. The email is returned
///   exactly as written between the brackets; it is not trimmed.
/// * `(Some(trimmed input), None)` when there is no `<...>` pair. An empty or
///   whitespace-only input therefore yields `(Some(""), None)`.
pub fn split_name_email(s: &str) -> (Option<String>, Option<String>) {
    // Look for the closing bracket only *after* the opening one, so a stray
    // `>` earlier in the text (e.g. "A > B <a@b.c>") does not hide the email.
    let bracketed = s.find('<').and_then(|open| {
        // '<' is a single byte, so `open + 1` is always a char boundary.
        let rest = &s[open + 1..];
        rest.find('>')
            .map(|email_len| (s[..open].trim(), &rest[..email_len]))
    });

    match bracketed {
        Some((name, email)) => {
            let name = if name.is_empty() {
                None
            } else {
                Some(name.to_string())
            };
            (name, Some(email.to_string()))
        }
        None => (Some(s.trim().to_string()), None),
    }
}
147
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::tempdir;

    /// Asserts that `input` parses as SRI with the given algorithm and a hex
    /// digest of exactly `hex_len` characters.
    fn assert_sri(input: &str, algorithm: &str, hex_len: usize) {
        let (parsed_algorithm, digest_hex) = parse_sri(input).unwrap();
        assert_eq!(parsed_algorithm, algorithm);
        assert_eq!(digest_hex.len(), hex_len);
    }

    /// Asserts that `split_name_email(input)` yields the given name and email.
    fn assert_split(input: &str, name: Option<&str>, email: Option<&str>) {
        let (actual_name, actual_email) = split_name_email(input);
        assert_eq!(actual_name.as_deref(), name);
        assert_eq!(actual_email.as_deref(), email);
    }

    // ---- read_file_to_string ----

    #[test]
    fn test_read_file_to_string_success() {
        let tmp = tempdir().unwrap();
        let target = tmp.path().join("test.txt");
        File::create(&target)
            .unwrap()
            .write_all(b"test content")
            .unwrap();

        assert_eq!(read_file_to_string(&target).unwrap(), "test content");
    }

    #[test]
    fn test_read_file_to_string_nonexistent() {
        let missing = Path::new("/nonexistent/file.txt");
        assert!(read_file_to_string(missing).is_err());
    }

    #[test]
    fn test_read_file_to_string_empty() {
        let tmp = tempdir().unwrap();
        let target = tmp.path().join("empty.txt");
        File::create(&target).unwrap();

        assert_eq!(read_file_to_string(&target).unwrap(), "");
    }

    // ---- npm_purl ----

    #[test]
    fn test_npm_purl_scoped_with_version() {
        assert_eq!(
            npm_purl("@babel/core", Some("7.0.0")).as_deref(),
            Some("pkg:npm/%40babel/core@7.0.0")
        );
    }

    #[test]
    fn test_npm_purl_scoped_without_version() {
        assert_eq!(
            npm_purl("@babel/core", None).as_deref(),
            Some("pkg:npm/%40babel/core")
        );
    }

    #[test]
    fn test_npm_purl_unscoped_with_version() {
        assert_eq!(
            npm_purl("lodash", Some("4.17.21")).as_deref(),
            Some("pkg:npm/lodash@4.17.21")
        );
    }

    #[test]
    fn test_npm_purl_unscoped_without_version() {
        assert_eq!(npm_purl("lodash", None).as_deref(), Some("pkg:npm/lodash"));
    }

    #[test]
    fn test_npm_purl_scoped_slash_not_encoded() {
        let rendered = npm_purl("@types/node", Some("18.0.0")).unwrap();
        // The scope's '@' is percent-encoded, but the separator slash is not.
        assert!(rendered.contains("/%40types/node"));
        assert!(!rendered.contains("%2F"));
    }

    // ---- parse_sri ----

    #[test]
    fn test_parse_sri_sha512() {
        assert_sri(
            "sha512-9NET910DNaIPngYnLLPeg+Ogzqsi9uM4mSboU5y6p8S5DzMTVEsJZrawi+BoDNUVBa2DhJqQYUFvMDfgU062LQ==",
            "sha512",
            128,
        );
    }

    #[test]
    fn test_parse_sri_sha1() {
        assert_sri("sha1-w7M6te42DYbg5ijwRorn7yfWVN8=", "sha1", 40);
    }

    #[test]
    fn test_parse_sri_sha256() {
        assert_sri(
            "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=",
            "sha256",
            64,
        );
    }

    #[test]
    fn test_parse_sri_invalid_format() {
        for malformed in ["invalid", "sha512", ""] {
            assert!(parse_sri(malformed).is_none());
        }
    }

    #[test]
    fn test_parse_sri_invalid_base64() {
        assert!(parse_sri("sha512-!!!invalid!!!").is_none());
    }

    // ---- split_name_email ----

    #[test]
    fn test_split_name_email_full_format() {
        assert_split(
            "John Doe <john@example.com>",
            Some("John Doe"),
            Some("john@example.com"),
        );
    }

    #[test]
    fn test_split_name_email_name_only() {
        assert_split("John Doe", Some("John Doe"), None);
    }

    #[test]
    fn test_split_name_email_email_only_plain() {
        // Without angle brackets the whole input is treated as the name.
        assert_split("john@example.com", Some("john@example.com"), None);
    }

    #[test]
    fn test_split_name_email_email_only_brackets() {
        assert_split("<john@example.com>", None, Some("john@example.com"));
    }

    #[test]
    fn test_split_name_email_whitespace_trimming() {
        // The name is trimmed; the bracketed email is returned as written.
        assert_split(
            " John Doe < john@example.com > ",
            Some("John Doe"),
            Some(" john@example.com "),
        );
    }

    #[test]
    fn test_split_name_email_empty_string() {
        assert_split("", Some(""), None);
    }

    #[test]
    fn test_split_name_email_whitespace_only() {
        assert_split(" ", Some(""), None);
    }

    #[test]
    fn test_split_name_email_invalid_bracket_order() {
        assert_split("John >email< Doe", Some("John >email< Doe"), None);
    }

    #[test]
    fn test_split_name_email_missing_close_bracket() {
        assert_split(
            "John Doe <email@example.com",
            Some("John Doe <email@example.com"),
            None,
        );
    }

    #[test]
    fn test_split_name_email_missing_open_bracket() {
        assert_split(
            "John Doe email@example.com>",
            Some("John Doe email@example.com>"),
            None,
        );
    }
}