1use std::collections::HashSet;
6use std::fs::{self, File};
7use std::hash::Hash;
8use std::io::Read;
9use std::path::Path;
10
11use anyhow::Result;
12use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64_STANDARD};
13use packageurl::PackageUrl;
14
/// Default upper bound, in bytes, on the size of a file that
/// `read_file_to_string` loads when the caller passes no explicit limit
/// (100 MiB).
pub const MAX_MANIFEST_SIZE: u64 = 100 * 1024 * 1024;

/// Maximum length, in bytes, of a single field value; `truncate_field` cuts
/// longer values down to at most this many bytes (10 MiB).
pub const MAX_FIELD_LENGTH: usize = 10 * 1024 * 1024;

/// Iteration cap (100,000).
///
/// NOTE(review): not referenced in this part of the file — presumably bounds
/// parser loops elsewhere; confirm against callers.
pub const MAX_ITERATION_COUNT: usize = 100_000;

/// Depth beyond which `RecursionGuard::exceeded` reports `true`.
pub const MAX_RECURSION_DEPTH: usize = 50;
26
27pub struct RecursionGuard<K: Hash + Eq> {
57 depth: usize,
58 visited: HashSet<K>,
59}
60
61impl<K: Hash + Eq> RecursionGuard<K> {
62 pub fn new() -> Self {
63 Self {
64 depth: 0,
65 visited: HashSet::new(),
66 }
67 }
68
69 pub fn exceeded(&self) -> bool {
70 self.depth > MAX_RECURSION_DEPTH
71 }
72
73 pub fn depth(&self) -> usize {
74 self.depth
75 }
76
77 pub fn enter(&mut self, key: K) -> bool {
78 if self.visited.contains(&key) {
79 return true;
80 }
81 self.visited.insert(key);
82 self.depth += 1;
83 false
84 }
85
86 pub fn leave(&mut self, key: K) {
87 self.visited.remove(&key);
88 self.depth -= 1;
89 }
90}
91
92impl RecursionGuard<()> {
93 pub fn depth_only() -> Self {
94 Self::new()
95 }
96
97 pub fn descend(&mut self) -> bool {
98 self.depth += 1;
99 self.exceeded()
100 }
101
102 pub fn ascend(&mut self) {
103 self.depth -= 1;
104 }
105}
106
107impl<K: Hash + Eq> Default for RecursionGuard<K> {
108 fn default() -> Self {
109 Self::new()
110 }
111}
112
113pub fn truncate_field(value: String) -> String {
117 if value.len() <= MAX_FIELD_LENGTH {
118 return value;
119 }
120 let truncated = &value[..value.floor_char_boundary(MAX_FIELD_LENGTH)];
121 crate::parser_warn!(
122 "Truncated field value from {} bytes to {} bytes (MAX_FIELD_LENGTH)",
123 value.len(),
124 truncated.len()
125 );
126 truncated.to_string()
127}
128
129pub fn read_file_to_string(path: &Path, max_size: Option<u64>) -> Result<String> {
156 let limit = max_size.unwrap_or(MAX_MANIFEST_SIZE);
157
158 let metadata =
159 fs::metadata(path).map_err(|e| anyhow::anyhow!("Cannot stat file {:?}: {}", path, e))?;
160
161 if metadata.len() > limit {
162 anyhow::bail!(
163 "File {:?} is {} bytes, exceeding the {} byte limit",
164 path,
165 metadata.len(),
166 limit
167 );
168 }
169
170 let mut bytes = Vec::with_capacity(metadata.len() as usize);
171 let mut file = File::open(path)?;
172 file.read_to_end(&mut bytes)?;
173
174 match String::from_utf8(bytes) {
175 Ok(s) => Ok(s),
176 Err(err) => {
177 let bytes = err.into_bytes();
178 crate::parser_warn!(
179 "File {:?} contains invalid UTF-8; using lossy conversion",
180 path
181 );
182 Ok(String::from_utf8_lossy(&bytes).into_owned())
183 }
184 }
185}
186
187pub fn npm_purl(full_name: &str, version: Option<&str>) -> Option<String> {
192 let (namespace, name) = if full_name.starts_with('@') {
193 let parts: Vec<&str> = full_name.splitn(2, '/').collect();
194 if parts.len() == 2 {
195 (Some(parts[0]), parts[1])
196 } else {
197 (None, full_name)
198 }
199 } else {
200 (None, full_name)
201 };
202
203 let mut purl = PackageUrl::new("npm", name).ok()?;
204
205 if let Some(ns) = namespace {
206 purl.with_namespace(ns).ok()?;
207 }
208
209 if let Some(ver) = version {
210 purl.with_version(ver).ok()?;
211 }
212
213 Some(purl.to_string())
214}
215
216pub fn parse_sri(integrity: &str) -> Option<(String, String)> {
222 let parts: Vec<&str> = integrity.splitn(2, '-').collect();
223 if parts.len() != 2 {
224 return None;
225 }
226
227 let algorithm = parts[0];
228 let base64_str = parts[1];
229
230 let bytes = BASE64_STANDARD.decode(base64_str).ok()?;
231
232 let hex_string = bytes
233 .iter()
234 .map(|b| format!("{:02x}", b))
235 .collect::<String>();
236
237 Some((algorithm.to_string(), hex_string))
238}
239
/// Splits a string of the form `Name <email>` into its name and e-mail parts.
///
/// Returns `(name, email)`:
///
/// * when the input contains a `<` that is followed somewhere by a `>`, the
///   text before the first `<` (trimmed) is the name — `None` if that text is
///   empty — and the text between that `<` and the next `>` is the e-mail,
///   returned exactly as written (not trimmed);
/// * otherwise the whole input, trimmed, is returned as the name and the
///   e-mail is `None`.
///
/// The closing bracket is searched for only after the opening one, so a stray
/// `>` earlier in the string does not hide a well-formed `<email>` part.
pub fn split_name_email(s: &str) -> (Option<String>, Option<String>) {
    let bracketed = s.split_once('<').and_then(|(before, after)| {
        after
            .split_once('>')
            .map(|(email, _rest)| (before.trim(), email))
    });

    match bracketed {
        Some((name, email)) => (
            (!name.is_empty()).then(|| name.to_string()),
            Some(email.to_string()),
        ),
        None => (Some(s.trim().to_string()), None),
    }
}
294
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::tempdir;

    /// Creates `name` inside `dir` holding exactly `contents` and returns its path.
    fn write_fixture(dir: &tempfile::TempDir, name: &str, contents: &[u8]) -> std::path::PathBuf {
        let path = dir.path().join(name);
        let mut file = File::create(&path).unwrap();
        file.write_all(contents).unwrap();
        path
    }

    /// Shorthand for an owned `Some(String)` in expected values.
    fn owned(text: &str) -> Option<String> {
        Some(text.to_string())
    }

    // ---- read_file_to_string -------------------------------------------

    #[test]
    fn test_read_file_to_string_success() {
        let dir = tempdir().unwrap();
        let path = write_fixture(&dir, "test.txt", b"test content");

        assert_eq!(read_file_to_string(&path, None).unwrap(), "test content");
    }

    #[test]
    fn test_read_file_to_string_nonexistent() {
        let missing = Path::new("/nonexistent/file.txt");

        assert!(read_file_to_string(missing, None).is_err());
    }

    #[test]
    fn test_read_file_to_string_empty() {
        let dir = tempdir().unwrap();
        let path = write_fixture(&dir, "empty.txt", b"");

        assert_eq!(read_file_to_string(&path, None).unwrap(), "");
    }

    // ---- npm_purl --------------------------------------------------------

    #[test]
    fn test_npm_purl_scoped_with_version() {
        assert_eq!(
            npm_purl("@babel/core", Some("7.0.0")).as_deref(),
            Some("pkg:npm/%40babel/core@7.0.0")
        );
    }

    #[test]
    fn test_npm_purl_scoped_without_version() {
        assert_eq!(
            npm_purl("@babel/core", None).as_deref(),
            Some("pkg:npm/%40babel/core")
        );
    }

    #[test]
    fn test_npm_purl_unscoped_with_version() {
        assert_eq!(
            npm_purl("lodash", Some("4.17.21")).as_deref(),
            Some("pkg:npm/lodash@4.17.21")
        );
    }

    #[test]
    fn test_npm_purl_unscoped_without_version() {
        assert_eq!(npm_purl("lodash", None).as_deref(), Some("pkg:npm/lodash"));
    }

    #[test]
    fn test_npm_purl_scoped_slash_not_encoded() {
        let purl = npm_purl("@types/node", Some("18.0.0")).expect("scoped purl should build");

        assert!(purl.contains("/%40types/node"));
        assert!(!purl.contains("%2F"));
    }

    // ---- parse_sri -------------------------------------------------------

    #[test]
    fn test_parse_sri_sha512() {
        let (algo, hash) = parse_sri("sha512-9NET910DNaIPngYnLLPeg+Ogzqsi9uM4mSboU5y6p8S5DzMTVEsJZrawi+BoDNUVBa2DhJqQYUFvMDfgU062LQ==")
            .expect("valid sha512 SRI");

        assert_eq!(algo, "sha512");
        assert_eq!(hash.len(), 128);
    }

    #[test]
    fn test_parse_sri_sha1() {
        let (algo, hash) = parse_sri("sha1-w7M6te42DYbg5ijwRorn7yfWVN8=").expect("valid sha1 SRI");

        assert_eq!(algo, "sha1");
        assert_eq!(hash.len(), 40);
    }

    #[test]
    fn test_parse_sri_sha256() {
        let (algo, hash) = parse_sri("sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=")
            .expect("valid sha256 SRI");

        assert_eq!(algo, "sha256");
        assert_eq!(hash.len(), 64);
    }

    #[test]
    fn test_parse_sri_invalid_format() {
        for input in ["invalid", "sha512", ""] {
            assert!(parse_sri(input).is_none());
        }
    }

    #[test]
    fn test_parse_sri_invalid_base64() {
        assert!(parse_sri("sha512-!!!invalid!!!").is_none());
    }

    // ---- split_name_email ------------------------------------------------

    #[test]
    fn test_split_name_email_full_format() {
        assert_eq!(
            split_name_email("John Doe <john@example.com>"),
            (owned("John Doe"), owned("john@example.com"))
        );
    }

    #[test]
    fn test_split_name_email_name_only() {
        assert_eq!(split_name_email("John Doe"), (owned("John Doe"), None));
    }

    #[test]
    fn test_split_name_email_email_only_plain() {
        assert_eq!(
            split_name_email("john@example.com"),
            (owned("john@example.com"), None)
        );
    }

    #[test]
    fn test_split_name_email_email_only_brackets() {
        assert_eq!(
            split_name_email("<john@example.com>"),
            (None, owned("john@example.com"))
        );
    }

    #[test]
    fn test_split_name_email_whitespace_trimming() {
        // Only the name is trimmed; the bracketed part is returned verbatim.
        assert_eq!(
            split_name_email(" John Doe < john@example.com > "),
            (owned("John Doe"), owned(" john@example.com "))
        );
    }

    #[test]
    fn test_split_name_email_empty_string() {
        assert_eq!(split_name_email(""), (owned(""), None));
    }

    #[test]
    fn test_split_name_email_whitespace_only() {
        assert_eq!(split_name_email(" "), (owned(""), None));
    }

    #[test]
    fn test_split_name_email_invalid_bracket_order() {
        assert_eq!(
            split_name_email("John >email< Doe"),
            (owned("John >email< Doe"), None)
        );
    }

    #[test]
    fn test_split_name_email_missing_close_bracket() {
        assert_eq!(
            split_name_email("John Doe <email@example.com"),
            (owned("John Doe <email@example.com"), None)
        );
    }

    #[test]
    fn test_split_name_email_missing_open_bracket() {
        assert_eq!(
            split_name_email("John Doe email@example.com>"),
            (owned("John Doe email@example.com>"), None)
        );
    }

    // ---- read_file_to_string: limits and encoding ------------------------

    #[test]
    fn test_read_file_to_string_oversized() {
        let dir = tempdir().unwrap();
        let path = write_fixture(&dir, "big.txt", b"x");

        // A one-byte file is already over a zero-byte limit.
        assert!(read_file_to_string(&path, Some(0)).is_err());
    }

    #[test]
    fn test_read_file_to_string_lossy_utf8() {
        let dir = tempdir().unwrap();
        let path = write_fixture(&dir, "bad_utf8.txt", b"hello\xffworld");

        let content = read_file_to_string(&path, None).unwrap();
        assert!(content.contains("hello"));
        assert!(content.contains("world"));
    }

    // ---- truncate_field --------------------------------------------------

    #[test]
    fn test_truncate_field_within_limit() {
        let short = String::from("short value");

        assert_eq!(truncate_field(short.clone()), short);
    }

    #[test]
    fn test_truncate_field_exceeds_limit() {
        let oversized = "x".repeat(MAX_FIELD_LENGTH + 100);

        assert!(truncate_field(oversized).len() <= MAX_FIELD_LENGTH);
    }
}