1use std::collections::HashSet;
9use std::fs::{self, File};
10use std::hash::Hash;
11use std::io::Read;
12use std::path::Path;
13
14use anyhow::Result;
15use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64_STANDARD};
16use packageurl::PackageUrl;
17
/// Default upper bound, in bytes, on the size of a file that
/// `read_file_to_string` will load when the caller passes no explicit limit
/// (100 MiB).
pub const MAX_MANIFEST_SIZE: u64 = 100 * 1024 * 1024;

/// Largest byte length `truncate_field` lets a single field value keep
/// (10 MiB).
pub const MAX_FIELD_LENGTH: usize = 10 * 1024 * 1024;

/// Iteration cap of 100,000.
/// NOTE(review): not referenced in this part of the file; presumably bounds
/// parser loops elsewhere in the crate — confirm against callers.
pub const MAX_ITERATION_COUNT: usize = 100_000;

/// Depth above which `RecursionGuard::exceeded` starts reporting `true`.
pub const MAX_RECURSION_DEPTH: usize = 50;
29
30pub struct RecursionGuard<K: Hash + Eq> {
63 depth: usize,
64 visited: HashSet<K>,
65}
66
67impl<K: Hash + Eq> RecursionGuard<K> {
68 pub fn new() -> Self {
69 Self {
70 depth: 0,
71 visited: HashSet::new(),
72 }
73 }
74
75 pub fn exceeded(&self) -> bool {
76 self.depth > MAX_RECURSION_DEPTH
77 }
78
79 pub fn depth(&self) -> usize {
80 self.depth
81 }
82
83 pub fn enter(&mut self, key: K) -> bool {
84 if self.visited.contains(&key) {
85 return true;
86 }
87 self.visited.insert(key);
88 self.depth += 1;
89 false
90 }
91
92 pub fn leave(&mut self, key: K) {
93 self.visited.remove(&key);
94 self.depth -= 1;
95 }
96}
97
98impl RecursionGuard<()> {
99 pub fn depth_only() -> Self {
100 Self::new()
101 }
102
103 pub fn descend(&mut self) -> bool {
104 self.depth += 1;
105 self.exceeded()
106 }
107
108 pub fn ascend(&mut self) {
109 self.depth -= 1;
110 }
111}
112
113impl<K: Hash + Eq> Default for RecursionGuard<K> {
114 fn default() -> Self {
115 Self::new()
116 }
117}
118
119pub fn truncate_field(value: String) -> String {
123 if value.len() <= MAX_FIELD_LENGTH {
124 return value;
125 }
126 let truncated = &value[..value.floor_char_boundary(MAX_FIELD_LENGTH)];
127 crate::parser_warn!(
128 "Truncated field value from {} bytes to {} bytes (MAX_FIELD_LENGTH)",
129 value.len(),
130 truncated.len()
131 );
132 truncated.to_string()
133}
134
135pub fn read_file_to_string(path: &Path, max_size: Option<u64>) -> Result<String> {
156 let limit = max_size.unwrap_or(MAX_MANIFEST_SIZE);
157
158 let metadata =
159 fs::metadata(path).map_err(|e| anyhow::anyhow!("Cannot stat file {:?}: {}", path, e))?;
160
161 if metadata.len() > limit {
162 anyhow::bail!(
163 "File {:?} is {} bytes, exceeding the {} byte limit",
164 path,
165 metadata.len(),
166 limit
167 );
168 }
169
170 let mut bytes = Vec::with_capacity(metadata.len() as usize);
171 let mut file = File::open(path)?;
172 file.read_to_end(&mut bytes)?;
173
174 match String::from_utf8(bytes) {
175 Ok(s) => Ok(s),
176 Err(err) => {
177 let bytes = err.into_bytes();
178 crate::parser_warn!(
179 "File {:?} contains invalid UTF-8; using lossy conversion",
180 path
181 );
182 Ok(String::from_utf8_lossy(&bytes).into_owned())
183 }
184 }
185}
186
187pub fn npm_purl(full_name: &str, version: Option<&str>) -> Option<String> {
192 let (namespace, name) = if full_name.starts_with('@') {
193 let parts: Vec<&str> = full_name.splitn(2, '/').collect();
194 if parts.len() == 2 {
195 (Some(parts[0]), parts[1])
196 } else {
197 (None, full_name)
198 }
199 } else {
200 (None, full_name)
201 };
202
203 let mut purl = PackageUrl::new("npm", name).ok()?;
204
205 if let Some(ns) = namespace {
206 purl.with_namespace(ns).ok()?;
207 }
208
209 if let Some(ver) = version {
210 purl.with_version(ver).ok()?;
211 }
212
213 Some(purl.to_string())
214}
215
216pub fn parse_sri(integrity: &str) -> Option<(String, String)> {
222 let parts: Vec<&str> = integrity.splitn(2, '-').collect();
223 if parts.len() != 2 {
224 return None;
225 }
226
227 let algorithm = parts[0];
228 let base64_str = parts[1];
229
230 let bytes = BASE64_STANDARD.decode(base64_str).ok()?;
231
232 let hex_string = bytes
233 .iter()
234 .map(|b| format!("{:02x}", b))
235 .collect::<String>();
236
237 Some((algorithm.to_string(), hex_string))
238}
239
/// Splits a `Name <email>` style string into `(name, email)`.
///
/// The email is the text between the first `<` and the first `>` that follows
/// it, kept verbatim (not trimmed). The name is the trimmed text before the
/// `<`, or `None` when that text is empty.
///
/// When no such bracket pair exists, the whole trimmed input is returned as
/// the name (possibly an empty string) and the email is `None`.
pub fn split_name_email(s: &str) -> (Option<String>, Option<String>) {
    // Look for the closing bracket only after the opening one, so a stray '>'
    // earlier in the string cannot hide a well-formed `<email>` part.
    let brackets = s.find('<').and_then(|open| {
        let after_open = open + '<'.len_utf8();
        s[after_open..]
            .find('>')
            .map(|offset| (open, after_open, after_open + offset))
    });

    match brackets {
        Some((open, email_start, email_end)) => {
            let name = s[..open].trim();
            let email = &s[email_start..email_end];
            (
                (!name.is_empty()).then(|| name.to_string()),
                Some(email.to_string()),
            )
        }
        None => (Some(s.trim().to_string()), None),
    }
}
278
// Unit tests for the helpers defined in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::tempdir;

    // Keyed mode: depth follows enter/leave, and re-entering a key that is
    // still on the path is reported as a cycle without changing the depth.
    #[test]
    fn test_recursion_guard_tracks_depth_and_cycles() {
        let mut guard = RecursionGuard::new();

        assert_eq!(guard.depth(), 0);
        assert!(!guard.exceeded());

        assert!(!guard.enter("root"));
        assert_eq!(guard.depth(), 1);
        assert!(!guard.enter("child"));
        assert_eq!(guard.depth(), 2);

        assert!(guard.enter("root"));
        assert_eq!(guard.depth(), 2);

        guard.leave("child");
        assert_eq!(guard.depth(), 1);
        guard.leave("root");
        assert_eq!(guard.depth(), 0);
        assert!(!guard.exceeded());
    }

    // Depth-only mode: the limit is exclusive, so exactly MAX_RECURSION_DEPTH
    // levels are allowed and the next descend reports "exceeded".
    #[test]
    fn test_recursion_guard_depth_limit_and_depth_only_mode() {
        let mut guard = RecursionGuard::<()>::depth_only();

        for _ in 0..MAX_RECURSION_DEPTH {
            assert!(!guard.descend());
        }

        assert_eq!(guard.depth(), MAX_RECURSION_DEPTH);
        assert!(!guard.exceeded());

        assert!(guard.descend());
        assert_eq!(guard.depth(), MAX_RECURSION_DEPTH + 1);
        assert!(guard.exceeded());

        guard.ascend();
        assert_eq!(guard.depth(), MAX_RECURSION_DEPTH);
        assert!(!guard.exceeded());
    }

    // A small UTF-8 file is returned verbatim.
    #[test]
    fn test_read_file_to_string_success() {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("test.txt");
        let mut file = File::create(&file_path).unwrap();
        file.write_all(b"test content").unwrap();

        let content = read_file_to_string(&file_path, None).unwrap();
        assert_eq!(content, "test content");
    }

    // A missing file surfaces as an error rather than a panic.
    #[test]
    fn test_read_file_to_string_nonexistent() {
        let path = Path::new("/nonexistent/file.txt");
        let result = read_file_to_string(path, None);
        assert!(result.is_err());
    }

    // An empty file yields an empty string.
    #[test]
    fn test_read_file_to_string_empty() {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("empty.txt");
        File::create(&file_path).unwrap();

        let content = read_file_to_string(&file_path, None).unwrap();
        assert_eq!(content, "");
    }

    // Scoped package: the `@scope` namespace is percent-encoded as `%40scope`.
    #[test]
    fn test_npm_purl_scoped_with_version() {
        let purl = npm_purl("@babel/core", Some("7.0.0")).unwrap();
        assert_eq!(purl, "pkg:npm/%40babel/core@7.0.0");
    }

    // Scoped package without a version has no `@version` suffix.
    #[test]
    fn test_npm_purl_scoped_without_version() {
        let purl = npm_purl("@babel/core", None).unwrap();
        assert_eq!(purl, "pkg:npm/%40babel/core");
    }

    // Unscoped package with a version.
    #[test]
    fn test_npm_purl_unscoped_with_version() {
        let purl = npm_purl("lodash", Some("4.17.21")).unwrap();
        assert_eq!(purl, "pkg:npm/lodash@4.17.21");
    }

    // Unscoped package without a version.
    #[test]
    fn test_npm_purl_unscoped_without_version() {
        let purl = npm_purl("lodash", None).unwrap();
        assert_eq!(purl, "pkg:npm/lodash");
    }

    // The separator between scope and name stays a literal `/`, never `%2F`.
    #[test]
    fn test_npm_purl_scoped_slash_not_encoded() {
        let purl = npm_purl("@types/node", Some("18.0.0")).unwrap();
        assert!(purl.contains("/%40types/node"));
        assert!(!purl.contains("%2F"));
    }

    // A 64-byte digest becomes 128 hex characters.
    #[test]
    fn test_parse_sri_sha512() {
        let (algo, hash) = parse_sri("sha512-9NET910DNaIPngYnLLPeg+Ogzqsi9uM4mSboU5y6p8S5DzMTVEsJZrawi+BoDNUVBa2DhJqQYUFvMDfgU062LQ==").unwrap();
        assert_eq!(algo, "sha512");
        assert_eq!(hash.len(), 128);
    }

    // A 20-byte digest becomes 40 hex characters.
    #[test]
    fn test_parse_sri_sha1() {
        let (algo, hash) = parse_sri("sha1-w7M6te42DYbg5ijwRorn7yfWVN8=").unwrap();
        assert_eq!(algo, "sha1");
        assert_eq!(hash.len(), 40);
    }

    // A 32-byte digest becomes 64 hex characters.
    #[test]
    fn test_parse_sri_sha256() {
        let (algo, hash) =
            parse_sri("sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=").unwrap();
        assert_eq!(algo, "sha256");
        assert_eq!(hash.len(), 64);
    }

    // Inputs without a `-` separator are rejected.
    #[test]
    fn test_parse_sri_invalid_format() {
        assert!(parse_sri("invalid").is_none());
        assert!(parse_sri("sha512").is_none());
        assert!(parse_sri("").is_none());
    }

    // A digest part that is not valid base64 is rejected.
    #[test]
    fn test_parse_sri_invalid_base64() {
        assert!(parse_sri("sha512-!!!invalid!!!").is_none());
    }

    // `Name <email>` splits into both parts.
    #[test]
    fn test_split_name_email_full_format() {
        let (name, email) = split_name_email("John Doe <john@example.com>");
        assert_eq!(name, Some("John Doe".to_string()));
        assert_eq!(email, Some("john@example.com".to_string()));
    }

    // Without brackets the whole input is the name.
    #[test]
    fn test_split_name_email_name_only() {
        let (name, email) = split_name_email("John Doe");
        assert_eq!(name, Some("John Doe".to_string()));
        assert_eq!(email, None);
    }

    // A bare address without brackets is not recognised as an email; it is
    // returned as the name.
    #[test]
    fn test_split_name_email_email_only_plain() {
        let (name, email) = split_name_email("john@example.com");
        assert_eq!(name, Some("john@example.com".to_string()));
        assert_eq!(email, None);
    }

    // A bracketed address with nothing before it yields no name.
    #[test]
    fn test_split_name_email_email_only_brackets() {
        let (name, email) = split_name_email("<john@example.com>");
        assert_eq!(name, None);
        assert_eq!(email, Some("john@example.com".to_string()));
    }

    // The name is trimmed, but whitespace inside the brackets is preserved.
    #[test]
    fn test_split_name_email_whitespace_trimming() {
        let (name, email) = split_name_email(" John Doe < john@example.com > ");
        assert_eq!(name, Some("John Doe".to_string()));
        assert_eq!(email, Some(" john@example.com ".to_string()));
    }

    // Empty input yields an empty name, not `None`.
    #[test]
    fn test_split_name_email_empty_string() {
        let (name, email) = split_name_email("");
        assert_eq!(name, Some("".to_string()));
        assert_eq!(email, None);
    }

    // Whitespace-only input is trimmed down to an empty name.
    #[test]
    fn test_split_name_email_whitespace_only() {
        let (name, email) = split_name_email(" ");
        assert_eq!(name, Some("".to_string()));
        assert_eq!(email, None);
    }

    // `>` appearing before `<` does not form a bracket pair.
    #[test]
    fn test_split_name_email_invalid_bracket_order() {
        let (name, email) = split_name_email("John >email< Doe");
        assert_eq!(name, Some("John >email< Doe".to_string()));
        assert_eq!(email, None);
    }

    // An unterminated `<` leaves the input unsplit.
    #[test]
    fn test_split_name_email_missing_close_bracket() {
        let (name, email) = split_name_email("John Doe <email@example.com");
        assert_eq!(name, Some("John Doe <email@example.com".to_string()));
        assert_eq!(email, None);
    }

    // A lone `>` leaves the input unsplit.
    #[test]
    fn test_split_name_email_missing_open_bracket() {
        let (name, email) = split_name_email("John Doe email@example.com>");
        assert_eq!(name, Some("John Doe email@example.com>".to_string()));
        assert_eq!(email, None);
    }

    // A one-byte file exceeds an explicit limit of zero bytes.
    #[test]
    fn test_read_file_to_string_oversized() {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("big.txt");
        fs::write(&file_path, "x").unwrap();

        let result = read_file_to_string(&file_path, Some(0));
        assert!(result.is_err());
    }

    // The byte 0xFF is never valid UTF-8; the read still succeeds and the
    // valid text around it survives.
    #[test]
    fn test_read_file_to_string_lossy_utf8() {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("bad_utf8.txt");
        let mut file = File::create(&file_path).unwrap();
        file.write_all(b"hello\xffworld").unwrap();

        let content = read_file_to_string(&file_path, None).unwrap();
        assert!(content.contains("hello"));
        assert!(content.contains("world"));
    }

    // Values within the limit come back unchanged.
    #[test]
    fn test_truncate_field_within_limit() {
        let s = "short value".to_string();
        assert_eq!(truncate_field(s.clone()), s);
    }

    // Values over the limit are cut down to at most MAX_FIELD_LENGTH bytes.
    #[test]
    fn test_truncate_field_exceeds_limit() {
        let long = "x".repeat(MAX_FIELD_LENGTH + 100);
        let truncated = truncate_field(long);
        assert!(truncated.len() <= MAX_FIELD_LENGTH);
    }
}