1use std::collections::HashSet;
9use std::fs::{self, File};
10use std::hash::Hash;
11use std::io::Read;
12use std::path::Path;
13
14use anyhow::Result;
15use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64_STANDARD};
16use packageurl::PackageUrl;
17
/// Default upper bound, in bytes, on the size of a file accepted by
/// [`read_file_to_string`] when the caller supplies no explicit limit (100 MiB).
pub const MAX_MANIFEST_SIZE: u64 = 100 * 1024 * 1024;

/// Maximum length, in bytes, of a single field value; longer values are cut
/// down by [`truncate_field`] (10 MiB).
pub const MAX_FIELD_LENGTH: usize = 10 * 1024 * 1024;

/// Iteration cap of 100,000.
///
/// NOTE(review): not referenced in this file; presumably a bound on parser
/// loop iterations — confirm against callers.
pub const MAX_ITERATION_COUNT: usize = 100_000;

/// Depth past which [`RecursionGuard::exceeded`] reports `true`.
pub const MAX_RECURSION_DEPTH: usize = 50;
29
30pub struct RecursionGuard<K: Hash + Eq> {
60 depth: usize,
61 visited: HashSet<K>,
62}
63
64impl<K: Hash + Eq> RecursionGuard<K> {
65 pub fn new() -> Self {
66 Self {
67 depth: 0,
68 visited: HashSet::new(),
69 }
70 }
71
72 pub fn exceeded(&self) -> bool {
73 self.depth > MAX_RECURSION_DEPTH
74 }
75
76 pub fn depth(&self) -> usize {
77 self.depth
78 }
79
80 pub fn enter(&mut self, key: K) -> bool {
81 if self.visited.contains(&key) {
82 return true;
83 }
84 self.visited.insert(key);
85 self.depth += 1;
86 false
87 }
88
89 pub fn leave(&mut self, key: K) {
90 self.visited.remove(&key);
91 self.depth -= 1;
92 }
93}
94
95impl RecursionGuard<()> {
96 pub fn depth_only() -> Self {
97 Self::new()
98 }
99
100 pub fn descend(&mut self) -> bool {
101 self.depth += 1;
102 self.exceeded()
103 }
104
105 pub fn ascend(&mut self) {
106 self.depth -= 1;
107 }
108}
109
110impl<K: Hash + Eq> Default for RecursionGuard<K> {
111 fn default() -> Self {
112 Self::new()
113 }
114}
115
116pub fn truncate_field(value: String) -> String {
120 if value.len() <= MAX_FIELD_LENGTH {
121 return value;
122 }
123 let truncated = &value[..value.floor_char_boundary(MAX_FIELD_LENGTH)];
124 crate::parser_warn!(
125 "Truncated field value from {} bytes to {} bytes (MAX_FIELD_LENGTH)",
126 value.len(),
127 truncated.len()
128 );
129 truncated.to_string()
130}
131
132pub fn read_file_to_string(path: &Path, max_size: Option<u64>) -> Result<String> {
159 let limit = max_size.unwrap_or(MAX_MANIFEST_SIZE);
160
161 let metadata =
162 fs::metadata(path).map_err(|e| anyhow::anyhow!("Cannot stat file {:?}: {}", path, e))?;
163
164 if metadata.len() > limit {
165 anyhow::bail!(
166 "File {:?} is {} bytes, exceeding the {} byte limit",
167 path,
168 metadata.len(),
169 limit
170 );
171 }
172
173 let mut bytes = Vec::with_capacity(metadata.len() as usize);
174 let mut file = File::open(path)?;
175 file.read_to_end(&mut bytes)?;
176
177 match String::from_utf8(bytes) {
178 Ok(s) => Ok(s),
179 Err(err) => {
180 let bytes = err.into_bytes();
181 crate::parser_warn!(
182 "File {:?} contains invalid UTF-8; using lossy conversion",
183 path
184 );
185 Ok(String::from_utf8_lossy(&bytes).into_owned())
186 }
187 }
188}
189
190pub fn npm_purl(full_name: &str, version: Option<&str>) -> Option<String> {
195 let (namespace, name) = if full_name.starts_with('@') {
196 let parts: Vec<&str> = full_name.splitn(2, '/').collect();
197 if parts.len() == 2 {
198 (Some(parts[0]), parts[1])
199 } else {
200 (None, full_name)
201 }
202 } else {
203 (None, full_name)
204 };
205
206 let mut purl = PackageUrl::new("npm", name).ok()?;
207
208 if let Some(ns) = namespace {
209 purl.with_namespace(ns).ok()?;
210 }
211
212 if let Some(ver) = version {
213 purl.with_version(ver).ok()?;
214 }
215
216 Some(purl.to_string())
217}
218
219pub fn parse_sri(integrity: &str) -> Option<(String, String)> {
225 let parts: Vec<&str> = integrity.splitn(2, '-').collect();
226 if parts.len() != 2 {
227 return None;
228 }
229
230 let algorithm = parts[0];
231 let base64_str = parts[1];
232
233 let bytes = BASE64_STANDARD.decode(base64_str).ok()?;
234
235 let hex_string = bytes
236 .iter()
237 .map(|b| format!("{:02x}", b))
238 .collect::<String>();
239
240 Some((algorithm.to_string(), hex_string))
241}
242
/// Splits a `Name <email>` style string into its name and email parts.
///
/// The email is whatever lies between the first `<` and the first `>` that
/// follows it, returned exactly as written (not trimmed). The name is the
/// trimmed text before that `<`, or `None` if nothing but whitespace precedes
/// it. A `>` that appears before the `<` is ignored when looking for the
/// closing bracket.
///
/// When no `<...>` pair is found, the whole input (trimmed) is returned as the
/// name — possibly an empty string — and the email is `None`.
pub fn split_name_email(s: &str) -> (Option<String>, Option<String>) {
    // Look for the closing bracket only in the text after the opening one, so
    // a stray earlier `>` cannot hide a well-formed `<email>`.
    let bracketed = s
        .split_once('<')
        .and_then(|(before, after)| after.split_once('>').map(|(email, _)| (before, email)));

    match bracketed {
        Some((before, email)) => {
            let name = before.trim();
            (
                (!name.is_empty()).then(|| name.to_string()),
                Some(email.to_string()),
            )
        }
        None => (Some(s.trim().to_string()), None),
    }
}
297
298#[cfg(test)]
299mod tests {
300 use super::*;
301 use std::io::Write;
302 use tempfile::tempdir;
303
304 #[test]
305 fn test_read_file_to_string_success() {
306 let dir = tempdir().unwrap();
307 let file_path = dir.path().join("test.txt");
308 let mut file = File::create(&file_path).unwrap();
309 file.write_all(b"test content").unwrap();
310
311 let content = read_file_to_string(&file_path, None).unwrap();
312 assert_eq!(content, "test content");
313 }
314
315 #[test]
316 fn test_read_file_to_string_nonexistent() {
317 let path = Path::new("/nonexistent/file.txt");
318 let result = read_file_to_string(path, None);
319 assert!(result.is_err());
320 }
321
322 #[test]
323 fn test_read_file_to_string_empty() {
324 let dir = tempdir().unwrap();
325 let file_path = dir.path().join("empty.txt");
326 File::create(&file_path).unwrap();
327
328 let content = read_file_to_string(&file_path, None).unwrap();
329 assert_eq!(content, "");
330 }
331
332 #[test]
333 fn test_npm_purl_scoped_with_version() {
334 let purl = npm_purl("@babel/core", Some("7.0.0")).unwrap();
335 assert_eq!(purl, "pkg:npm/%40babel/core@7.0.0");
336 }
337
338 #[test]
339 fn test_npm_purl_scoped_without_version() {
340 let purl = npm_purl("@babel/core", None).unwrap();
341 assert_eq!(purl, "pkg:npm/%40babel/core");
342 }
343
344 #[test]
345 fn test_npm_purl_unscoped_with_version() {
346 let purl = npm_purl("lodash", Some("4.17.21")).unwrap();
347 assert_eq!(purl, "pkg:npm/lodash@4.17.21");
348 }
349
350 #[test]
351 fn test_npm_purl_unscoped_without_version() {
352 let purl = npm_purl("lodash", None).unwrap();
353 assert_eq!(purl, "pkg:npm/lodash");
354 }
355
356 #[test]
357 fn test_npm_purl_scoped_slash_not_encoded() {
358 let purl = npm_purl("@types/node", Some("18.0.0")).unwrap();
359 assert!(purl.contains("/%40types/node"));
360 assert!(!purl.contains("%2F"));
361 }
362
363 #[test]
364 fn test_parse_sri_sha512() {
365 let (algo, hash) = parse_sri("sha512-9NET910DNaIPngYnLLPeg+Ogzqsi9uM4mSboU5y6p8S5DzMTVEsJZrawi+BoDNUVBa2DhJqQYUFvMDfgU062LQ==").unwrap();
366 assert_eq!(algo, "sha512");
367 assert_eq!(hash.len(), 128);
368 }
369
370 #[test]
371 fn test_parse_sri_sha1() {
372 let (algo, hash) = parse_sri("sha1-w7M6te42DYbg5ijwRorn7yfWVN8=").unwrap();
373 assert_eq!(algo, "sha1");
374 assert_eq!(hash.len(), 40);
375 }
376
377 #[test]
378 fn test_parse_sri_sha256() {
379 let (algo, hash) =
380 parse_sri("sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=").unwrap();
381 assert_eq!(algo, "sha256");
382 assert_eq!(hash.len(), 64);
383 }
384
385 #[test]
386 fn test_parse_sri_invalid_format() {
387 assert!(parse_sri("invalid").is_none());
388 assert!(parse_sri("sha512").is_none());
389 assert!(parse_sri("").is_none());
390 }
391
392 #[test]
393 fn test_parse_sri_invalid_base64() {
394 assert!(parse_sri("sha512-!!!invalid!!!").is_none());
395 }
396
397 #[test]
398 fn test_split_name_email_full_format() {
399 let (name, email) = split_name_email("John Doe <john@example.com>");
400 assert_eq!(name, Some("John Doe".to_string()));
401 assert_eq!(email, Some("john@example.com".to_string()));
402 }
403
404 #[test]
405 fn test_split_name_email_name_only() {
406 let (name, email) = split_name_email("John Doe");
407 assert_eq!(name, Some("John Doe".to_string()));
408 assert_eq!(email, None);
409 }
410
411 #[test]
412 fn test_split_name_email_email_only_plain() {
413 let (name, email) = split_name_email("john@example.com");
414 assert_eq!(name, Some("john@example.com".to_string()));
415 assert_eq!(email, None);
416 }
417
418 #[test]
419 fn test_split_name_email_email_only_brackets() {
420 let (name, email) = split_name_email("<john@example.com>");
421 assert_eq!(name, None);
422 assert_eq!(email, Some("john@example.com".to_string()));
423 }
424
425 #[test]
426 fn test_split_name_email_whitespace_trimming() {
427 let (name, email) = split_name_email(" John Doe < john@example.com > ");
428 assert_eq!(name, Some("John Doe".to_string()));
429 assert_eq!(email, Some(" john@example.com ".to_string()));
430 }
431
432 #[test]
433 fn test_split_name_email_empty_string() {
434 let (name, email) = split_name_email("");
435 assert_eq!(name, Some("".to_string()));
436 assert_eq!(email, None);
437 }
438
439 #[test]
440 fn test_split_name_email_whitespace_only() {
441 let (name, email) = split_name_email(" ");
442 assert_eq!(name, Some("".to_string()));
443 assert_eq!(email, None);
444 }
445
446 #[test]
447 fn test_split_name_email_invalid_bracket_order() {
448 let (name, email) = split_name_email("John >email< Doe");
449 assert_eq!(name, Some("John >email< Doe".to_string()));
450 assert_eq!(email, None);
451 }
452
453 #[test]
454 fn test_split_name_email_missing_close_bracket() {
455 let (name, email) = split_name_email("John Doe <email@example.com");
456 assert_eq!(name, Some("John Doe <email@example.com".to_string()));
457 assert_eq!(email, None);
458 }
459
460 #[test]
461 fn test_split_name_email_missing_open_bracket() {
462 let (name, email) = split_name_email("John Doe email@example.com>");
463 assert_eq!(name, Some("John Doe email@example.com>".to_string()));
464 assert_eq!(email, None);
465 }
466
467 #[test]
468 fn test_read_file_to_string_oversized() {
469 let dir = tempdir().unwrap();
470 let file_path = dir.path().join("big.txt");
471 fs::write(&file_path, "x").unwrap();
472
473 let result = read_file_to_string(&file_path, Some(0));
474 assert!(result.is_err());
475 }
476
477 #[test]
478 fn test_read_file_to_string_lossy_utf8() {
479 let dir = tempdir().unwrap();
480 let file_path = dir.path().join("bad_utf8.txt");
481 let mut file = File::create(&file_path).unwrap();
482 file.write_all(b"hello\xffworld").unwrap();
483
484 let content = read_file_to_string(&file_path, None).unwrap();
485 assert!(content.contains("hello"));
486 assert!(content.contains("world"));
487 }
488
489 #[test]
490 fn test_truncate_field_within_limit() {
491 let s = "short value".to_string();
492 assert_eq!(truncate_field(s.clone()), s);
493 }
494
495 #[test]
496 fn test_truncate_field_exceeds_limit() {
497 let long = "x".repeat(MAX_FIELD_LENGTH + 100);
498 let truncated = truncate_field(long);
499 assert!(truncated.len() <= MAX_FIELD_LENGTH);
500 }
501}