1use std::collections::HashMap;
14use std::io::Read;
15use std::path::Path;
16
17use flate2::read::GzDecoder;
18use tar::Archive;
19
20use crate::manifest::schema::PatchFileInfo;
21
/// Upper bound (64 MiB) on the decompressed bytes read from one archive.
///
/// `read_archive_to_map` applies it to the whole gzip stream with
/// `Read::take`, so tar headers and padding count towards it as well as
/// entry data.
const MAX_TOTAL_DECOMPRESSED_BYTES: u64 = 64 * 1024 * 1024;

/// Largest size (16 MiB) a single entry header may declare; anything bigger
/// is rejected with [`ArchiveError::EntryTooLarge`] before its data is read.
const MAX_ENTRY_BYTES: u64 = 16 * 1024 * 1024;

/// Maximum number of tar entries accepted from one archive.
///
/// The count is taken before the entry-type filter, so skipped non-regular
/// entries count towards it too.
const MAX_ENTRIES: usize = 10_000;
36
/// Errors produced while reading a gzip-compressed tar archive.
#[derive(Debug, thiserror::Error)]
pub enum ArchiveError {
    /// An underlying I/O failure; any `std::io::Error` converts into this
    /// variant through `From`, so `?` can be used on I/O calls.
    #[error("archive I/O error: {0}")]
    Io(#[from] std::io::Error),
    /// An entry path that could resolve outside the archive root (for
    /// example it is absolute or contains `..`). Carries the path as it
    /// appeared in the archive, before the `package/` prefix is stripped.
    #[error("entry path {0:?} escapes the archive root")]
    UnsafePath(String),
    /// An entry whose header declares `size` bytes, more than the allowed
    /// `max`.
    #[error("entry {path:?} is {size} bytes (max {max})")]
    EntryTooLarge { path: String, size: u64, max: u64 },
    /// The archive holds more entries than the carried limit.
    #[error("archive contains more than {0} entries")]
    TooManyEntries(usize),
}
49
/// Strips one leading `package/` segment from `path`, if present.
///
/// Only an exact `package/` prefix is removed; a name that merely starts with
/// `package` (such as `packagefoo/x.js`) is returned unchanged.
fn normalize_entry_path(path: &str) -> &str {
    match path.strip_prefix("package/") {
        Some(remainder) => remainder,
        None => path,
    }
}
55
56pub fn read_archive_to_map(archive_path: &Path) -> Result<HashMap<String, Vec<u8>>, ArchiveError> {
71 let file = std::fs::File::open(archive_path)?;
72 let bounded = GzDecoder::new(file).take(MAX_TOTAL_DECOMPRESSED_BYTES);
76 let mut tar = Archive::new(bounded);
77
78 let mut out: HashMap<String, Vec<u8>> = HashMap::new();
79 let mut entry_count: usize = 0;
80 for entry in tar.entries()? {
81 let mut entry = entry?;
82
83 entry_count += 1;
84 if entry_count > MAX_ENTRIES {
85 return Err(ArchiveError::TooManyEntries(MAX_ENTRIES));
86 }
87
88 if entry.header().entry_type() != tar::EntryType::Regular {
90 continue;
91 }
92
93 let path = entry.path()?;
94 let path_str = path.to_string_lossy().to_string();
95
96 let normalized = normalize_entry_path(&path_str).to_string();
106 let normalized_path = Path::new(&normalized);
107
108 let leading_separator = normalized
116 .as_bytes()
117 .first()
118 .is_some_and(|b| *b == b'/' || *b == b'\\');
119 if normalized_path.is_absolute()
120 || leading_separator
121 || normalized_path
122 .components()
123 .any(|c| matches!(c, std::path::Component::ParentDir))
124 {
125 return Err(ArchiveError::UnsafePath(path_str));
126 }
127
128 let size = entry.size();
132 if size > MAX_ENTRY_BYTES {
133 return Err(ArchiveError::EntryTooLarge {
134 path: path_str,
135 size,
136 max: MAX_ENTRY_BYTES,
137 });
138 }
139
140 let mut bytes = Vec::with_capacity(size as usize);
143 entry.read_to_end(&mut bytes)?;
144 out.insert(normalized, bytes);
145 }
146
147 Ok(out)
148}
149
150pub fn read_archive_filtered(
155 archive_path: &Path,
156 expected_files: &HashMap<String, PatchFileInfo>,
157) -> Result<HashMap<String, Vec<u8>>, ArchiveError> {
158 let allowed: std::collections::HashSet<String> = expected_files
159 .keys()
160 .map(|k| normalize_entry_path(k).to_string())
161 .collect();
162
163 let all = read_archive_to_map(archive_path)?;
164 Ok(all
165 .into_iter()
166 .filter(|(k, _)| allowed.contains(k))
167 .collect())
168}
169
170#[cfg(test)]
171mod tests {
172 use super::*;
173 use flate2::write::GzEncoder;
174 use flate2::Compression;
175 use std::io::Write;
176 use tar::Builder;
177
178 fn write_archive(path: &Path, entries: &[(&str, &[u8])]) {
179 let file = std::fs::File::create(path).unwrap();
180 let gz = GzEncoder::new(file, Compression::default());
181 let mut builder = Builder::new(gz);
182 for (name, data) in entries {
183 let mut header = tar::Header::new_gnu();
184 header.set_size(data.len() as u64);
185 header.set_mode(0o644);
186 header.set_cksum();
187 builder.append_data(&mut header, name, *data).unwrap();
188 }
189 builder.into_inner().unwrap().finish().unwrap();
190 }
191
192 fn write_archive_with_symlink(path: &Path, link_name: &str, target: &str) {
193 let file = std::fs::File::create(path).unwrap();
194 let gz = GzEncoder::new(file, Compression::default());
195 let mut builder = Builder::new(gz);
196 let mut header = tar::Header::new_gnu();
197 header.set_entry_type(tar::EntryType::Symlink);
198 header.set_size(0);
199 header.set_mode(0o644);
200 header.set_cksum();
201 builder.append_link(&mut header, link_name, target).unwrap();
202 builder.into_inner().unwrap().finish().unwrap();
203 }
204
205 fn make_file_info() -> HashMap<String, PatchFileInfo> {
206 let mut files = HashMap::new();
207 files.insert(
208 "package/index.js".to_string(),
209 PatchFileInfo {
210 before_hash: "a".repeat(64),
211 after_hash: "b".repeat(64),
212 },
213 );
214 files.insert(
215 "lib/util.js".to_string(),
216 PatchFileInfo {
217 before_hash: "c".repeat(64),
218 after_hash: "d".repeat(64),
219 },
220 );
221 files
222 }
223
224 #[test]
225 fn test_read_archive_basic() {
226 let dir = tempfile::tempdir().unwrap();
227 let archive = dir.path().join("arc.tar.gz");
228 write_archive(
229 &archive,
230 &[
231 ("package/index.js", b"patched index"),
232 ("lib/util.js", b"patched util"),
233 ],
234 );
235
236 let map = read_archive_to_map(&archive).unwrap();
237 assert_eq!(map.len(), 2);
238 assert_eq!(map.get("index.js").unwrap(), b"patched index");
240 assert_eq!(map.get("lib/util.js").unwrap(), b"patched util");
241 }
242
243 fn write_raw_archive(path: &Path, name: &[u8], data: &[u8]) {
248 let mut block = [0u8; 512];
249 let copy_len = name.len().min(100);
251 block[..copy_len].copy_from_slice(&name[..copy_len]);
252 block[100..108].copy_from_slice(b"0000644\0");
254 let size_str = format!("{:011o}", data.len());
256 block[124..135].copy_from_slice(size_str.as_bytes());
257 block[135] = 0;
258 block[136..147].copy_from_slice(b"00000000000");
260 block[147] = 0;
261 block[156] = b'0';
263 block[257..263].copy_from_slice(b"ustar\0");
265 block[263..265].copy_from_slice(b"00");
266 block[148..156].fill(b' ');
268 let sum: u32 = block.iter().map(|&b| b as u32).sum();
269 let sum_str = format!("{:06o}\0 ", sum);
270 block[148..156].copy_from_slice(sum_str.as_bytes());
271
272 let mut tar_bytes = Vec::new();
273 tar_bytes.extend_from_slice(&block);
274 tar_bytes.extend_from_slice(data);
275 let pad = (512 - (data.len() % 512)) % 512;
277 tar_bytes.extend(std::iter::repeat_n(0u8, pad));
278 tar_bytes.extend([0u8; 1024]);
280
281 let file = std::fs::File::create(path).unwrap();
282 let mut gz = GzEncoder::new(file, Compression::default());
283 gz.write_all(&tar_bytes).unwrap();
284 gz.finish().unwrap();
285 }
286
287 #[test]
288 fn test_read_archive_rejects_absolute_paths() {
289 let dir = tempfile::tempdir().unwrap();
290 let archive = dir.path().join("arc.tar.gz");
291 write_raw_archive(&archive, b"/etc/passwd", b"evil");
292
293 let err = read_archive_to_map(&archive).unwrap_err();
294 assert!(matches!(err, ArchiveError::UnsafePath(_)));
295 }
296
297 #[test]
298 fn test_read_archive_rejects_backslash_absolute_paths() {
299 let dir = tempfile::tempdir().unwrap();
302 let archive = dir.path().join("arc.tar.gz");
303 write_raw_archive(&archive, b"\\Windows\\System32\\evil.dll", b"evil");
304
305 let err = read_archive_to_map(&archive).unwrap_err();
306 assert!(matches!(err, ArchiveError::UnsafePath(_)));
307 }
308
309 #[test]
310 fn test_read_archive_rejects_double_slash_package_escape() {
311 let dir = tempfile::tempdir().unwrap();
318 let archive = dir.path().join("arc.tar.gz");
319 write_raw_archive(&archive, b"package//etc/passwd", b"evil");
320
321 let err = read_archive_to_map(&archive).unwrap_err();
322 assert!(
323 matches!(err, ArchiveError::UnsafePath(_)),
324 "double-slash package escape must be rejected, got {err:?}"
325 );
326 }
327
328 #[test]
329 fn test_read_archive_rejects_package_prefixed_backslash_escape() {
330 let dir = tempfile::tempdir().unwrap();
334 let archive = dir.path().join("arc.tar.gz");
335 write_raw_archive(&archive, b"package/\\evil", b"evil");
336
337 let err = read_archive_to_map(&archive).unwrap_err();
338 assert!(
339 matches!(err, ArchiveError::UnsafePath(_)),
340 "package-prefixed backslash escape must be rejected, got {err:?}"
341 );
342 }
343
344 #[test]
345 fn test_read_archive_rejects_package_prefixed_parent_traversal() {
346 let dir = tempfile::tempdir().unwrap();
349 let archive = dir.path().join("arc.tar.gz");
350 write_raw_archive(&archive, b"package/../../etc/passwd", b"evil");
351
352 let err = read_archive_to_map(&archive).unwrap_err();
353 assert!(
354 matches!(err, ArchiveError::UnsafePath(_)),
355 "package-prefixed parent traversal must be rejected, got {err:?}"
356 );
357 }
358
359 #[test]
360 fn test_read_archive_rejects_parent_traversal() {
361 let dir = tempfile::tempdir().unwrap();
362 let archive = dir.path().join("arc.tar.gz");
363 write_raw_archive(&archive, b"../../etc/passwd", b"evil");
364
365 let err = read_archive_to_map(&archive).unwrap_err();
366 assert!(matches!(err, ArchiveError::UnsafePath(_)));
367 }
368
369 #[test]
370 fn test_read_archive_skips_non_regular_entries() {
371 let dir = tempfile::tempdir().unwrap();
372 let archive = dir.path().join("arc.tar.gz");
373 write_archive_with_symlink(&archive, "link", "target");
374 let map = read_archive_to_map(&archive).unwrap();
376 assert!(map.is_empty());
377 }
378
379 #[test]
380 fn test_read_archive_filtered_drops_unexpected_entries() {
381 let dir = tempfile::tempdir().unwrap();
382 let archive = dir.path().join("arc.tar.gz");
383 write_archive(
384 &archive,
385 &[
386 ("package/index.js", b"patched index"),
387 ("lib/util.js", b"patched util"),
388 ("bonus/extra.js", b"unwanted"),
389 ],
390 );
391
392 let files = make_file_info();
393 let map = read_archive_filtered(&archive, &files).unwrap();
394 assert_eq!(map.len(), 2);
396 assert!(map.contains_key("index.js"));
397 assert!(map.contains_key("lib/util.js"));
398 assert!(!map.contains_key("bonus/extra.js"));
399 }
400
401 #[test]
402 fn test_read_archive_missing_file() {
403 let result = read_archive_to_map(Path::new("/nonexistent/archive.tar.gz"));
404 assert!(result.is_err());
405 }
406
407 #[test]
408 fn test_normalize_entry_path() {
409 assert_eq!(normalize_entry_path("package/lib/x.js"), "lib/x.js");
410 assert_eq!(normalize_entry_path("lib/x.js"), "lib/x.js");
411 assert_eq!(normalize_entry_path("packagefoo/x.js"), "packagefoo/x.js");
412 }
413
414 #[test]
415 fn test_read_archive_corrupt_gzip() {
416 let dir = tempfile::tempdir().unwrap();
417 let archive = dir.path().join("bogus.tar.gz");
418 std::fs::write(&archive, b"not actually gzipped").unwrap();
419 let result = read_archive_to_map(&archive);
420 assert!(result.is_err());
421 }
422
423 #[test]
424 #[allow(clippy::needless_borrows_for_generic_args)]
425 fn test_round_trip_via_builder() {
426 let dir = tempfile::tempdir().unwrap();
428 let archive = dir.path().join("rt.tar.gz");
429 let original: &[u8] = b"hello world";
430 write_archive(&archive, &[("only.txt", original)]);
431 let map = read_archive_to_map(&archive).unwrap();
432 assert_eq!(map.get("only.txt").map(|v| v.as_slice()), Some(original));
433 }
434
435 fn raw_entry(name: &[u8], declared_size: u64, data: &[u8]) -> Vec<u8> {
442 let mut block = [0u8; 512];
443 let copy_len = name.len().min(100);
444 block[..copy_len].copy_from_slice(&name[..copy_len]);
445 block[100..108].copy_from_slice(b"0000644\0");
446 let size_str = format!("{:011o}", declared_size);
447 block[124..135].copy_from_slice(size_str.as_bytes());
448 block[135] = 0;
449 block[136..147].copy_from_slice(b"00000000000");
450 block[147] = 0;
451 block[156] = b'0'; block[257..263].copy_from_slice(b"ustar\0");
453 block[263..265].copy_from_slice(b"00");
454 block[148..156].fill(b' ');
455 let sum: u32 = block.iter().map(|&b| b as u32).sum();
456 let sum_str = format!("{:06o}\0 ", sum);
457 block[148..156].copy_from_slice(sum_str.as_bytes());
458
459 let mut out = Vec::new();
460 out.extend_from_slice(&block);
461 out.extend_from_slice(data);
462 let pad = if data.is_empty() {
463 0
464 } else {
465 (512 - (data.len() % 512)) % 512
466 };
467 out.extend(std::iter::repeat_n(0u8, pad));
468 out
469 }
470
471 fn write_raw_tar_gz(path: &Path, entries: &[Vec<u8>], trailer: bool) {
472 let mut tar_bytes = Vec::new();
473 for e in entries {
474 tar_bytes.extend_from_slice(e);
475 }
476 if trailer {
477 tar_bytes.extend([0u8; 1024]);
478 }
479 let file = std::fs::File::create(path).unwrap();
480 let mut gz = GzEncoder::new(file, Compression::default());
481 gz.write_all(&tar_bytes).unwrap();
482 gz.finish().unwrap();
483 }
484
485 #[test]
486 fn test_read_archive_rejects_oversize_entry_header() {
487 let dir = tempfile::tempdir().unwrap();
492 let archive = dir.path().join("oversize.tar.gz");
493 let entry = raw_entry(b"big.bin", 1024 * 1024 * 1024, b"tiny");
494 write_raw_tar_gz(&archive, &[entry], true);
495
496 let err = read_archive_to_map(&archive).unwrap_err();
497 assert!(
498 matches!(err, ArchiveError::EntryTooLarge { .. }),
499 "expected EntryTooLarge, got {:?}",
500 err
501 );
502 }
503
504 #[test]
505 fn test_read_archive_rejects_too_many_entries() {
506 let dir = tempfile::tempdir().unwrap();
509 let archive = dir.path().join("many.tar.gz");
510 let entries: Vec<Vec<u8>> = (0..(MAX_ENTRIES + 1))
511 .map(|i| raw_entry(format!("f{i}").as_bytes(), 0, b""))
512 .collect();
513 write_raw_tar_gz(&archive, &entries, true);
514
515 let err = read_archive_to_map(&archive).unwrap_err();
516 assert!(
517 matches!(err, ArchiveError::TooManyEntries(_)),
518 "expected TooManyEntries, got {:?}",
519 err
520 );
521 }
522
523 #[test]
524 fn test_read_archive_decompression_bomb_truncated() {
525 let dir = tempfile::tempdir().unwrap();
537 let archive = dir.path().join("bomb.tar.gz");
538
539 let chunk = vec![0u8; (MAX_ENTRY_BYTES - 1) as usize];
544 let entry1 = raw_entry(b"a.bin", chunk.len() as u64, &chunk);
545 let entry2 = raw_entry(b"b.bin", chunk.len() as u64, &chunk);
546 let entry3 = raw_entry(b"c.bin", chunk.len() as u64, &chunk);
547 let entry4 = raw_entry(b"d.bin", chunk.len() as u64, &chunk);
548 let entry5 = raw_entry(b"e.bin", chunk.len() as u64, &chunk);
551 write_raw_tar_gz(&archive, &[entry1, entry2, entry3, entry4, entry5], true);
552
553 let result = read_archive_to_map(&archive);
554 match result {
559 Err(_) => { }
560 Ok(map) => {
561 assert!(
563 map.len() < 5,
564 "decompression cap failed: ingested {} entries (~{} MiB)",
565 map.len(),
566 map.len() * (MAX_ENTRY_BYTES as usize - 1) / (1024 * 1024)
567 );
568 }
569 }
570 }
571}