1use std::collections::HashMap;
14use std::io::Read;
15use std::path::Path;
16
17use flate2::read::GzDecoder;
18use tar::Archive;
19
20use crate::manifest::schema::PatchFileInfo;
21
/// Cap on the number of decompressed bytes read from a single archive (64 MiB).
const MAX_TOTAL_DECOMPRESSED_BYTES: u64 = 64 << 20;

/// Largest declared size accepted for a single archive entry (16 MiB).
const MAX_ENTRY_BYTES: u64 = 16 << 20;

/// Largest number of entries accepted from a single archive.
const MAX_ENTRIES: usize = 10_000;
36
/// Errors produced while reading an archive.
#[derive(Debug, thiserror::Error)]
pub enum ArchiveError {
    /// An underlying I/O failure; `#[from]` lets `?` convert a `std::io::Error`
    /// into this variant.
    #[error("archive I/O error: {0}")]
    Io(#[from] std::io::Error),
    /// An entry path that could escape the archive root, e.g. an absolute path
    /// or one containing a `..` component. Carries the offending path.
    #[error("entry path {0:?} escapes the archive root")]
    UnsafePath(String),
    /// An entry whose declared `size` is larger than the allowed `max`.
    #[error("entry {path:?} is {size} bytes (max {max})")]
    EntryTooLarge { path: String, size: u64, max: u64 },
    /// The archive holds more entries than allowed. Carries the limit.
    #[error("archive contains more than {0} entries")]
    TooManyEntries(usize),
}
49
/// Returns `path` without one leading `package/` prefix.
///
/// Paths that do not start with exactly `package/` are returned unchanged, and
/// only the first occurrence is removed.
fn normalize_entry_path(path: &str) -> &str {
    match path.strip_prefix("package/") {
        Some(remainder) => remainder,
        None => path,
    }
}
55
56pub fn read_archive_to_map(archive_path: &Path) -> Result<HashMap<String, Vec<u8>>, ArchiveError> {
71 let file = std::fs::File::open(archive_path)?;
72 let bounded = GzDecoder::new(file).take(MAX_TOTAL_DECOMPRESSED_BYTES);
76 let mut tar = Archive::new(bounded);
77
78 let mut out: HashMap<String, Vec<u8>> = HashMap::new();
79 let mut entry_count: usize = 0;
80 for entry in tar.entries()? {
81 let mut entry = entry?;
82
83 entry_count += 1;
84 if entry_count > MAX_ENTRIES {
85 return Err(ArchiveError::TooManyEntries(MAX_ENTRIES));
86 }
87
88 if entry.header().entry_type() != tar::EntryType::Regular {
90 continue;
91 }
92
93 let path = entry.path()?;
94 let path_str = path.to_string_lossy().to_string();
95
96 let leading_separator = path_str
104 .as_bytes()
105 .first()
106 .is_some_and(|b| *b == b'/' || *b == b'\\');
107 if path.is_absolute()
108 || leading_separator
109 || path
110 .components()
111 .any(|c| matches!(c, std::path::Component::ParentDir))
112 {
113 return Err(ArchiveError::UnsafePath(path_str));
114 }
115
116 let size = entry.size();
120 if size > MAX_ENTRY_BYTES {
121 return Err(ArchiveError::EntryTooLarge {
122 path: path_str,
123 size,
124 max: MAX_ENTRY_BYTES,
125 });
126 }
127
128 let normalized = normalize_entry_path(&path_str).to_string();
129 let mut bytes = Vec::with_capacity(size as usize);
132 entry.read_to_end(&mut bytes)?;
133 out.insert(normalized, bytes);
134 }
135
136 Ok(out)
137}
138
139pub fn read_archive_filtered(
144 archive_path: &Path,
145 expected_files: &HashMap<String, PatchFileInfo>,
146) -> Result<HashMap<String, Vec<u8>>, ArchiveError> {
147 let allowed: std::collections::HashSet<String> = expected_files
148 .keys()
149 .map(|k| normalize_entry_path(k).to_string())
150 .collect();
151
152 let all = read_archive_to_map(archive_path)?;
153 Ok(all
154 .into_iter()
155 .filter(|(k, _)| allowed.contains(k))
156 .collect())
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;
    use tar::Builder;

    // ---------------------------------------------------------------------
    // Fixture helpers
    // ---------------------------------------------------------------------

    /// Writes a `.tar.gz` at `path` containing one regular file per
    /// `(name, data)` pair, using the `tar` crate's `Builder`.
    fn write_archive(path: &Path, entries: &[(&str, &[u8])]) {
        let file = std::fs::File::create(path).unwrap();
        let gz = GzEncoder::new(file, Compression::default());
        let mut builder = Builder::new(gz);
        for (name, data) in entries {
            let mut header = tar::Header::new_gnu();
            header.set_size(data.len() as u64);
            header.set_mode(0o644);
            header.set_cksum();
            builder.append_data(&mut header, name, *data).unwrap();
        }
        builder.into_inner().unwrap().finish().unwrap();
    }

    /// Writes a `.tar.gz` at `path` whose only entry is a symlink
    /// `link_name -> target`.
    fn write_archive_with_symlink(path: &Path, link_name: &str, target: &str) {
        let file = std::fs::File::create(path).unwrap();
        let gz = GzEncoder::new(file, Compression::default());
        let mut builder = Builder::new(gz);
        let mut header = tar::Header::new_gnu();
        header.set_entry_type(tar::EntryType::Symlink);
        header.set_size(0);
        header.set_mode(0o644);
        header.set_cksum();
        builder
            .append_link(&mut header, link_name, target)
            .unwrap();
        builder.into_inner().unwrap().finish().unwrap();
    }

    /// Expected-file table with one `package/`-prefixed key and one plain key.
    fn make_file_info() -> HashMap<String, PatchFileInfo> {
        let mut files = HashMap::new();
        files.insert(
            "package/index.js".to_string(),
            PatchFileInfo {
                before_hash: "a".repeat(64),
                after_hash: "b".repeat(64),
            },
        );
        files.insert(
            "lib/util.js".to_string(),
            PatchFileInfo {
                before_hash: "c".repeat(64),
                after_hash: "d".repeat(64),
            },
        );
        files
    }

    /// Hand-assembles one tar entry: a 512-byte ustar header followed by `data`,
    /// zero-padded to the next 512-byte boundary.
    ///
    /// The header is written byte by byte rather than through `tar::Builder`, so
    /// `name` is stored exactly as given (truncated to 100 bytes) and
    /// `declared_size` does not have to match `data.len()`.
    ///
    /// Panics if `declared_size` needs more than 11 octal digits.
    fn raw_entry(name: &[u8], declared_size: u64, data: &[u8]) -> Vec<u8> {
        let mut block = [0u8; 512];

        // name: bytes 0..100
        let copy_len = name.len().min(100);
        block[..copy_len].copy_from_slice(&name[..copy_len]);
        // mode: bytes 100..108, octal, NUL-terminated
        block[100..108].copy_from_slice(b"0000644\0");
        // size: bytes 124..136, 11 octal digits plus NUL
        let size_str = format!("{:011o}", declared_size);
        block[124..135].copy_from_slice(size_str.as_bytes());
        block[135] = 0;
        // mtime: bytes 136..148
        block[136..147].copy_from_slice(b"00000000000");
        block[147] = 0;
        // typeflag: byte 156, '0' marks a regular file
        block[156] = b'0';
        // magic and version: bytes 257..265
        block[257..263].copy_from_slice(b"ustar\0");
        block[263..265].copy_from_slice(b"00");
        // checksum: bytes 148..156, the byte sum of the header computed while
        // the checksum field itself is filled with spaces
        block[148..156].fill(b' ');
        let sum: u32 = block.iter().map(|&b| b as u32).sum();
        let sum_str = format!("{:06o}\0 ", sum);
        block[148..156].copy_from_slice(sum_str.as_bytes());

        let pad = (512 - (data.len() % 512)) % 512;
        let mut out = Vec::with_capacity(block.len() + data.len() + pad);
        out.extend_from_slice(&block);
        out.extend_from_slice(data);
        out.resize(out.len() + pad, 0);
        out
    }

    /// Concatenates pre-built raw `entries`, optionally appends the two
    /// all-zero 512-byte blocks that terminate a tar stream, and gzips the
    /// result to `path`.
    fn write_raw_tar_gz(path: &Path, entries: &[Vec<u8>], trailer: bool) {
        let mut tar_bytes = entries.concat();
        if trailer {
            tar_bytes.extend([0u8; 1024]);
        }
        let file = std::fs::File::create(path).unwrap();
        let mut gz = GzEncoder::new(file, Compression::default());
        gz.write_all(&tar_bytes).unwrap();
        gz.finish().unwrap();
    }

    /// Writes a single-entry raw archive whose header stores `name` verbatim and
    /// declares the true length of `data`.
    fn write_raw_archive(path: &Path, name: &[u8], data: &[u8]) {
        let entry = raw_entry(name, data.len() as u64, data);
        write_raw_tar_gz(path, &[entry], true);
    }

    /// Asserts that an archive whose only entry is called `name` is rejected
    /// with `ArchiveError::UnsafePath`.
    fn assert_rejected_as_unsafe(name: &[u8]) {
        let dir = tempfile::tempdir().unwrap();
        let archive = dir.path().join("arc.tar.gz");
        write_raw_archive(&archive, name, b"evil");

        let err = read_archive_to_map(&archive).unwrap_err();
        assert!(matches!(err, ArchiveError::UnsafePath(_)));
    }

    // ---------------------------------------------------------------------
    // Tests
    // ---------------------------------------------------------------------

    #[test]
    fn test_read_archive_basic() {
        let dir = tempfile::tempdir().unwrap();
        let archive = dir.path().join("arc.tar.gz");
        write_archive(
            &archive,
            &[
                ("package/index.js", b"patched index"),
                ("lib/util.js", b"patched util"),
            ],
        );

        let map = read_archive_to_map(&archive).unwrap();
        assert_eq!(map.len(), 2);
        // The `package/` prefix is stripped from the key.
        assert_eq!(map.get("index.js").unwrap(), b"patched index");
        assert_eq!(map.get("lib/util.js").unwrap(), b"patched util");
    }

    #[test]
    fn test_read_archive_rejects_absolute_paths() {
        assert_rejected_as_unsafe(b"/etc/passwd");
    }

    #[test]
    fn test_read_archive_rejects_backslash_absolute_paths() {
        assert_rejected_as_unsafe(b"\\Windows\\System32\\evil.dll");
    }

    #[test]
    fn test_read_archive_rejects_parent_traversal() {
        assert_rejected_as_unsafe(b"../../etc/passwd");
    }

    #[test]
    fn test_read_archive_skips_non_regular_entries() {
        let dir = tempfile::tempdir().unwrap();
        let archive = dir.path().join("arc.tar.gz");
        write_archive_with_symlink(&archive, "link", "target");
        let map = read_archive_to_map(&archive).unwrap();
        assert!(map.is_empty());
    }

    #[test]
    fn test_read_archive_filtered_drops_unexpected_entries() {
        let dir = tempfile::tempdir().unwrap();
        let archive = dir.path().join("arc.tar.gz");
        write_archive(
            &archive,
            &[
                ("package/index.js", b"patched index"),
                ("lib/util.js", b"patched util"),
                ("bonus/extra.js", b"unwanted"),
            ],
        );

        let files = make_file_info();
        let map = read_archive_filtered(&archive, &files).unwrap();
        assert_eq!(map.len(), 2);
        assert!(map.contains_key("index.js"));
        assert!(map.contains_key("lib/util.js"));
        assert!(!map.contains_key("bonus/extra.js"));
    }

    #[test]
    fn test_read_archive_missing_file() {
        let result = read_archive_to_map(Path::new("/nonexistent/archive.tar.gz"));
        assert!(result.is_err());
    }

    #[test]
    fn test_normalize_entry_path() {
        assert_eq!(normalize_entry_path("package/lib/x.js"), "lib/x.js");
        assert_eq!(normalize_entry_path("lib/x.js"), "lib/x.js");
        // Only the exact `package/` prefix is stripped.
        assert_eq!(normalize_entry_path("packagefoo/x.js"), "packagefoo/x.js");
    }

    #[test]
    fn test_read_archive_corrupt_gzip() {
        let dir = tempfile::tempdir().unwrap();
        let archive = dir.path().join("bogus.tar.gz");
        std::fs::write(&archive, b"not actually gzipped").unwrap();
        let result = read_archive_to_map(&archive);
        assert!(result.is_err());
    }

    #[test]
    // NOTE(review): nothing in this test visibly borrows a generic argument, so
    // this allow looks stale - confirm with clippy and remove it if so.
    #[allow(clippy::needless_borrows_for_generic_args)]
    fn test_round_trip_via_builder() {
        let dir = tempfile::tempdir().unwrap();
        let archive = dir.path().join("rt.tar.gz");
        let original: &[u8] = b"hello world";
        write_archive(&archive, &[("only.txt", original)]);
        let map = read_archive_to_map(&archive).unwrap();
        assert_eq!(map.get("only.txt").map(|v| v.as_slice()), Some(original));
    }

    #[test]
    fn test_read_archive_rejects_oversize_entry_header() {
        let dir = tempfile::tempdir().unwrap();
        let archive = dir.path().join("oversize.tar.gz");
        // The header declares 1 GiB while only four bytes of data follow; the
        // declared size alone must be enough to reject the entry.
        let entry = raw_entry(b"big.bin", 1024 * 1024 * 1024, b"tiny");
        write_raw_tar_gz(&archive, &[entry], true);

        let err = read_archive_to_map(&archive).unwrap_err();
        assert!(
            matches!(err, ArchiveError::EntryTooLarge { .. }),
            "expected EntryTooLarge, got {:?}",
            err
        );
    }

    #[test]
    fn test_read_archive_rejects_too_many_entries() {
        let dir = tempfile::tempdir().unwrap();
        let archive = dir.path().join("many.tar.gz");
        let entries: Vec<Vec<u8>> = (0..(MAX_ENTRIES + 1))
            .map(|i| raw_entry(format!("f{i}").as_bytes(), 0, b""))
            .collect();
        write_raw_tar_gz(&archive, &entries, true);

        let err = read_archive_to_map(&archive).unwrap_err();
        assert!(
            matches!(err, ArchiveError::TooManyEntries(_)),
            "expected TooManyEntries, got {:?}",
            err
        );
    }

    #[test]
    fn test_read_archive_decompression_bomb_truncated() {
        let dir = tempfile::tempdir().unwrap();
        let archive = dir.path().join("bomb.tar.gz");

        // Five entries, each one byte under the per-entry limit, add up to more
        // than MAX_TOTAL_DECOMPRESSED_BYTES while every entry is individually
        // acceptable.
        let chunk = vec![0u8; (MAX_ENTRY_BYTES - 1) as usize];
        let entries: Vec<Vec<u8>> = ["a.bin", "b.bin", "c.bin", "d.bin", "e.bin"]
            .iter()
            .map(|name| raw_entry(name.as_bytes(), chunk.len() as u64, &chunk))
            .collect();
        write_raw_tar_gz(&archive, &entries, true);

        // An error is an acceptable outcome; a successful read must not have
        // ingested all five entries.
        if let Ok(map) = read_archive_to_map(&archive) {
            assert!(
                map.len() < 5,
                "decompression cap failed: ingested {} entries (~{} MiB)",
                map.len(),
                map.len() * (MAX_ENTRY_BYTES as usize - 1) / (1024 * 1024)
            );
        }
    }
}