1use crate::header::PosixHeader;
28use crate::tar_format_types::TarFormatString;
29use crate::{BLOCKSIZE, POSIX_1003_MAX_FILENAME_LEN};
30#[cfg(feature = "alloc")]
31use alloc::boxed::Box;
32use core::fmt::{Debug, Display, Formatter};
33use core::str::Utf8Error;
34use log::{error, warn};
35
/// Minimum number of blocks (of `BLOCKSIZE` bytes each) that archive data
/// must contain to be accepted by `TarArchiveRef::new` / `TarArchive::new`.
///
/// NOTE(review): presumably one header block plus the two terminating zero
/// blocks of a Tar archive - confirm against the format documentation.
pub const MIN_BLOCK_COUNT: usize = 3;
40
/// A single file entry of a Tar archive, as yielded by
/// [`ArchiveEntryIterator`].
///
/// The payload and the header are borrowed from the archive bytes; the file
/// name is stored by value.
pub struct ArchiveEntry<'a> {
    /// File name of the entry as assembled by the iterator.
    filename: TarFormatString<POSIX_1003_MAX_FILENAME_LEN>,
    /// Payload bytes of the entry.
    data: &'a [u8],
    /// Length of `data` in bytes (set from `data.len()` on construction).
    size: usize,
    /// Header block that describes this entry.
    posix_header: &'a PosixHeader,
}
49
50#[allow(unused)]
51impl<'a> ArchiveEntry<'a> {
52 const fn new(
53 filename: TarFormatString<POSIX_1003_MAX_FILENAME_LEN>,
54 data: &'a [u8],
55 posix_header: &'a PosixHeader,
56 ) -> Self {
57 ArchiveEntry {
58 filename,
59 data,
60 size: data.len(),
61 posix_header,
62 }
63 }
64
65 #[must_use]
68 pub const fn filename(&self) -> TarFormatString<{ POSIX_1003_MAX_FILENAME_LEN }> {
69 self.filename
70 }
71
72 #[must_use]
74 pub const fn data(&self) -> &'a [u8] {
75 self.data
76 }
77
78 #[allow(clippy::missing_const_for_fn)]
83 pub fn data_as_str(&self) -> Result<&'a str, Utf8Error> {
84 core::str::from_utf8(self.data)
85 }
86
87 #[must_use]
89 pub const fn size(&self) -> usize {
90 self.size
91 }
92
93 #[must_use]
95 pub const fn posix_header(&self) -> &PosixHeader {
96 self.posix_header
97 }
98}
99
100impl Debug for ArchiveEntry<'_> {
101 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
102 f.debug_struct("ArchiveEntry")
103 .field("filename", &self.filename().as_str())
104 .field("size", &self.size())
105 .field("data", &"<bytes>")
106 .finish()
107 }
108}
109
/// Error returned when archive data fails validation.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct CorruptDataError;

impl Display for CorruptDataError {
    /// The user-facing representation is the same as the `Debug` output.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        <Self as Debug>::fmt(self, f)
    }
}

impl core::error::Error for CorruptDataError {}
125
/// Tar archive that owns its bytes in a `Box<[u8]>`.
///
/// Only available with the `alloc` feature. See [`TarArchiveRef`] for the
/// borrowing counterpart.
#[cfg(feature = "alloc")]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TarArchive {
    /// Raw bytes of the archive; checked by `TarArchiveRef::validate` in
    /// [`TarArchive::new`].
    data: Box<[u8]>,
}
135
#[cfg(feature = "alloc")]
impl TarArchive {
    /// Takes ownership of `data` and wraps it in an archive.
    ///
    /// # Errors
    /// Returns [`CorruptDataError`] if `data` does not pass
    /// `TarArchiveRef::validate`.
    pub fn new(data: Box<[u8]>) -> Result<Self, CorruptDataError> {
        TarArchiveRef::validate(&data)?;
        Ok(Self { data })
    }

    /// Returns an iterator over the entries of the archive.
    #[must_use]
    pub fn entries(&self) -> ArchiveEntryIterator<'_> {
        let bytes: &[u8] = &self.data;
        ArchiveEntryIterator::new(bytes)
    }
}
157
#[cfg(feature = "alloc")]
impl From<Box<[u8]>> for TarArchive {
    /// Converts raw bytes into an archive.
    ///
    /// # Panics
    /// Panics if `data` is rejected by [`TarArchive::new`]. Use
    /// [`TarArchive::new`] directly to handle invalid data gracefully.
    fn from(data: Box<[u8]>) -> Self {
        Self::new(data).expect(
            "Tar data must be non-empty, block-aligned and at least MIN_BLOCK_COUNT blocks long",
        )
    }
}
164
#[cfg(feature = "alloc")]
impl From<TarArchive> for Box<[u8]> {
    /// Unwraps the archive and hands back the bytes it owns.
    fn from(ar: TarArchive) -> Self {
        let TarArchive { data } = ar;
        data
    }
}
171
/// Tar archive that borrows its bytes from the caller.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TarArchiveRef<'a> {
    /// Raw bytes of the archive; checked by `validate` in
    /// [`TarArchiveRef::new`].
    data: &'a [u8],
}
178
179#[allow(unused)]
180impl<'a> TarArchiveRef<'a> {
181 pub fn new(data: &'a [u8]) -> Result<Self, CorruptDataError> {
187 Self::validate(data).map(|()| Self { data })
188 }
189
190 fn validate(data: &'a [u8]) -> Result<(), CorruptDataError> {
191 let is_malformed = (data.len() % BLOCKSIZE) != 0;
192 let has_min_block_count = data.len() / BLOCKSIZE >= MIN_BLOCK_COUNT;
193 (!data.is_empty() && !is_malformed && has_min_block_count)
194 .then_some(())
195 .ok_or(CorruptDataError)
196 }
197
198 #[must_use]
200 pub fn entries(&self) -> ArchiveEntryIterator<'a> {
201 ArchiveEntryIterator::new(self.data)
202 }
203}
204
/// Iterator over the header blocks of a Tar archive.
#[derive(Debug)]
pub struct ArchiveHeaderIterator<'a> {
    /// Raw bytes of the whole archive.
    archive_data: &'a [u8],
    /// Index (in units of `BLOCKSIZE`) of the block that the next call to
    /// `next` interprets as a header.
    next_hdr_block_index: usize,
}
211
212impl<'a> ArchiveHeaderIterator<'a> {
213 #[must_use]
218 pub fn new(archive: &'a [u8]) -> Self {
219 assert!(!archive.is_empty());
220 assert_eq!(archive.len() % BLOCKSIZE, 0);
221 Self {
222 archive_data: archive,
223 next_hdr_block_index: 0,
224 }
225 }
226
227 const fn block_as_header(&self, block_index: usize) -> &'a PosixHeader {
229 unsafe {
230 self.archive_data
231 .as_ptr()
232 .add(block_index * BLOCKSIZE)
233 .cast::<PosixHeader>()
234 .as_ref()
235 .unwrap()
236 }
237 }
238}
239
/// Index of a block inside the archive data, in units of `BLOCKSIZE`.
type BlockIndex = usize;
241
242impl<'a> Iterator for ArchiveHeaderIterator<'a> {
243 type Item = (BlockIndex, &'a PosixHeader);
244
245 fn next(&mut self) -> Option<Self::Item> {
251 let total_block_count = self.archive_data.len() / BLOCKSIZE;
252 if self.next_hdr_block_index >= total_block_count {
253 warn!(
254 "Invalid block index. Probably the Tar is corrupt: an header had an invalid payload size"
255 );
256 return None;
257 }
258
259 let hdr = self.block_as_header(self.next_hdr_block_index);
260 let block_index = self.next_hdr_block_index;
261
262 self.next_hdr_block_index += 1;
264
265 if let Ok(typeflag) = hdr.typeflag.try_to_type_flag() {
269 if typeflag.is_regular_file() {
270 let payload_block_count = hdr
271 .payload_block_count()
272 .inspect_err(|e| {
273 log::error!("Unparsable size ({e:?}) in header {hdr:#?}");
274 })
275 .ok()?;
276 self.next_hdr_block_index += payload_block_count;
277 }
278 }
279
280 Some((block_index, hdr))
281 }
282}
283
// NOTE(review): the `Iterator` impl for `ArchiveEntryIterator` in this file
// does not override `size_hint`, so the default `ExactSizeIterator::len`
// (which asserts that both bounds of `size_hint` agree) cannot succeed.
// Confirm whether this impl is intentional.
impl ExactSizeIterator for ArchiveEntryIterator<'_> {}
285
/// Iterator over the file entries of a Tar archive.
///
/// Wraps an [`ArchiveHeaderIterator`] and turns the headers of regular files
/// into [`ArchiveEntry`] values; other entry types are skipped.
#[derive(Debug)]
pub struct ArchiveEntryIterator<'a>(ArchiveHeaderIterator<'a>);
293
294impl<'a> ArchiveEntryIterator<'a> {
295 fn new(archive: &'a [u8]) -> Self {
296 Self(ArchiveHeaderIterator::new(archive))
297 }
298
299 fn next_hdr(&mut self) -> Option<(BlockIndex, &'a PosixHeader)> {
300 self.0.next()
301 }
302}
303
304impl<'a> Iterator for ArchiveEntryIterator<'a> {
305 type Item = ArchiveEntry<'a>;
306
307 fn next(&mut self) -> Option<Self::Item> {
308 let (mut block_index, mut hdr) = self.next_hdr()?;
309
310 while !hdr
313 .typeflag
314 .try_to_type_flag()
315 .inspect_err(|e| error!("Invalid TypeFlag: {e:?}"))
316 .ok()?
317 .is_regular_file()
318 {
319 warn!(
320 "Skipping entry of type {:?} (not supported yet)",
321 hdr.typeflag
322 );
323
324 (block_index, hdr) = self.next_hdr()?;
326 }
327
328 if hdr.is_zero_block() {
330 if self.next_hdr()?.1.is_zero_block() {
331 return None;
333 }
334
335 panic!("should never have a missing double zero block: is the Tar archive corrupt?");
336 }
337
338 let payload_size: usize = hdr
339 .size
340 .as_number()
341 .inspect_err(|e| error!("Can't parse the file size from the header. {e:#?}"))
342 .ok()?;
343
344 let idx_first_data_block = block_index + 1;
345 let idx_begin = idx_first_data_block * BLOCKSIZE;
346 let idx_end_exclusive = idx_begin + payload_size;
347
348 let max_data_end_index_exclusive = self.0.archive_data.len() - 2 * BLOCKSIZE;
351 if idx_end_exclusive > max_data_end_index_exclusive {
352 warn!(
353 "Invalid Tar. The size of the payload ({payload_size}) is larger than what is valid"
354 );
355 return None;
356 }
357
358 let file_bytes = &self.0.archive_data[idx_begin..idx_end_exclusive];
359
360 let mut filename =
361 TarFormatString::<POSIX_1003_MAX_FILENAME_LEN>::new([0; POSIX_1003_MAX_FILENAME_LEN]);
362
363 if (
366 hdr.magic.as_str(),
367 hdr.version.as_str(),
368 hdr.prefix.is_empty(),
369 ) == (Ok("ustar"), Ok("00"), false)
370 {
371 filename.append(&hdr.prefix);
372 filename.append(&TarFormatString::<1>::new([b'/']));
373 }
374 filename.append(&hdr.name);
375 Some(ArchiveEntry::new(filename, file_bytes, hdr))
376 }
377}
378
379#[cfg(test)]
380mod tests {
381 use super::*;
382 use crate::TarFormatOctal;
383 use std::vec::Vec;
384
385 #[test]
386 #[rustfmt::skip]
387 fn test_constructor_returns_error() {
388 assert_eq!(TarArchiveRef::new(&[0]), Err(CorruptDataError));
389 assert_eq!(TarArchiveRef::new(&[]), Err(CorruptDataError));
390 assert!(TarArchiveRef::new(&[0; BLOCKSIZE * MIN_BLOCK_COUNT]).is_ok());
391
392 #[cfg(feature = "alloc")]
393 {
394 assert_eq!(TarArchive::new(vec![].into_boxed_slice()), Err(CorruptDataError));
395 assert_eq!(TarArchive::new(vec![0].into_boxed_slice()), Err(CorruptDataError));
396 assert!(TarArchive::new(vec![0; BLOCKSIZE * MIN_BLOCK_COUNT].into_boxed_slice()).is_ok());
397 };
398 }
399
400 #[test]
401 fn test_header_iterator() {
402 let archive = include_bytes!("../tests/gnu_tar_default.tar");
403 let iter = ArchiveHeaderIterator::new(archive);
404 let names = iter
405 .map(|(_i, hdr)| hdr.name.as_str().unwrap())
406 .collect::<Vec<_>>();
407
408 assert_eq!(
409 names.as_slice(),
410 &[
411 "bye_world_513b.txt",
412 "hello_world_513b.txt",
413 "hello_world.txt",
414 ]
415 );
416 }
417
418 #[test]
420 fn test_print_archive_headers() {
421 let data = include_bytes!("../tests/gnu_tar_default.tar");
422
423 let iter = ArchiveHeaderIterator::new(data);
424 let entries = iter.map(|(_, hdr)| hdr).collect::<Vec<_>>();
425 println!("{entries:#?}");
426 }
427
428 #[test]
430 fn test_print_archive_list() {
431 let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar")).unwrap();
432 let entries = archive.entries().collect::<Vec<_>>();
433 println!("{entries:#?}");
434 }
435
436 #[test]
444 fn test_weird_fuzzing_tarballs() {
445 let main_tarball =
450 TarArchiveRef::new(include_bytes!("../tests/weird_fuzzing_tarballs.tar")).unwrap();
451
452 let mut all_entries = vec![];
453 for tarball in main_tarball.entries() {
454 let tarball = TarArchiveRef::new(tarball.data()).unwrap();
455 for entry in tarball.entries() {
456 all_entries.push(entry.filename());
457 }
458 }
459
460 for entry in all_entries {
462 eprintln!("\"{entry:?}\",");
463 }
464 }
465
466 #[test]
468 fn test_archive_entries() {
469 let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar")).unwrap();
470 let entries = archive.entries().collect::<Vec<_>>();
471 assert_archive_content(&entries);
472
473 let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_gnu.tar")).unwrap();
474 let entries = archive.entries().collect::<Vec<_>>();
475 assert_archive_content(&entries);
476
477 let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_oldgnu.tar")).unwrap();
478 let entries = archive.entries().collect::<Vec<_>>();
479 assert_archive_content(&entries);
480
481 let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_ustar.tar")).unwrap();
492 let entries = archive.entries().collect::<Vec<_>>();
493 assert_archive_content(&entries);
494
495 let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_v7.tar")).unwrap();
496 let entries = archive.entries().collect::<Vec<_>>();
497 assert_archive_content(&entries);
498 }
499
    /// The fixture must yield exactly two entries of 7 bytes each, whose file
    /// names consist of a long directory part, a `/` and a long file part.
    #[test]
    fn test_archive_with_long_dir_entries() {
        let archive =
            TarArchiveRef::new(include_bytes!("../tests/gnu_tar_ustar_long.tar")).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();

        assert_eq!(entries.len(), 2);
        assert_entry_content(
            // First entry: the shorter of the two directory parts.
            &entries[0],
            "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ",
            7,
        );
        assert_entry_content(
            // Second entry: the longer of the two directory parts.
            &entries[1],
            "01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ",
            7,
        );
    }
523
    /// The fixture must yield exactly one empty file that sits in a deeply
    /// nested directory structure.
    #[test]
    fn test_archive_with_deep_dir_entries() {
        let archive =
            TarArchiveRef::new(include_bytes!("../tests/gnu_tar_ustar_deep.tar")).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();

        assert_eq!(entries.len(), 1);
        assert_entry_content(
            &entries[0],
            "0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/empty",
            0,
        );
    }
539
540 #[test]
541 fn test_default_archive_with_dir_entries() {
542 let archive =
545 TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default_with_dir.tar")).unwrap();
546 let entries = archive.entries().collect::<Vec<_>>();
547
548 assert_archive_with_dir_content(&entries);
549 }
550
551 #[test]
552 fn test_ustar_archive_with_dir_entries() {
553 let archive =
556 TarArchiveRef::new(include_bytes!("../tests/mac_tar_ustar_with_dir.tar")).unwrap();
557 let entries = archive.entries().collect::<Vec<_>>();
558
559 assert_archive_with_dir_content(&entries);
560 }
561
562 #[test]
563 fn test_data_fills_entire_block() {
564 let mut data = [0_u8; 4 * BLOCKSIZE];
566
567 {
569 data[BLOCKSIZE..BLOCKSIZE * 2].fill(0xff);
570 }
571
572 {
574 let hdr = unsafe { data.as_mut_ptr().cast::<PosixHeader>().as_mut().unwrap() };
575 let blocksize_octal = "1000\0\0\0\0\0\0\0\0" ;
576 let blocksize_octal_bytes: [u8; 12] = {
577 let mut val = [0; 12];
578 val.copy_from_slice(blocksize_octal.as_bytes());
579 val
580 };
581 hdr.size = TarFormatOctal::new(blocksize_octal_bytes);
582 }
583 let archive = TarArchiveRef::new(data.as_slice()).unwrap();
584 let entries = archive.entries().collect::<Vec<_>>();
585 assert_eq!(entries.len(), 1);
586 assert!(entries[0].data.iter().all(|&v| v == 0xff));
587 }
588
589 #[cfg(feature = "alloc")]
591 #[test]
592 fn test_archive_entries_alloc() {
593 let data = include_bytes!("../tests/gnu_tar_default.tar")
594 .to_vec()
595 .into_boxed_slice();
596 let archive = TarArchive::new(data.clone()).unwrap();
597 let entries = archive.entries().collect::<Vec<_>>();
598 assert_archive_content(&entries);
599
600 assert_eq!(data, archive.into());
602 }
603
604 fn assert_entry_content(entry: &ArchiveEntry, filename: &str, size: usize) {
606 assert_eq!(entry.filename().as_str(), Ok(filename));
607 assert_eq!(entry.size(), size);
608 assert_eq!(entry.data().len(), size);
609 }
610
611 fn assert_archive_content(entries: &[ArchiveEntry]) {
615 use crate::ModeFlags;
616 let permissions = ModeFlags::OwnerRead
617 | ModeFlags::OwnerWrite
618 | ModeFlags::OwnerExec
619 | ModeFlags::GroupRead
620 | ModeFlags::GroupWrite
621 | ModeFlags::GroupExec
622 | ModeFlags::OthersRead
623 | ModeFlags::OthersWrite
624 | ModeFlags::OthersExec;
625 let rw_rw_r__ = ModeFlags::OwnerRead
626 | ModeFlags::OwnerWrite
627 | ModeFlags::GroupRead
628 | ModeFlags::GroupWrite
629 | ModeFlags::OthersRead;
630 #[allow(non_snake_case)]
632 let rw_r__r__ = ModeFlags::OwnerRead
633 | ModeFlags::OwnerWrite
634 | ModeFlags::GroupRead
635 | ModeFlags::OthersRead;
636
637 assert_eq!(entries.len(), 3);
638
639 assert_entry_content(&entries[0], "bye_world_513b.txt", 513);
640 assert_eq!(
641 entries[0].data_as_str().expect("Should be valid UTF-8"),
642 include_str!("../tests/bye_world_513b.txt").replace("\r\n", "\n")
644 );
645 assert_eq!(
646 entries[0]
647 .posix_header()
648 .mode
649 .to_flags()
650 .unwrap()
651 .intersection(permissions),
652 rw_rw_r__
653 );
654
655 assert_entry_content(&entries[1], "hello_world_513b.txt", 513);
658 assert_eq!(
659 entries[1].data_as_str().expect("Should be valid UTF-8"),
660 include_str!("../tests/hello_world_513b.txt").replace("\r\n", "\n")
662 );
663 assert_eq!(
664 entries[1]
665 .posix_header()
666 .mode
667 .to_flags()
668 .unwrap()
669 .intersection(permissions),
670 rw_rw_r__
671 );
672
673 assert_entry_content(&entries[2], "hello_world.txt", 12);
674 assert_eq!(
675 entries[2].data_as_str().expect("Should be valid UTF-8"),
676 "Hello World\n",
677 "file content must match"
678 );
679 assert_eq!(
680 entries[2]
681 .posix_header()
682 .mode
683 .to_flags()
684 .unwrap()
685 .intersection(permissions),
686 rw_r__r__
687 );
688 }
689
690 fn assert_archive_with_dir_content(entries: &[ArchiveEntry]) {
694 assert_eq!(entries.len(), 3);
695
696 assert_entry_content(&entries[0], "tests/hello_world.txt", 12);
697 assert_eq!(
698 entries[0].data_as_str().expect("Should be valid UTF-8"),
699 "Hello World\n",
700 "file content must match"
701 );
702
703 assert_entry_content(&entries[1], "tests/bye_world_513b.txt", 513);
706 assert_eq!(
707 entries[1].data_as_str().expect("Should be valid UTF-8"),
708 include_str!("../tests/bye_world_513b.txt").replace("\r\n", "\n")
710 );
711
712 assert_entry_content(&entries[2], "tests/hello_world_513b.txt", 513);
713 assert_eq!(
714 entries[2].data_as_str().expect("Should be valid UTF-8"),
715 include_str!("../tests/hello_world_513b.txt").replace("\r\n", "\n")
717 );
718 }
719}