1use crate::header::PosixHeader;
28use crate::tar_format_types::TarFormatString;
29use crate::{BLOCKSIZE, POSIX_1003_MAX_FILENAME_LEN};
30#[cfg(feature = "alloc")]
31use alloc::boxed::Box;
32use core::fmt::{Debug, Display, Formatter};
33use core::str::Utf8Error;
34use log::{error, warn};
35
/// Minimum number of [`BLOCKSIZE`]-sized blocks a buffer must contain to be
/// accepted by `TarArchiveRef::new` and `TarArchive::new`.
// NOTE(review): presumably one header block plus the two terminating zero
// blocks of a Tar archive — confirm against the format specification.
pub const MIN_BLOCK_COUNT: usize = 3;
40
41pub struct ArchiveEntry<'a> {
44 filename: TarFormatString<POSIX_1003_MAX_FILENAME_LEN>,
45 data: &'a [u8],
46 size: usize,
47 posix_header: &'a PosixHeader,
48}
49
50#[allow(unused)]
51impl<'a> ArchiveEntry<'a> {
52 const fn new(
53 filename: TarFormatString<POSIX_1003_MAX_FILENAME_LEN>,
54 data: &'a [u8],
55 posix_header: &'a PosixHeader,
56 ) -> Self {
57 ArchiveEntry {
58 filename,
59 data,
60 size: data.len(),
61 posix_header,
62 }
63 }
64
65 #[must_use]
68 pub const fn filename(&self) -> TarFormatString<{ POSIX_1003_MAX_FILENAME_LEN }> {
69 self.filename
70 }
71
72 #[must_use]
74 pub const fn data(&self) -> &'a [u8] {
75 self.data
76 }
77
78 #[allow(clippy::missing_const_for_fn)]
83 pub fn data_as_str(&self) -> Result<&'a str, Utf8Error> {
84 core::str::from_utf8(self.data)
85 }
86
87 #[must_use]
89 pub const fn size(&self) -> usize {
90 self.size
91 }
92
93 #[must_use]
95 pub const fn posix_header(&self) -> &PosixHeader {
96 self.posix_header
97 }
98}
99
100impl Debug for ArchiveEntry<'_> {
101 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
102 f.debug_struct("ArchiveEntry")
103 .field("filename", &self.filename().as_str())
104 .field("size", &self.size())
105 .field("data", &"<bytes>")
106 .finish()
107 }
108}
109
/// Error returned when a byte buffer is rejected as a Tar archive.
///
/// The type carries no further detail; it only signals that validation
/// failed.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct CorruptDataError;

/// The user-facing text is the same as the `Debug` representation.
impl Display for CorruptDataError {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        write!(f, "{self:?}")
    }
}

impl core::error::Error for CorruptDataError {}
125
/// Tar archive that owns its bytes.
///
/// Only available with the `alloc` feature. Use [`TarArchiveRef`] to work on
/// borrowed data instead.
#[cfg(feature = "alloc")]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TarArchive {
    /// The complete archive, validated on construction.
    data: Box<[u8]>,
}

#[cfg(feature = "alloc")]
impl TarArchive {
    /// Takes ownership of `data` after checking it with the same validation
    /// that [`TarArchiveRef::new`] applies.
    ///
    /// # Errors
    /// Returns [`CorruptDataError`] if the validation rejects the buffer.
    pub fn new(data: Box<[u8]>) -> Result<Self, CorruptDataError> {
        TarArchiveRef::validate(&data)?;
        Ok(Self { data })
    }

    /// Returns an iterator over the file entries of the archive.
    #[must_use]
    pub fn entries(&self) -> ArchiveEntryIterator<'_> {
        let bytes: &[u8] = &self.data;
        ArchiveEntryIterator::new(bytes)
    }
}
154
#[cfg(feature = "alloc")]
impl From<Box<[u8]>> for TarArchive {
    /// Wraps `data` into a [`TarArchive`].
    ///
    /// This is a convenience for data that is already known to be valid.
    /// Prefer [`TarArchive::new`] for data of unknown origin, as it reports
    /// invalid input as an error instead of panicking.
    ///
    /// # Panics
    /// Panics if [`TarArchive::new`] rejects `data`.
    fn from(data: Box<[u8]>) -> Self {
        Self::new(data).expect(
            "data is not a valid Tar archive; use TarArchive::new for fallible construction",
        )
    }
}
161
#[cfg(feature = "alloc")]
impl From<TarArchive> for Box<[u8]> {
    /// Consumes the archive and hands back the bytes it owned.
    fn from(ar: TarArchive) -> Self {
        let TarArchive { data } = ar;
        data
    }
}
168
169#[derive(Clone, Debug, PartialEq, Eq)]
172pub struct TarArchiveRef<'a> {
173 data: &'a [u8],
174}
175
176#[allow(unused)]
177impl<'a> TarArchiveRef<'a> {
178 pub fn new(data: &'a [u8]) -> Result<Self, CorruptDataError> {
184 Self::validate(data).map(|()| Self { data })
185 }
186
187 fn validate(data: &'a [u8]) -> Result<(), CorruptDataError> {
188 let is_malformed = (data.len() % BLOCKSIZE) != 0;
189 let has_min_block_count = data.len() / BLOCKSIZE >= MIN_BLOCK_COUNT;
190 (!data.is_empty() && !is_malformed && has_min_block_count)
191 .then_some(())
192 .ok_or(CorruptDataError)
193 }
194
195 #[must_use]
197 pub fn entries(&self) -> ArchiveEntryIterator<'a> {
198 ArchiveEntryIterator::new(self.data)
199 }
200}
201
202#[derive(Debug)]
204pub struct ArchiveHeaderIterator<'a> {
205 archive_data: &'a [u8],
206 next_hdr_block_index: usize,
207}
208
209impl<'a> ArchiveHeaderIterator<'a> {
210 #[must_use]
215 pub fn new(archive: &'a [u8]) -> Self {
216 assert!(!archive.is_empty());
217 assert_eq!(archive.len() % BLOCKSIZE, 0);
218 Self {
219 archive_data: archive,
220 next_hdr_block_index: 0,
221 }
222 }
223
224 const fn block_as_header(&self, block_index: usize) -> &'a PosixHeader {
226 unsafe {
227 self.archive_data
228 .as_ptr()
229 .add(block_index * BLOCKSIZE)
230 .cast::<PosixHeader>()
231 .as_ref()
232 .unwrap()
233 }
234 }
235}
236
/// Zero-based index of a [`BLOCKSIZE`]-byte block within the archive data.
type BlockIndex = usize;
238
239impl<'a> Iterator for ArchiveHeaderIterator<'a> {
240 type Item = (BlockIndex, &'a PosixHeader);
241
242 fn next(&mut self) -> Option<Self::Item> {
248 let total_block_count = self.archive_data.len() / BLOCKSIZE;
249 if self.next_hdr_block_index >= total_block_count {
250 warn!(
251 "Invalid block index. Probably the Tar is corrupt: an header had an invalid payload size"
252 );
253 return None;
254 }
255
256 let hdr = self.block_as_header(self.next_hdr_block_index);
257 let block_index = self.next_hdr_block_index;
258
259 self.next_hdr_block_index += 1;
261
262 if let Ok(typeflag) = hdr.typeflag.try_to_type_flag() {
266 if typeflag.is_regular_file() {
267 let payload_block_count = hdr
268 .payload_block_count()
269 .inspect_err(|e| {
270 log::error!("Unparsable size ({e:?}) in header {hdr:#?}");
271 })
272 .ok()?;
273 self.next_hdr_block_index += payload_block_count;
274 }
275 }
276
277 Some((block_index, hdr))
278 }
279}
280
281impl ExactSizeIterator for ArchiveEntryIterator<'_> {}
282
283#[derive(Debug)]
289pub struct ArchiveEntryIterator<'a>(ArchiveHeaderIterator<'a>);
290
291impl<'a> ArchiveEntryIterator<'a> {
292 fn new(archive: &'a [u8]) -> Self {
293 Self(ArchiveHeaderIterator::new(archive))
294 }
295
296 fn next_hdr(&mut self) -> Option<(BlockIndex, &'a PosixHeader)> {
297 self.0.next()
298 }
299}
300
301impl<'a> Iterator for ArchiveEntryIterator<'a> {
302 type Item = ArchiveEntry<'a>;
303
304 fn next(&mut self) -> Option<Self::Item> {
305 let (mut block_index, mut hdr) = self.next_hdr()?;
306
307 while !hdr
310 .typeflag
311 .try_to_type_flag()
312 .inspect_err(|e| error!("Invalid TypeFlag: {e:?}"))
313 .ok()?
314 .is_regular_file()
315 {
316 warn!(
317 "Skipping entry of type {:?} (not supported yet)",
318 hdr.typeflag
319 );
320
321 (block_index, hdr) = self.next_hdr()?;
323 }
324
325 if hdr.is_zero_block() {
327 if self.next_hdr()?.1.is_zero_block() {
328 return None;
330 }
331
332 panic!("should never have a missing double zero block: is the Tar archive corrupt?");
333 }
334
335 let payload_size: usize = hdr
336 .size
337 .as_number()
338 .inspect_err(|e| error!("Can't parse the file size from the header. {e:#?}"))
339 .ok()?;
340
341 let idx_first_data_block = block_index + 1;
342 let idx_begin = idx_first_data_block * BLOCKSIZE;
343 let idx_end_exclusive = idx_begin + payload_size;
344
345 let max_data_end_index_exclusive = self.0.archive_data.len() - 2 * BLOCKSIZE;
348 if idx_end_exclusive > max_data_end_index_exclusive {
349 warn!(
350 "Invalid Tar. The size of the payload ({payload_size}) is larger than what is valid"
351 );
352 return None;
353 }
354
355 let file_bytes = &self.0.archive_data[idx_begin..idx_end_exclusive];
356
357 let mut filename =
358 TarFormatString::<POSIX_1003_MAX_FILENAME_LEN>::new([0; POSIX_1003_MAX_FILENAME_LEN]);
359
360 if (
363 hdr.magic.as_str(),
364 hdr.version.as_str(),
365 hdr.prefix.is_empty(),
366 ) == (Ok("ustar"), Ok("00"), false)
367 {
368 filename.append(&hdr.prefix);
369 filename.append(&TarFormatString::<1>::new([b'/']));
370 }
371 filename.append(&hdr.name);
372 Some(ArchiveEntry::new(filename, file_bytes, hdr))
373 }
374}
375
376#[cfg(test)]
377mod tests {
378 use super::*;
379 use crate::TarFormatOctal;
380 use std::vec::Vec;
381
/// Buffers that are empty or too short must be rejected, while a zeroed
/// buffer of the minimum block count is accepted.
#[test]
#[rustfmt::skip]
fn test_constructor_returns_error() {
    let one_byte: &[u8] = &[0];
    let empty: &[u8] = &[];
    let minimal = [0_u8; BLOCKSIZE * MIN_BLOCK_COUNT];

    assert_eq!(TarArchiveRef::new(one_byte), Err(CorruptDataError));
    assert_eq!(TarArchiveRef::new(empty), Err(CorruptDataError));
    assert!(TarArchiveRef::new(&minimal).is_ok());

    #[cfg(feature = "alloc")]
    {
        let empty_box = vec![].into_boxed_slice();
        assert_eq!(TarArchive::new(empty_box), Err(CorruptDataError));

        let one_byte_box = vec![0].into_boxed_slice();
        assert_eq!(TarArchive::new(one_byte_box), Err(CorruptDataError));

        let minimal_box = vec![0; BLOCKSIZE * MIN_BLOCK_COUNT].into_boxed_slice();
        assert!(TarArchive::new(minimal_box).is_ok());
    };
}
396
/// The header iterator must yield the headers of the three files in the
/// default GNU tar fixture, in archive order.
#[test]
fn test_header_iterator() {
    let archive = include_bytes!("../tests/gnu_tar_default.tar");

    let mut names = Vec::new();
    for (_block_index, hdr) in ArchiveHeaderIterator::new(archive) {
        names.push(hdr.name.as_str().unwrap());
    }

    let expected = [
        "bye_world_513b.txt",
        "hello_world_513b.txt",
        "hello_world.txt",
    ];
    assert_eq!(names.as_slice(), &expected)
}
414
/// Prints all headers of the default GNU tar fixture; there are no
/// assertions, the test only checks that iterating and formatting works.
#[test]
fn test_print_archive_headers() {
    let data = include_bytes!("../tests/gnu_tar_default.tar");

    let mut entries = Vec::new();
    for (_, hdr) in ArchiveHeaderIterator::new(data) {
        entries.push(hdr);
    }
    println!("{:#?}", entries);
}
424
/// Prints all entries of the default GNU tar fixture; there are no
/// assertions, the test only checks that iterating and formatting works.
#[test]
fn test_print_archive_list() {
    let bytes = include_bytes!("../tests/gnu_tar_default.tar");
    let archive = TarArchiveRef::new(bytes).unwrap();
    let entries: Vec<_> = archive.entries().collect();
    println!("{:#?}", entries);
}
432
/// Iterates over a tarball whose entries are tarballs themselves and prints
/// the filenames found in the nested archives. Every nested payload must be
/// accepted as an archive; walking the entries must not panic.
#[test]
fn test_weird_fuzzing_tarballs() {
    let main_tarball =
        TarArchiveRef::new(include_bytes!("../tests/weird_fuzzing_tarballs.tar")).unwrap();

    // Collect everything first, print afterwards.
    let all_entries = main_tarball
        .entries()
        .flat_map(|nested| TarArchiveRef::new(nested.data()).unwrap().entries())
        .map(|inner| inner.filename())
        .collect::<Vec<_>>();

    for entry in all_entries {
        eprintln!("\"{entry:?}\",");
    }
}
462
/// The same three files packed in different Tar flavors must all produce the
/// same entries.
#[test]
fn test_archive_entries() {
    let fixtures: [&[u8]; 5] = [
        include_bytes!("../tests/gnu_tar_default.tar"),
        include_bytes!("../tests/gnu_tar_gnu.tar"),
        include_bytes!("../tests/gnu_tar_oldgnu.tar"),
        include_bytes!("../tests/gnu_tar_ustar.tar"),
        include_bytes!("../tests/gnu_tar_v7.tar"),
    ];

    for bytes in fixtures {
        let archive = TarArchiveRef::new(bytes).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();
        assert_archive_content(&entries);
    }
}
496
/// Entries with long directory names must be reported with the directory
/// part and the file name joined by a slash.
#[test]
fn test_archive_with_long_dir_entries() {
    let bytes = include_bytes!("../tests/gnu_tar_ustar_long.tar");
    let entries: Vec<_> = TarArchiveRef::new(bytes).unwrap().entries().collect();

    let expected = [
        (
            "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ",
            7,
        ),
        (
            "01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ",
            7,
        ),
    ];

    assert_eq!(entries.len(), 2);
    for (entry, (filename, size)) in entries.iter().zip(expected) {
        assert_entry_content(entry, filename, size);
    }
}
520
/// An empty file inside a deeply nested directory must be reported with its
/// complete path and a size of zero.
#[test]
fn test_archive_with_deep_dir_entries() {
    let bytes = include_bytes!("../tests/gnu_tar_ustar_deep.tar");
    let entries: Vec<_> = TarArchiveRef::new(bytes).unwrap().entries().collect();

    let expected_path = "0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/empty";

    assert_eq!(entries.len(), 1);
    assert_entry_content(&entries[0], expected_path, 0);
}
536
/// Checks the entries of the default-format fixture that contains a
/// directory.
#[test]
fn test_default_archive_with_dir_entries() {
    let bytes = include_bytes!("../tests/gnu_tar_default_with_dir.tar");
    let entries: Vec<_> = TarArchiveRef::new(bytes).unwrap().entries().collect();

    assert_archive_with_dir_content(&entries);
}
547
/// Checks the entries of the ustar fixture that contains a directory.
#[test]
fn test_ustar_archive_with_dir_entries() {
    let bytes = include_bytes!("../tests/mac_tar_ustar_with_dir.tar");
    let entries: Vec<_> = TarArchiveRef::new(bytes).unwrap().entries().collect();

    assert_archive_with_dir_content(&entries);
}
558
/// A payload that occupies its data block completely must be returned in
/// full. The archive is built by hand: one header block, one data block
/// filled with `0xff`, and two zero blocks at the end.
#[test]
fn test_data_fills_entire_block() {
    let mut data = [0_u8; 4 * BLOCKSIZE];

    // Second block: the payload.
    data[BLOCKSIZE..BLOCKSIZE * 2].fill(0xff);

    // First block: the header. Only the size field is set, to octal 1000
    // (decimal 512) padded with NUL bytes to the twelve bytes of the field.
    {
        let hdr = unsafe { &mut *data.as_mut_ptr().cast::<PosixHeader>() };
        let blocksize_octal_bytes: [u8; 12] = *b"1000\0\0\0\0\0\0\0\0";
        hdr.size = TarFormatOctal::new(blocksize_octal_bytes);
    }

    let archive = TarArchiveRef::new(data.as_slice()).unwrap();
    let entries: Vec<_> = archive.entries().collect();

    assert_eq!(entries.len(), 1);
    assert!(entries[0].data.iter().all(|byte| *byte == 0xff));
}
585
/// The owning archive type must yield the same entries as the borrowing one
/// and must hand back its bytes unchanged when converted into a box.
#[cfg(feature = "alloc")]
#[test]
fn test_archive_entries_alloc() {
    let data = include_bytes!("../tests/gnu_tar_default.tar")
        .to_vec()
        .into_boxed_slice();

    let archive = TarArchive::new(data.clone()).unwrap();
    let entries: Vec<_> = archive.entries().collect();
    assert_archive_content(&entries);

    let returned_bytes: Box<[u8]> = archive.into();
    assert_eq!(data, returned_bytes);
}
600
601 fn assert_entry_content(entry: &ArchiveEntry, filename: &str, size: usize) {
603 assert_eq!(entry.filename().as_str(), Ok(filename));
604 assert_eq!(entry.size(), size);
605 assert_eq!(entry.data().len(), size);
606 }
607
608 fn assert_archive_content(entries: &[ArchiveEntry]) {
612 use crate::ModeFlags;
613 let permissions = ModeFlags::OwnerRead
614 | ModeFlags::OwnerWrite
615 | ModeFlags::OwnerExec
616 | ModeFlags::GroupRead
617 | ModeFlags::GroupWrite
618 | ModeFlags::GroupExec
619 | ModeFlags::OthersRead
620 | ModeFlags::OthersWrite
621 | ModeFlags::OthersExec;
622 let rw_rw_r__ = ModeFlags::OwnerRead
623 | ModeFlags::OwnerWrite
624 | ModeFlags::GroupRead
625 | ModeFlags::GroupWrite
626 | ModeFlags::OthersRead;
627 #[allow(non_snake_case)]
629 let rw_r__r__ = ModeFlags::OwnerRead
630 | ModeFlags::OwnerWrite
631 | ModeFlags::GroupRead
632 | ModeFlags::OthersRead;
633
634 assert_eq!(entries.len(), 3);
635
636 assert_entry_content(&entries[0], "bye_world_513b.txt", 513);
637 assert_eq!(
638 entries[0].data_as_str().expect("Should be valid UTF-8"),
639 include_str!("../tests/bye_world_513b.txt").replace("\r\n", "\n")
641 );
642 assert_eq!(
643 entries[0]
644 .posix_header()
645 .mode
646 .to_flags()
647 .unwrap()
648 .intersection(permissions),
649 rw_rw_r__
650 );
651
652 assert_entry_content(&entries[1], "hello_world_513b.txt", 513);
655 assert_eq!(
656 entries[1].data_as_str().expect("Should be valid UTF-8"),
657 include_str!("../tests/hello_world_513b.txt").replace("\r\n", "\n")
659 );
660 assert_eq!(
661 entries[1]
662 .posix_header()
663 .mode
664 .to_flags()
665 .unwrap()
666 .intersection(permissions),
667 rw_rw_r__
668 );
669
670 assert_entry_content(&entries[2], "hello_world.txt", 12);
671 assert_eq!(
672 entries[2].data_as_str().expect("Should be valid UTF-8"),
673 "Hello World\n",
674 "file content must match"
675 );
676 assert_eq!(
677 entries[2]
678 .posix_header()
679 .mode
680 .to_flags()
681 .unwrap()
682 .intersection(permissions),
683 rw_r__r__
684 );
685 }
686
687 fn assert_archive_with_dir_content(entries: &[ArchiveEntry]) {
691 assert_eq!(entries.len(), 3);
692
693 assert_entry_content(&entries[0], "tests/hello_world.txt", 12);
694 assert_eq!(
695 entries[0].data_as_str().expect("Should be valid UTF-8"),
696 "Hello World\n",
697 "file content must match"
698 );
699
700 assert_entry_content(&entries[1], "tests/bye_world_513b.txt", 513);
703 assert_eq!(
704 entries[1].data_as_str().expect("Should be valid UTF-8"),
705 include_str!("../tests/bye_world_513b.txt").replace("\r\n", "\n")
707 );
708
709 assert_entry_content(&entries[2], "tests/hello_world_513b.txt", 513);
710 assert_eq!(
711 entries[2].data_as_str().expect("Should be valid UTF-8"),
712 include_str!("../tests/hello_world_513b.txt").replace("\r\n", "\n")
714 );
715 }
716}