1use crate::header::PosixHeader;
28use crate::tar_format_types::TarFormatString;
29use crate::{BLOCKSIZE, POSIX_1003_MAX_FILENAME_LEN};
30#[cfg(feature = "alloc")]
31use alloc::boxed::Box;
32use core::fmt::{Debug, Display, Formatter};
33use core::str::Utf8Error;
34use log::{error, warn};
35
/// Minimum number of blocks an archive must consist of to pass validation
/// (see `TarArchiveRef::validate`).
///
/// NOTE(review): presumably one header block plus the two terminating zero
/// blocks — confirm against the Tar specification.
pub const MIN_BLOCK_COUNT: usize = 3;
40
/// A single entry of a Tar archive as yielded by [`ArchiveEntryIterator`].
///
/// The entry borrows its payload and its header from the underlying archive
/// bytes; only the assembled filename is stored by value.
pub struct ArchiveEntry<'a> {
    /// Filename assembled by the entry iterator (optionally `prefix` + `/` +
    /// `name` of the header).
    filename: TarFormatString<POSIX_1003_MAX_FILENAME_LEN>,
    /// Payload bytes of the entry, borrowed from the archive.
    data: &'a [u8],
    /// Length of `data` in bytes; set from `data.len()` in `ArchiveEntry::new`.
    size: usize,
    /// The header block that describes this entry, borrowed from the archive.
    posix_header: &'a PosixHeader,
}
49
50#[allow(unused)]
51impl<'a> ArchiveEntry<'a> {
52 const fn new(
53 filename: TarFormatString<POSIX_1003_MAX_FILENAME_LEN>,
54 data: &'a [u8],
55 posix_header: &'a PosixHeader,
56 ) -> Self {
57 ArchiveEntry {
58 filename,
59 data,
60 size: data.len(),
61 posix_header,
62 }
63 }
64
65 #[must_use]
68 pub const fn filename(&self) -> TarFormatString<{ POSIX_1003_MAX_FILENAME_LEN }> {
69 self.filename
70 }
71
72 #[must_use]
74 pub const fn data(&self) -> &'a [u8] {
75 self.data
76 }
77
78 #[allow(clippy::missing_const_for_fn)]
80 pub fn data_as_str(&self) -> Result<&'a str, Utf8Error> {
81 core::str::from_utf8(self.data)
82 }
83
84 #[must_use]
86 pub const fn size(&self) -> usize {
87 self.size
88 }
89
90 #[must_use]
92 pub const fn posix_header(&self) -> &PosixHeader {
93 self.posix_header
94 }
95}
96
97impl Debug for ArchiveEntry<'_> {
98 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
99 f.debug_struct("ArchiveEntry")
100 .field("filename", &self.filename().as_str())
101 .field("size", &self.size())
102 .field("data", &"<bytes>")
103 .finish()
104 }
105}
106
/// Error returned by the archive constructors when the provided bytes fail
/// the basic validation of a Tar archive.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct CorruptDataError;

impl Display for CorruptDataError {
    /// The human-readable form is identical to the `Debug` representation.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        <Self as Debug>::fmt(self, f)
    }
}

#[cfg(feature = "unstable")]
impl core::error::Error for CorruptDataError {}
123
/// A Tar archive that owns its bytes on the heap.
///
/// Only available with the `alloc` feature. Construction goes through
/// `TarArchive::new`, which validates the data first.
#[cfg(feature = "alloc")]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TarArchive {
    /// The complete archive bytes.
    data: Box<[u8]>,
}
133
#[cfg(feature = "alloc")]
impl TarArchive {
    /// Takes ownership of `data` and wraps it as an archive.
    ///
    /// # Errors
    /// Returns [`CorruptDataError`] if `data` does not pass
    /// `TarArchiveRef::validate`.
    pub fn new(data: Box<[u8]>) -> Result<Self, CorruptDataError> {
        TarArchiveRef::validate(&data)?;
        Ok(Self { data })
    }

    /// Returns an iterator over the entries of the archive.
    #[must_use]
    pub fn entries(&self) -> ArchiveEntryIterator {
        let bytes: &[u8] = &self.data;
        ArchiveEntryIterator::new(bytes)
    }
}
152
#[cfg(feature = "alloc")]
impl From<Box<[u8]>> for TarArchive {
    /// Converts owned bytes into an archive.
    ///
    /// # Panics
    /// Panics if `data` is rejected by [`TarArchive::new`]. Use
    /// [`TarArchive::new`] directly to handle invalid data gracefully.
    fn from(data: Box<[u8]>) -> Self {
        let archive: Result<Self, CorruptDataError> = Self::new(data);
        archive.unwrap()
    }
}
159
#[cfg(feature = "alloc")]
impl From<TarArchive> for Box<[u8]> {
    /// Unwraps the archive and hands back the bytes it owned.
    fn from(ar: TarArchive) -> Self {
        let TarArchive { data } = ar;
        data
    }
}
166
/// A Tar archive that borrows its bytes instead of owning them.
///
/// Construction goes through `TarArchiveRef::new`, which validates the data
/// first.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TarArchiveRef<'a> {
    /// The complete archive bytes.
    data: &'a [u8],
}
173
174#[allow(unused)]
175impl<'a> TarArchiveRef<'a> {
176 pub fn new(data: &'a [u8]) -> Result<Self, CorruptDataError> {
181 Self::validate(data).map(|_| Self { data })
182 }
183
184 fn validate(data: &'a [u8]) -> Result<(), CorruptDataError> {
185 let is_malformed = (data.len() % BLOCKSIZE) != 0;
186 let has_min_block_count = data.len() / BLOCKSIZE >= MIN_BLOCK_COUNT;
187 (!data.is_empty() && !is_malformed && has_min_block_count)
188 .then_some(())
189 .ok_or(CorruptDataError)
190 }
191
192 #[must_use]
194 pub fn entries(&self) -> ArchiveEntryIterator {
195 ArchiveEntryIterator::new(self.data)
196 }
197}
198
/// Iterator over the header blocks of an archive.
///
/// Yields pairs of the block index of a header and a reference to that
/// header.
#[derive(Debug)]
pub struct ArchiveHeaderIterator<'a> {
    /// The complete archive bytes.
    archive_data: &'a [u8],
    /// Index of the block that the next call to `next` interprets as header.
    next_hdr_block_index: usize,
}
205
206impl<'a> ArchiveHeaderIterator<'a> {
207 #[must_use]
208 pub fn new(archive: &'a [u8]) -> Self {
209 assert!(!archive.is_empty());
210 assert_eq!(archive.len() % BLOCKSIZE, 0);
211 Self {
212 archive_data: archive,
213 next_hdr_block_index: 0,
214 }
215 }
216
217 fn block_as_header(&self, block_index: usize) -> &'a PosixHeader {
219 unsafe {
220 self.archive_data
221 .as_ptr()
222 .add(block_index * BLOCKSIZE)
223 .cast::<PosixHeader>()
224 .as_ref()
225 .unwrap()
226 }
227 }
228}
229
/// Zero-based index of a `BLOCKSIZE`-sized block inside the archive data.
type BlockIndex = usize;
231
232impl<'a> Iterator for ArchiveHeaderIterator<'a> {
233 type Item = (BlockIndex, &'a PosixHeader);
234
235 fn next(&mut self) -> Option<Self::Item> {
241 let total_block_count = self.archive_data.len() / BLOCKSIZE;
242 if self.next_hdr_block_index >= total_block_count {
243 warn!("Invalid block index. Probably the Tar is corrupt: an header had an invalid payload size");
244 return None;
245 }
246
247 let hdr = self.block_as_header(self.next_hdr_block_index);
248 let block_index = self.next_hdr_block_index;
249
250 self.next_hdr_block_index += 1;
252
253 if let Ok(typeflag) = hdr.typeflag.try_to_type_flag() {
257 if typeflag.is_regular_file() {
258 let payload_block_count = hdr
259 .payload_block_count()
260 .inspect_err(|e| {
261 log::error!("Unparsable size ({e:?}) in header {hdr:#?}");
262 })
263 .ok()?;
264 self.next_hdr_block_index += payload_block_count;
265 }
266 }
267
268 Some((block_index, hdr))
269 }
270}
271
// NOTE(review): `ExactSizeIterator::len` relies on `size_hint` reporting exact
// bounds, but the `Iterator` impl for `ArchiveEntryIterator` in this file does
// not override `size_hint` — confirm that no caller relies on `len()`.
impl ExactSizeIterator for ArchiveEntryIterator<'_> {}
273
/// Iterator over the entries of an archive.
///
/// Wraps an [`ArchiveHeaderIterator`] and turns the headers of regular files
/// into [`ArchiveEntry`] values; headers of other types are skipped.
#[derive(Debug)]
pub struct ArchiveEntryIterator<'a>(ArchiveHeaderIterator<'a>);
281
282impl<'a> ArchiveEntryIterator<'a> {
283 fn new(archive: &'a [u8]) -> Self {
284 Self(ArchiveHeaderIterator::new(archive))
285 }
286
287 fn next_hdr(&mut self) -> Option<(BlockIndex, &'a PosixHeader)> {
288 self.0.next()
289 }
290}
291
292impl<'a> Iterator for ArchiveEntryIterator<'a> {
293 type Item = ArchiveEntry<'a>;
294
295 fn next(&mut self) -> Option<Self::Item> {
296 let (mut block_index, mut hdr) = self.next_hdr()?;
297
298 while !hdr
301 .typeflag
302 .try_to_type_flag()
303 .inspect_err(|e| error!("Invalid TypeFlag: {e:?}"))
304 .ok()?
305 .is_regular_file()
306 {
307 warn!(
308 "Skipping entry of type {:?} (not supported yet)",
309 hdr.typeflag
310 );
311
312 (block_index, hdr) = self.next_hdr()?;
314 }
315
316 if hdr.is_zero_block() {
318 if self.next_hdr()?.1.is_zero_block() {
319 return None;
321 } else {
322 panic!("Never expected to have a situation where self.next_hdr() returns a zero block and the next one is not a zero block, as we should never point to an 'end zero block of a regular file'");
323 }
324 }
325
326 let payload_size: usize = hdr
327 .size
328 .as_number()
329 .inspect_err(|e| error!("Can't parse the file size from the header. {e:#?}"))
330 .ok()?;
331
332 let idx_first_data_block = block_index + 1;
333 let idx_begin = idx_first_data_block * BLOCKSIZE;
334 let idx_end_exclusive = idx_begin + payload_size;
335
336 let max_data_end_index_exclusive = self.0.archive_data.len() - 2 * BLOCKSIZE;
339 if idx_end_exclusive > max_data_end_index_exclusive {
340 warn!("Invalid Tar. The size of the payload ({payload_size}) is larger than what is valid");
341 return None;
342 }
343
344 let file_bytes = &self.0.archive_data[idx_begin..idx_end_exclusive];
345
346 let mut filename: TarFormatString<256> =
347 TarFormatString::<POSIX_1003_MAX_FILENAME_LEN>::new([0; POSIX_1003_MAX_FILENAME_LEN]);
348
349 if (
352 hdr.magic.as_str(),
353 hdr.version.as_str(),
354 hdr.prefix.is_empty(),
355 ) == (Ok("ustar"), Ok("00"), false)
356 {
357 filename.append(&hdr.prefix);
358 filename.append(&TarFormatString::<1>::new([b'/']));
359 }
360 filename.append(&hdr.name);
361 Some(ArchiveEntry::new(filename, file_bytes, hdr))
362 }
363}
364
// Unit tests for the archive types and iterators. Most tests read fixture
// archives from the `tests/` directory next to `src/`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::TarFormatOctal;
    use std::vec::Vec;

    // The constructors reject empty input and input that is not block-aligned,
    // and accept an all-zero buffer of exactly `MIN_BLOCK_COUNT` blocks.
    #[test]
    #[rustfmt::skip]
    fn test_constructor_returns_error() {
        assert_eq!(TarArchiveRef::new(&[0]), Err(CorruptDataError));
        assert_eq!(TarArchiveRef::new(&[]), Err(CorruptDataError));
        assert!(TarArchiveRef::new(&[0; BLOCKSIZE * MIN_BLOCK_COUNT]).is_ok());

        // Same checks for the owning variant.
        #[cfg(feature = "alloc")]
        {
            assert_eq!(TarArchive::new(vec![].into_boxed_slice()), Err(CorruptDataError));
            assert_eq!(TarArchive::new(vec![0].into_boxed_slice()), Err(CorruptDataError));
            assert!(TarArchive::new(vec![0; BLOCKSIZE * MIN_BLOCK_COUNT].into_boxed_slice()).is_ok());
        };
    }

    // The header iterator yields exactly the three file headers of the
    // fixture, in archive order.
    #[test]
    fn test_header_iterator() {
        let archive = include_bytes!("../tests/gnu_tar_default.tar");
        let iter = ArchiveHeaderIterator::new(archive);
        let names = iter
            .map(|(_i, hdr)| hdr.name.as_str().unwrap())
            .collect::<Vec<_>>();

        assert_eq!(
            names.as_slice(),
            &[
                "bye_world_513b.txt",
                "hello_world_513b.txt",
                "hello_world.txt",
            ]
        )
    }

    // Smoke test without assertions: prints all headers of the fixture.
    #[test]
    fn test_print_archive_headers() {
        let data = include_bytes!("../tests/gnu_tar_default.tar");

        let iter = ArchiveHeaderIterator::new(data);
        let entries = iter.map(|(_, hdr)| hdr).collect::<Vec<_>>();
        println!("{:#?}", entries);
    }

    // Smoke test without assertions: prints all entries of the fixture.
    #[test]
    fn test_print_archive_list() {
        let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar")).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();
        println!("{:#?}", entries);
    }

    // Iterates a tarball whose entries are tarballs themselves and walks the
    // entries of each nested tarball. There are no assertions on the content;
    // the test only checks that parsing completes without a panic.
    #[test]
    fn test_weird_fuzzing_tarballs() {
        let main_tarball =
            TarArchiveRef::new(include_bytes!("../tests/weird_fuzzing_tarballs.tar")).unwrap();

        let mut all_entries = vec![];
        for tarball in main_tarball.entries() {
            // The payload of each outer entry is parsed as an archive again.
            let tarball = TarArchiveRef::new(tarball.data()).unwrap();
            for entry in tarball.entries() {
                all_entries.push(entry.filename());
            }
        }

        for entry in all_entries {
            eprintln!("\"{entry:?}\",");
        }
    }

    // The same three files are expected from every fixture in this list; the
    // fixture names suggest that each was created with a different Tar format.
    #[test]
    fn test_archive_entries() {
        let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar")).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();
        assert_archive_content(&entries);

        let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_gnu.tar")).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();
        assert_archive_content(&entries);

        let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_oldgnu.tar")).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();
        assert_archive_content(&entries);

        let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_ustar.tar")).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();
        assert_archive_content(&entries);

        let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_v7.tar")).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();
        assert_archive_content(&entries);
    }

    // Expects two entries with long directory names in front of the file name,
    // each with a payload of 7 bytes.
    #[test]
    fn test_archive_with_long_dir_entries() {
        let archive =
            TarArchiveRef::new(include_bytes!("../tests/gnu_tar_ustar_long.tar")).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();

        assert_eq!(entries.len(), 2);
        assert_entry_content(&entries[0], "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ", 7);
        assert_entry_content(&entries[1], "01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ", 7);
    }

    // Expects a single empty file behind a deeply nested directory path.
    #[test]
    fn test_archive_with_deep_dir_entries() {
        let archive =
            TarArchiveRef::new(include_bytes!("../tests/gnu_tar_ustar_deep.tar")).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();

        assert_eq!(entries.len(), 1);
        assert_entry_content(&entries[0], "0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/0123456789/empty", 0);
    }

    // Files inside a directory are reported with their `tests/` path.
    #[test]
    fn test_default_archive_with_dir_entries() {
        let archive =
            TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default_with_dir.tar")).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();

        assert_archive_with_dir_content(&entries);
    }

    // Same expectation as above for a second fixture with a directory.
    #[test]
    fn test_ustar_archive_with_dir_entries() {
        let archive =
            TarArchiveRef::new(include_bytes!("../tests/mac_tar_ustar_with_dir.tar")).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();

        assert_archive_with_dir_content(&entries);
    }

    // Builds a four-block archive in memory: block 0 is the header, block 1 is
    // the payload, and the last two blocks stay zeroed. The payload must be
    // returned completely even though it ends right before the final two
    // blocks.
    #[test]
    fn test_data_fills_entire_block() {
        let mut data = [0_u8; 4 * BLOCKSIZE];

        {
            // Mark the payload block so that it can be recognized later.
            data[BLOCKSIZE..BLOCKSIZE * 2].fill(0xff);
        }

        {
            // Reinterpret the first block as header and set only its size field.
            let hdr = unsafe { data.as_mut_ptr().cast::<PosixHeader>().as_mut().unwrap() };
            // Octal 1000 is decimal 512 — presumably equal to `BLOCKSIZE`.
            let blocksize_octal = "1000\0\0\0\0\0\0\0\0" ;
            let blocksize_octal_bytes: [u8; 12] = {
                let mut val = [0; 12];
                val.copy_from_slice(blocksize_octal.as_bytes());
                val
            };
            hdr.size = TarFormatOctal::new(blocksize_octal_bytes);
        }
        let archive = TarArchiveRef::new(data.as_slice()).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();
        assert_eq!(entries.len(), 1);
        assert!(entries[0].data.iter().all(|&v| v == 0xff));
    }

    // Same as `test_archive_entries`, but for the owning archive type. Also
    // checks that the archive can be converted back into the original bytes.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_archive_entries_alloc() {
        let data = include_bytes!("../tests/gnu_tar_default.tar")
            .to_vec()
            .into_boxed_slice();
        let archive = TarArchive::new(data.clone()).unwrap();
        let entries = archive.entries().collect::<Vec<_>>();
        assert_archive_content(&entries);

        assert_eq!(data, archive.into());
    }

    // Asserts filename and size of an entry; the size is checked against both
    // the `size()` getter and the length of the payload.
    fn assert_entry_content(entry: &ArchiveEntry, filename: &str, size: usize) {
        assert_eq!(entry.filename().as_str(), Ok(filename));
        assert_eq!(entry.size(), size);
        assert_eq!(entry.data().len(), size);
    }

    // Asserts names, sizes, payloads and permission bits of the three files
    // that the flat (no directory) fixtures contain.
    fn assert_archive_content(entries: &[ArchiveEntry]) {
        use crate::ModeFlags;
        // Mask that selects all read/write/exec bits of owner, group and
        // others from the mode of a header.
        let permissions = ModeFlags::OwnerRead
            | ModeFlags::OwnerWrite
            | ModeFlags::OwnerExec
            | ModeFlags::GroupRead
            | ModeFlags::GroupWrite
            | ModeFlags::GroupExec
            | ModeFlags::OthersRead
            | ModeFlags::OthersWrite
            | ModeFlags::OthersExec;
        // Expected permissions `rw-rw-r--`.
        let rw_rw_r__ = ModeFlags::OwnerRead
            | ModeFlags::OwnerWrite
            | ModeFlags::GroupRead
            | ModeFlags::GroupWrite
            | ModeFlags::OthersRead;
        // Expected permissions `rw-r--r--`.
        #[allow(non_snake_case)]
        let rw_r__r__ = ModeFlags::OwnerRead
            | ModeFlags::OwnerWrite
            | ModeFlags::GroupRead
            | ModeFlags::OthersRead;

        assert_eq!(entries.len(), 3);

        assert_entry_content(&entries[0], "bye_world_513b.txt", 513);
        assert_eq!(
            entries[0].data_as_str().expect("Should be valid UTF-8"),
            // Normalize line endings of the reference file before comparing.
            include_str!("../tests/bye_world_513b.txt").replace("\r\n", "\n")
        );
        assert_eq!(
            entries[0]
                .posix_header()
                .mode
                .to_flags()
                .unwrap()
                .intersection(permissions),
            rw_rw_r__
        );

        assert_entry_content(&entries[1], "hello_world_513b.txt", 513);
        assert_eq!(
            entries[1].data_as_str().expect("Should be valid UTF-8"),
            // Normalize line endings of the reference file before comparing.
            include_str!("../tests/hello_world_513b.txt").replace("\r\n", "\n")
        );
        assert_eq!(
            entries[1]
                .posix_header()
                .mode
                .to_flags()
                .unwrap()
                .intersection(permissions),
            rw_rw_r__
        );

        assert_entry_content(&entries[2], "hello_world.txt", 12);
        assert_eq!(
            entries[2].data_as_str().expect("Should be valid UTF-8"),
            "Hello World\n",
            "file content must match"
        );
        assert_eq!(
            entries[2]
                .posix_header()
                .mode
                .to_flags()
                .unwrap()
                .intersection(permissions),
            rw_r__r__
        );
    }

    // Asserts names, sizes and payloads of the three files that the fixtures
    // with a `tests/` directory contain.
    fn assert_archive_with_dir_content(entries: &[ArchiveEntry]) {
        assert_eq!(entries.len(), 3);

        assert_entry_content(&entries[0], "tests/hello_world.txt", 12);
        assert_eq!(
            entries[0].data_as_str().expect("Should be valid UTF-8"),
            "Hello World\n",
            "file content must match"
        );

        assert_entry_content(&entries[1], "tests/bye_world_513b.txt", 513);
        assert_eq!(
            entries[1].data_as_str().expect("Should be valid UTF-8"),
            // Normalize line endings of the reference file before comparing.
            include_str!("../tests/bye_world_513b.txt").replace("\r\n", "\n")
        );

        assert_entry_content(&entries[2], "tests/hello_world_513b.txt", 513);
        assert_eq!(
            entries[2].data_as_str().expect("Should be valid UTF-8"),
            // Normalize line endings of the reference file before comparing.
            include_str!("../tests/hello_world_513b.txt").replace("\r\n", "\n")
        );
    }
}