1#![deny(missing_docs)]
2#![deny(rustdoc::broken_intra_doc_links)]
3#![deny(rustdoc::invalid_rust_codeblocks)]
4
// Exactly one async backend feature must be selected at build time.
// Selecting both `smol` and `tokio` is rejected here.
#[cfg(all(feature = "smol", feature = "tokio"))]
compile_error!("features `smol` and `tokio` are mutually exclusive");
// Selecting neither backend is rejected as well.
#[cfg(not(any(feature = "smol", feature = "tokio")))]
compile_error!("either feature `smol` or `tokio` must be enabled");
202
203use {
204 async_lock::Mutex,
205 futures_lite::Stream,
206 futures_sink::Sink,
207 pin_project_lite::pin_project,
208 std::{
209 future::{poll_fn, Future},
210 io::{Error, ErrorKind, Result},
211 pin::{pin, Pin},
212 str::from_utf8,
213 sync::Arc,
214 task::{self, Context, Poll},
215 time::{Duration, SystemTime, UNIX_EPOCH},
216 },
217};
218
219#[cfg(feature = "smol")]
220use futures_lite::io::{AsyncRead, AsyncWrite};
221#[cfg(feature = "tokio")]
222use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};
223
// Size in bytes of one tar header record (the `Header::record` array).
const BLOCK_SIZE: usize = 512;
// 64 KiB. NOTE(review): presumably the scratch buffer size used when skipping
// entry data; the usage is outside this part of the file - confirm.
const SKIP_BUFFER_SIZE: usize = 64 * 1024;
// 4096 bytes. NOTE(review): presumably the upper bound for path lengths
// accepted from extension headers - confirm against the usage site.
const PATH_MAX: usize = 4096;
// 1 MiB. NOTE(review): presumably the upper bound for the size of a PAX
// extension header - confirm against the usage site.
const PAX_HEADER_MAX_SIZE: usize = 1024 * 1024;
228
/// Runtime-neutral read poll for the `smol` backend.
///
/// Forwards directly to `AsyncRead::poll_read` and yields the number of bytes
/// placed at the start of `buf`.
#[cfg(feature = "smol")]
fn poll_read_compat<R>(
    reader: Pin<&mut R>,
    cx: &mut Context<'_>,
    buf: &mut [u8],
) -> Poll<Result<usize>>
where
    R: AsyncRead + ?Sized,
{
    AsyncRead::poll_read(reader, cx, buf)
}
237
/// Runtime-neutral read poll for the `tokio` backend.
///
/// Wraps `buf` in a `ReadBuf`, polls the reader once and converts the result
/// into the number of bytes that were filled.
#[cfg(feature = "tokio")]
fn poll_read_compat<R>(
    reader: Pin<&mut R>,
    cx: &mut Context<'_>,
    buf: &mut [u8],
) -> Poll<Result<usize>>
where
    R: AsyncRead + ?Sized,
{
    let mut read_buf = ReadBuf::new(buf);
    // `ready!` propagates `Pending`; `?` propagates a read error.
    task::ready!(reader.poll_read(cx, &mut read_buf))?;
    Poll::Ready(Ok(read_buf.filled().len()))
}
251
/// Runtime-neutral close poll for the `smol` backend.
///
/// Forwards to `AsyncWrite::poll_close`.
#[cfg(feature = "smol")]
fn poll_close_compat<W>(writer: Pin<&mut W>, cx: &mut Context<'_>) -> Poll<Result<()>>
where
    W: AsyncWrite + ?Sized,
{
    AsyncWrite::poll_close(writer, cx)
}
259
/// Runtime-neutral close poll for the `tokio` backend.
///
/// Forwards to `AsyncWrite::poll_shutdown`, tokio's counterpart of closing.
#[cfg(feature = "tokio")]
fn poll_close_compat<W>(writer: Pin<&mut W>, cx: &mut Context<'_>) -> Poll<Result<()>>
where
    W: AsyncWrite + ?Sized,
{
    AsyncWrite::poll_shutdown(writer, cx)
}
267
268fn poll_regular_file_reader<'a, R: AsyncRead>(
269 this: Pin<&mut TarRegularFileReader<'a, R>>,
270 ctx: &mut Context<'_>,
271 buf: &mut [u8],
272) -> Poll<Result<usize>> {
273 let this = this.get_mut();
274 let eof = this.eof;
275 let fut = this.inner.lock();
276 let mut g = task::ready!(pin!(fut).poll(ctx));
277 let inner_pin: Pin<&mut TarReaderInner<'a, R>> = g.as_mut();
278 let inner = inner_pin.project();
279 let n;
280 if *inner.pos > eof || (*inner.pos == eof && !matches!(*inner.state, Entry)) {
281 return Poll::Ready(Ok(0));
282 } else if *inner.pos < eof {
283 let remain = *inner.nxt - *inner.pos;
284 n = if remain > 0 {
285 let size = std::cmp::min(remain, buf.len() as u64);
286 let n = task::ready!(poll_read_compat(
287 pin!(inner.reader),
288 ctx,
289 &mut buf[0..size as usize]
290 ))?;
291 if n == 0 {
292 return Poll::Ready(Err(Error::new(
293 ErrorKind::UnexpectedEof,
294 "unexpected EOF while reading archive file",
295 )));
296 }
297 n
298 } else {
299 0
300 };
301 *inner.pos += n as u64;
302 if (n as u64) < remain {
303 return Poll::Ready(Ok(n));
304 }
305 } else {
306 n = 0;
307 };
308 ctx.waker().wake_by_ref();
309 let nxt = padded_size(*inner.nxt);
310 if *inner.pos == nxt {
311 *inner.nxt = nxt + BLOCK_SIZE as u64;
312 *inner.state = Header;
313 } else {
314 *inner.nxt = nxt;
315 *inner.state = Padding;
316 }
317 Poll::Ready(Ok(n))
318}
319
// Unwraps a `Poll<Result<T, E>>` inside a function that returns
// `Poll<Option<Result<_, E>>>`:
// - `Ready(Ok(t))` evaluates to `t`,
// - `Ready(Err(err))` returns `Poll::Ready(Some(Err(err)))` from the caller,
// - `Pending` returns `Poll::Pending` from the caller.
macro_rules! ready_opt {
    ($e:expr $(,)?) => {
        match $e {
            Poll::Ready(Ok(t)) => t,
            Poll::Ready(Err(err)) => return Poll::Ready(Some(Err(err))),
            Poll::Pending => return Poll::Pending,
        }
    };
}
329
/// One raw tar header record of `BLOCK_SIZE` bytes.
///
/// The bytes are interpreted through the `GnuHeader`, `UstarHeader` and
/// `OldHeader` layouts.
#[repr(C)]
#[allow(missing_docs)]
struct Header {
    record: [u8; BLOCK_SIZE],
}
335
/// A borrowed, typed view of a `Header`, selected by its magic and version
/// fields (see `Header::kind`).
enum HeaderKind<'a> {
    /// Magic `"ustar "` with version `" \0"`.
    Gnu(&'a GnuHeader),
    /// Magic `"ustar\0"` with version `"00"`.
    Ustar(&'a UstarHeader),
    /// Any other magic/version combination.
    Old(&'a OldHeader),
}
341
/// Marker for the header layouts that `Header::cast` may reinterpret a raw
/// record as.
trait HeaderVariant {}
343
344impl Header {
345 fn new() -> Self {
346 Self {
347 record: [0u8; BLOCK_SIZE],
348 }
349 }
350 unsafe fn cast<U: HeaderVariant>(&self) -> &U {
351 &*(self as *const Self as *const U)
352 }
353 fn buf_mut<I>(&mut self, range: I) -> &mut [u8]
354 where
355 I: core::slice::SliceIndex<[u8], Output = [u8]>,
356 {
357 &mut self.record[range]
358 }
359 fn buf<I>(&self, range: I) -> &[u8]
360 where
361 I: core::slice::SliceIndex<[u8], Output = [u8]>,
362 {
363 &self.record[range]
364 }
365 fn as_str<I>(&self, range: I) -> Result<Box<str>>
366 where
367 I: core::slice::SliceIndex<[u8], Output = [u8]>,
368 {
369 from_utf8(self.buf(range))
370 .map_err(|_| Error::new(ErrorKind::InvalidData, "invalid UTF-8"))
371 .map(|p| p.to_string().into_boxed_str())
372 }
373 fn as_null_terminated_str<I>(&self, range: I) -> Result<Box<str>>
374 where
375 I: core::slice::SliceIndex<[u8], Output = [u8]>,
376 {
377 from_utf8(null_terminated(self.buf(range)))
378 .map_err(|_| Error::new(ErrorKind::InvalidData, "invalid UTF-8"))
379 .map(|p| p.to_string().into_boxed_str())
380 }
381 fn kind(&self) -> HeaderKind<'_> {
382 let gnu = unsafe { self.cast::<GnuHeader>() };
383 if gnu.magic == *b"ustar " && gnu.version == *b" \0" {
384 HeaderKind::Gnu(gnu)
385 } else if gnu.magic == *b"ustar\0" && gnu.version == *b"00" {
386 HeaderKind::Ustar(unsafe { self.cast::<UstarHeader>() })
387 } else {
388 HeaderKind::Old(unsafe { self.cast::<OldHeader>() })
389 }
390 }
391 fn entry_type(&self) -> std::result::Result<Kind, u8> {
392 Kind::from_byte(unsafe { self.cast::<GnuHeader>() }.typeflag[0])
393 }
394 fn checksum(&self) -> Result<u32> {
395 parse_octal(&unsafe { self.cast::<GnuHeader>() }.cksum)
396 .map(|value| value as u32)
397 .map_err(|err| {
398 Error::new(
399 ErrorKind::InvalidData,
400 format!(
401 "invalid tar header checksum field: {:?}",
402 String::from_utf8_lossy(err)
403 ),
404 )
405 })
406 }
407 fn calculated_checksum(&self) -> u32 {
408 self.record
409 .iter()
410 .enumerate()
411 .map(|(index, byte)| {
412 if (148..156).contains(&index) {
413 u32::from(b' ')
414 } else {
415 u32::from(*byte)
416 }
417 })
418 .sum()
419 }
420 fn validate_checksum(&self) -> Result<()> {
421 let expected = self.checksum()?;
422 let actual = self.calculated_checksum();
423 if expected == actual {
424 Ok(())
425 } else {
426 Err(Error::new(
427 ErrorKind::InvalidData,
428 format!("invalid tar header checksum: expected {expected}, got {actual}"),
429 ))
430 }
431 }
432 fn is_gnu(&self) -> bool {
433 let gnu = unsafe { self.cast::<GnuHeader>() };
434 gnu.magic == *b"ustar " && gnu.version == *b" \0"
435 }
436 #[allow(dead_code)]
437 fn is_ustar(&self) -> bool {
438 let ustar = unsafe { self.cast::<UstarHeader>() };
439 ustar.magic == *b"ustar\0" && ustar.version == *b"00"
440 }
441 fn is_old(&self) -> bool {
442 let gnu = unsafe { self.cast::<GnuHeader>() };
443 gnu.magic[..5] != *b"ustar"
444 }
445 #[inline]
446 fn mode(&self) -> Result<u32> {
447 parse_octal(&unsafe { self.cast::<GnuHeader>() }.mode)
448 .map(|r| r as u32)
449 .map_err(|err| {
450 Error::new(
451 ErrorKind::InvalidData,
452 format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
453 )
454 })
455 }
456 #[inline]
457 fn mtime(&self) -> Result<u64> {
458 parse_octal(&unsafe { self.cast::<GnuHeader>() }.mtime).map_err(|err| {
459 Error::new(
460 ErrorKind::InvalidData,
461 format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
462 )
463 })
464 }
465 #[inline]
466 fn size(&self) -> Result<u64> {
467 parse_octal(&unsafe { self.cast::<GnuHeader>() }.size).map_err(|err| {
468 Error::new(
469 ErrorKind::InvalidData,
470 format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
471 )
472 })
473 }
474 #[inline]
475 fn uid(&self) -> Result<u32> {
476 parse_octal(&unsafe { self.cast::<GnuHeader>() }.uid)
477 .map(|r| r as u32)
478 .map_err(|err| {
479 Error::new(
480 ErrorKind::InvalidData,
481 format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
482 )
483 })
484 }
485 #[inline]
486 fn gid(&self) -> Result<u32> {
487 parse_octal(&unsafe { self.cast::<GnuHeader>() }.gid)
488 .map(|r| r as u32)
489 .map_err(|err| {
490 Error::new(
491 ErrorKind::InvalidData,
492 format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
493 )
494 })
495 }
496 #[inline]
497 fn dev_major(&self) -> Result<u32> {
498 parse_octal(&unsafe { self.cast::<GnuHeader>() }.dev_major)
499 .map(|r| r as u32)
500 .map_err(|err| {
501 Error::new(
502 ErrorKind::InvalidData,
503 format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
504 )
505 })
506 }
507 #[inline]
508 fn dev_minor(&self) -> Result<u32> {
509 parse_octal(&unsafe { self.cast::<GnuHeader>() }.dev_minor)
510 .map(|r| r as u32)
511 .map_err(|err| {
512 Error::new(
513 ErrorKind::InvalidData,
514 format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
515 )
516 })
517 }
518 fn uname(&self) -> Result<Option<Box<str>>> {
519 match self.kind() {
520 HeaderKind::Gnu(gnu) => parse_name_field(&gnu.uname),
521 HeaderKind::Ustar(ustar) => parse_name_field(&ustar.uname),
522 HeaderKind::Old(_) => Ok(None),
523 }
524 }
525 fn gname(&self) -> Result<Option<Box<str>>> {
526 match self.kind() {
527 HeaderKind::Gnu(gnu) => parse_name_field(&gnu.gname),
528 HeaderKind::Ustar(ustar) => parse_name_field(&ustar.gname),
529 HeaderKind::Old(_) => Ok(None),
530 }
531 }
532 fn is_zero(&self) -> bool {
533 self.record.iter().all(|b| *b == b'\0')
534 }
535}
536
/// Entry type, as encoded in the type flag byte of a tar header.
#[derive(Debug, PartialEq, Eq)]
#[repr(u8)]
enum Kind {
    File0 = b'\0',
    File = b'0',
    Link = b'1',
    Symlink = b'2',
    CharDevice = b'3',
    BlockDevice = b'4',
    Directory = b'5',
    Fifo = b'6',
    #[allow(dead_code)]
    Continous = b'7',
    GNULongLink = b'K',
    GNULongName = b'L',
    PAXLocal = b'x',
    PAXGlobal = b'g',
}

impl std::fmt::Display for Kind {
    /// Writes a short human-readable description of the entry type.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let label = match self {
            Self::File0 | Self::File | Self::Continous => "regular file",
            Self::Link => "link",
            Self::Symlink => "symlink",
            Self::CharDevice => "character device",
            Self::BlockDevice => "block device",
            Self::Directory => "directory",
            Self::Fifo => "FIFO",
            Self::GNULongLink => "GNU long link extension",
            Self::GNULongName => "GNU long name extension",
            Self::PAXLocal => "PAX next file extension",
            Self::PAXGlobal => "PAX global extension",
        };
        f.write_str(label)
    }
}

impl Kind {
    /// Returns the type flag byte for this kind.
    fn byte(self) -> u8 {
        self as u8
    }

    /// Decodes a type flag byte; an unknown byte is handed back as `Err`.
    fn from_byte(b: u8) -> std::result::Result<Self, u8> {
        let kind = match b {
            b'\0' => Kind::File0,
            b'0' => Kind::File,
            b'1' => Kind::Link,
            b'2' => Kind::Symlink,
            b'3' => Kind::CharDevice,
            b'4' => Kind::BlockDevice,
            b'5' => Kind::Directory,
            b'6' => Kind::Fifo,
            b'7' => Kind::Continous,
            b'K' => Kind::GNULongLink,
            b'L' => Kind::GNULongName,
            b'x' => Kind::PAXLocal,
            b'g' => Kind::PAXGlobal,
            unknown => return Err(unknown),
        };
        Ok(kind)
    }
}
596
/// Byte layout used for headers that carry neither the GNU nor the ustar
/// magic/version (see `Header::kind`). The fields add up to 512 bytes.
#[repr(C)]
#[allow(missing_docs)]
struct OldHeader {
    name: [u8; 100],
    mode: [u8; 8],
    uid: [u8; 8],
    gid: [u8; 8],
    size: [u8; 12],
    mtime: [u8; 12],
    cksum: [u8; 8],
    linkflag: [u8; 1],
    linkname: [u8; 100],
    pad: [u8; 255],
}
// Allows `Header::cast` to produce this layout.
impl HeaderVariant for OldHeader {}
612impl OldHeader {
613 fn path_name(&self) -> Result<Box<str>> {
614 path_name(&self.name).map(|p| p.to_string().into_boxed_str())
615 }
616 fn link_name(&self) -> Result<Box<str>> {
617 path_name(&self.linkname).map(|p| p.to_string().into_boxed_str())
618 }
619}
620
// Length in bytes of the `name` and `linkname` fields of a ustar header.
const NAME_LEN: usize = 100;
// Length in bytes of the `prefix` field of a ustar header.
const PREFIX_LEN: usize = 155;
623
/// Byte layout of a header with magic `"ustar\0"` and version `"00"`.
/// The fields add up to 512 bytes.
#[repr(C)]
#[allow(missing_docs)]
struct UstarHeader {
    name: [u8; NAME_LEN],
    mode: [u8; 8],
    uid: [u8; 8],
    gid: [u8; 8],
    size: [u8; 12],
    mtime: [u8; 12],
    cksum: [u8; 8],
    typeflag: [u8; 1],
    linkname: [u8; NAME_LEN],
    magic: [u8; 6],
    version: [u8; 2],
    uname: [u8; 32],
    gname: [u8; 32],
    dev_major: [u8; 8],
    dev_minor: [u8; 8],
    // Leading part of a path that does not fit into `name` (see `set_path`).
    prefix: [u8; PREFIX_LEN],
    pad: [u8; 12],
}
// Allows `Header::cast` to produce this layout.
impl HeaderVariant for UstarHeader {}
646impl UstarHeader {
647 fn path_name(&self) -> Result<Box<str>> {
648 ustar_path_name(&self.name, &self.prefix)
649 }
650 fn link_name(&self) -> Result<Box<str>> {
651 path_name(&self.linkname).map(|p| p.to_string().into_boxed_str())
652 }
653 unsafe fn from_buf(buf: &mut [u8]) -> &mut Self {
654 buf[..BLOCK_SIZE].fill(0);
655 let hdr = &mut *(buf.as_mut_ptr() as *mut Self);
656 hdr.magic = *b"ustar\0";
657 hdr.version = *b"00";
658 hdr
659 }
660 fn set_dev_major(&mut self, major: u32) -> std::io::Result<()> {
661 format_octal(major as u64, &mut self.dev_major)
662 }
663 fn set_dev_minor(&mut self, minor: u32) -> std::io::Result<()> {
664 format_octal(minor as u64, &mut self.dev_minor)
665 }
666 fn set_uid(&mut self, uid: u32) -> std::io::Result<()> {
667 format_octal(uid as u64, &mut self.uid)
668 }
669 fn set_gid(&mut self, gid: u32) -> std::io::Result<()> {
670 format_octal(gid as u64, &mut self.gid)
671 }
672 fn set_uname(&mut self, uname: &str) {
673 self.uname.fill(0);
674 self.uname[..uname.len()].copy_from_slice(uname.as_bytes());
675 }
676 fn set_gname(&mut self, gname: &str) {
677 self.gname.fill(0);
678 self.gname[..gname.len()].copy_from_slice(gname.as_bytes());
679 }
680 fn set_mode(&mut self, mode: u32) -> std::io::Result<()> {
681 format_octal(mode as u64, &mut self.mode)
682 }
683 fn set_mtime(&mut self, mtime: u64) -> std::io::Result<()> {
684 format_octal(mtime, &mut self.mtime)
685 }
686 fn set_size(&mut self, size: u64) -> std::io::Result<()> {
687 format_octal(size, &mut self.size)
688 }
689 fn set_typeflag(&mut self, kind: Kind) {
690 self.typeflag[0] = kind.byte();
691 }
692 fn path_split_point(&mut self, path: &str) -> Option<usize> {
693 let bytes = path.as_bytes();
694 if bytes.len() <= self.name.len() {
695 return None;
696 }
697 bytes
698 .iter()
699 .enumerate()
700 .rfind(|(i, b)| **b == b'/' && i <= &self.prefix.len())
701 .map(|(i, _)| i)
702 }
703 fn set_path(&mut self, path: &str, split_pos: Option<usize>) {
704 if let Some(pos) = split_pos {
705 self.prefix[..pos].copy_from_slice(&path.as_bytes()[..pos]);
706 copy_utf8_truncate(&mut self.name, unsafe {
707 std::str::from_utf8_unchecked(&path.as_bytes()[pos + 1..])
710 });
711 } else {
712 copy_utf8_truncate(&mut self.name, path);
713 }
714 }
715 fn set_link_path(&mut self, name: &str) {
716 copy_utf8_truncate(&mut self.linkname, name);
717 }
718 fn finalize(&mut self) -> std::io::Result<()> {
719 self.cksum.fill(b' ');
720 let buf =
721 unsafe { std::slice::from_raw_parts(self as *const Self as *const u8, BLOCK_SIZE) };
722 let checksum: u32 = buf.iter().map(|b| *b as u32).sum();
723 format_octal(checksum as u64, &mut self.cksum)
724 }
725}
726
/// Copies `bytes` into the start of `field`, truncating when it does not fit.
///
/// A string that fits is copied whole. Otherwise the copy stops at the last
/// character start index that is not greater than `field.len()` and whose
/// character is not `/`. Bytes of `field` beyond the copied part are left
/// untouched.
fn copy_utf8_truncate(field: &mut [u8], bytes: &str) {
    let source = bytes.as_bytes();
    if let Some(target) = field.get_mut(..source.len()) {
        target.copy_from_slice(source);
        return;
    }
    let limit = field.len();
    let cut = bytes
        .char_indices()
        .take_while(|&(start, _)| start <= limit)
        .filter(|&(_, ch)| ch != '/')
        .map(|(start, _)| start)
        .last()
        .unwrap_or(0);
    field[..cut].copy_from_slice(&source[..cut]);
}
744
/// Byte layout of a header with magic `"ustar "` and version `" \0"`.
/// The fields add up to 512 bytes.
#[repr(C)]
#[allow(missing_docs)]
struct GnuHeader {
    name: [u8; 100],
    mode: [u8; 8],
    uid: [u8; 8],
    gid: [u8; 8],
    size: [u8; 12],
    mtime: [u8; 12],
    cksum: [u8; 8],
    typeflag: [u8; 1],
    linkname: [u8; 100],
    magic: [u8; 6],
    version: [u8; 2],
    uname: [u8; 32],
    gname: [u8; 32],
    dev_major: [u8; 8],
    dev_minor: [u8; 8],
    // The fields from here on exist only in the GNU layout.
    atime: [u8; 12],
    ctime: [u8; 12],
    offset: [u8; 12],
    longnames: [u8; 4],
    unused: [u8; 1],
    sparse: [u8; 96],
    isextended: [u8; 1],
    realsize: [u8; 12],
    pad: [u8; 17],
}
// Allows `Header::cast` to produce this layout.
impl HeaderVariant for GnuHeader {}
774impl GnuHeader {
775 fn path_name(&self) -> Result<Box<str>> {
776 path_name(&self.name).map(|p| p.to_string().into_boxed_str())
777 }
778 fn link_name(&self) -> Result<Box<str>> {
779 path_name(&self.linkname).map(|p| p.to_string().into_boxed_str())
780 }
781 fn atime(&self) -> Result<u64> {
782 parse_octal(&self.atime).map_err(|err| {
783 Error::new(
784 ErrorKind::InvalidData,
785 format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
786 )
787 })
788 }
789 fn ctime(&self) -> Result<u64> {
790 parse_octal(&self.ctime).map_err(|err| {
791 Error::new(
792 ErrorKind::InvalidData,
793 format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
794 )
795 })
796 }
797}
798
/// One archive entry as assembled from a header.
/// NOTE(review): presumably the internal form that is later turned into the
/// public `TarEntry` - confirm against the code that consumes it.
enum Entry {
    /// A regular file together with its metadata.
    File {
        path_name: Box<str>,
        size: u64,
        // Compared against the shared reader position by the file reader;
        // see `TarRegularFileReader::eof`.
        eof: u64,
        mode: u32,
        uid: u32,
        gid: u32,
        uname: Option<Box<str>>,
        gname: Option<Box<str>>,
        times: EntryTimes,
        attrs: AttrList,
    },
    Link(TarLink),
    Symlink(TarSymlink),
    Directory(TarDirectory),
    Device(TarDevice),
    Fifo(TarFifo),
}
818
819fn effective_size(hdr: &Header, size: Option<u64>) -> Result<u64> {
820 Ok(size.unwrap_or(hdr.size()?))
821}
822
823fn effective_uid(hdr: &Header, uid: Option<u32>) -> Result<u32> {
824 Ok(uid.unwrap_or(hdr.uid()?))
825}
826
827fn effective_gid(hdr: &Header, gid: Option<u32>) -> Result<u32> {
828 Ok(gid.unwrap_or(hdr.gid()?))
829}
830
831fn effective_uname(hdr: &Header, uname: Option<Box<str>>) -> Result<Option<Box<str>>> {
832 uname.map_or_else(|| hdr.uname(), |uname| Ok(Some(uname)))
833}
834
835fn effective_gname(hdr: &Header, gname: Option<Box<str>>) -> Result<Option<Box<str>>> {
836 gname.map_or_else(|| hdr.gname(), |gname| Ok(Some(gname)))
837}
838
839fn effective_times(hdr: &Header, info: &PaxInfo) -> Result<EntryTimes> {
840 let mut times = EntryTimes::from_mtime(hdr.mtime()?)?;
841 if let HeaderKind::Gnu(gnu) = hdr.kind() {
842 let atime = gnu.atime()?;
843 if atime != 0 {
844 times.atime = Some(unix_epoch_checked_add(Duration::from_secs(atime))?);
845 }
846 let ctime = gnu.ctime()?;
847 if ctime != 0 {
848 times.ctime = Some(unix_epoch_checked_add(Duration::from_secs(ctime))?);
849 }
850 }
851 if let Some(mtime) = info.mtime {
852 times.mtime = mtime;
853 }
854 if let Some(atime) = info.atime {
855 times.atime = Some(atime);
856 }
857 if let Some(ctime) = info.ctime {
858 times.ctime = Some(ctime);
859 }
860 Ok(times)
861}
862
/// State of the archive reader's state machine.
#[derive(Debug, PartialEq, Eq)]
enum State {
    /// Set once an entry and its padding are complete.
    /// NOTE(review): presumably "a header record is expected next" - confirm.
    Header,
    /// NOTE(review): presumably an extension header of the given kind is
    /// being read; the meaning of the `u32` is not visible here - confirm.
    Extension((u32, Kind)),
    /// The data of an entry is being read.
    Entry,
    /// Set when a file reader is dropped before its entry was read in full.
    SkipEntry,
    /// Set when entry data ended short of the next block boundary.
    Padding,
    /// NOTE(review): end-of-archive handling; the difference between `Eof`
    /// and `Eoff` is not visible in this part of the file - confirm.
    Eof,
    Eoff,
}
// Brings the variants into scope unqualified (`Header`, `Entry`, ...).
use State::*;
874
875struct PosixExtension {
876 inner: Box<str>,
877}
878impl PosixExtension {
879 fn validate(ext: &str) -> Result<()> {
880 parse_pax_records(ext, |_, _| Ok(()))
881 }
882 fn for_each_record(&self, cb: impl FnMut(&str, &str) -> Result<()>) -> Result<()> {
883 parse_pax_records(&self.inner, cb)
884 }
885}
886
887impl From<Box<str>> for PosixExtension {
888 fn from(s: Box<str>) -> Self {
889 Self { inner: s }
890 }
891}
892impl std::ops::Deref for PosixExtension {
893 type Target = str;
894 fn deref(&self) -> &Self::Target {
895 &self.inner
896 }
897}
898
899enum ExtensionHeader {
900 LongName(Box<str>),
901 LongLink(Box<str>),
902 PosixExtension(PosixExtension),
903}
904
905impl std::ops::Deref for ExtensionHeader {
906 type Target = str;
907 fn deref(&self) -> &Self::Target {
908 match self {
909 ExtensionHeader::LongName(name) => name,
910 ExtensionHeader::LongLink(name) => name,
911 ExtensionHeader::PosixExtension(pax) => pax,
912 }
913 }
914}
915
916struct ExtensionBuffer {
917 buf: Vec<u8>,
918}
919
920impl ExtensionBuffer {
921 fn new(size: usize) -> Self {
922 ExtensionBuffer {
923 buf: Vec::<u8>::with_capacity(size),
924 }
925 }
926 fn as_str<I>(&self, range: I) -> Result<Box<str>>
927 where
928 I: core::slice::SliceIndex<[u8], Output = [u8]>,
929 {
930 from_utf8(&self.buf[range])
931 .map_err(|_| Error::new(ErrorKind::InvalidData, "invalid UTF-8"))
932 .map(|p| p.to_string().into_boxed_str())
933 }
934 fn as_null_terminated_str<I>(&self, range: I) -> Result<Box<str>>
935 where
936 I: core::slice::SliceIndex<[u8], Output = [u8]>,
937 {
938 from_utf8(null_terminated(&self.buf[range]))
939 .map_err(|_| Error::new(ErrorKind::InvalidData, "invalid UTF-8"))
940 .map(|p| p.to_string().into_boxed_str())
941 }
942 unsafe fn upto(&mut self, n: usize) -> &mut [u8] {
943 std::slice::from_raw_parts_mut(self.buf.as_mut_ptr(), n)
944 }
945 unsafe fn remaining_buf(&mut self) -> &mut [u8] {
946 let remaining = self.buf.spare_capacity_mut();
947 std::slice::from_raw_parts_mut(remaining.as_mut_ptr() as *mut u8, remaining.len())
948 }
949 unsafe fn advance(&mut self, n: usize) {
950 self.buf.set_len(self.buf.len() + n)
951 }
952}
953
pin_project! {
    // State shared between the archive reader and the file readers it
    // hands out.
    struct TarReaderInner<'a, R> {
        // Number of bytes consumed from `reader` so far.
        pos: u64,
        // Position at which the segment that is currently being read ends.
        nxt: u64,
        // Current state of the reader state machine.
        state: State,
        // NOTE(review): presumably the buffer of the extension header that is
        // currently being read - confirm.
        ext: Option<ExtensionBuffer>,
        // NOTE(review): presumably the extension headers collected for the
        // next entry - confirm.
        exts: Vec<ExtensionHeader>,
        // NOTE(review): presumably the PAX global extensions seen so far -
        // confirm.
        globs: Vec<PosixExtension>,
        // Raw header record.
        header: Header,
        // The underlying archive stream; structurally pinned.
        #[pin]
        reader: R,
        marker: std::marker::PhantomData<&'a ()>,
    }
}
982
/// Reader for the contents of a regular-file entry of an archive.
///
/// It shares the underlying archive stream with the `TarReader` it came from.
/// Dropping it before the entry has been read in full marks the rest of the
/// entry to be skipped.
pub struct TarRegularFileReader<'a, R: AsyncRead + 'a> {
    // Archive position at which the data of this entry ends.
    eof: u64,
    // Reader state shared with the archive reader.
    inner: Arc<Mutex<Pin<Box<TarReaderInner<'a, R>>>>>,
}
993
994impl<R: AsyncRead> Drop for TarRegularFileReader<'_, R> {
995 fn drop(&mut self) {
996 let inner = self.inner.clone();
997 let eof = self.eof;
998 let mut g = inner.lock_blocking();
999 let this_pin = g.as_mut();
1000 let this = this_pin.project();
1001 if *this.pos < eof {
1002 *this.state = SkipEntry;
1003 } else if *this.pos == eof && matches!(*this.state, Entry) {
1004 let nxt = padded_size(*this.nxt);
1005 if *this.pos == nxt {
1006 *this.nxt = nxt + BLOCK_SIZE as u64;
1007 *this.state = Header;
1008 } else {
1009 *this.nxt = nxt;
1010 *this.state = Padding;
1011 }
1012 }
1013 }
1014}
1015
/// `AsyncRead` for the `smol` backend, delegating to
/// `poll_regular_file_reader`.
#[cfg(feature = "smol")]
impl<R: AsyncRead> AsyncRead for TarRegularFileReader<'_, R> {
    fn poll_read(
        self: Pin<&mut Self>,
        ctx: &mut Context<'_>,
        buf: &mut [u8],
    ) -> Poll<Result<usize>> {
        let outcome = poll_regular_file_reader(self, ctx, buf);
        outcome
    }
}
1026
/// `AsyncRead` for the `tokio` backend, delegating to
/// `poll_regular_file_reader`.
#[cfg(feature = "tokio")]
impl<R: AsyncRead> AsyncRead for TarRegularFileReader<'_, R> {
    fn poll_read(
        self: Pin<&mut Self>,
        ctx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<Result<()>> {
        // `ready!` propagates `Pending`; `?` propagates a read error.
        let filled = task::ready!(poll_regular_file_reader(
            self,
            ctx,
            buf.initialize_unfilled()
        ))?;
        buf.advance(filled);
        Poll::Ready(Ok(()))
    }
}
1044
1045pub struct TarReader<'a, R: AsyncRead + 'a> {
1085 inner: Arc<Mutex<Pin<Box<TarReaderInner<'a, R>>>>>,
1086}
1087
1088impl<'a, R: AsyncRead + 'a> TarReader<'a, R> {
1089 pub fn new(r: R) -> Self {
1091 Self {
1092 inner: Arc::new(Mutex::new(Box::pin(TarReaderInner::new(r)))),
1093 }
1094 }
1095}
1096
/// Timestamps of an archive entry.
#[derive(Clone, Debug, PartialEq, Eq)]
struct EntryTimes {
    mtime: SystemTime,
    atime: Option<SystemTime>,
    ctime: Option<SystemTime>,
}

impl EntryTimes {
    /// Builds the timestamps from a modification time in seconds since the
    /// Unix epoch; access and change time stay unset.
    ///
    /// Fails when the time cannot be represented.
    fn from_mtime(mtime: u64) -> Result<Self> {
        let mtime = unix_epoch_checked_add(Duration::from_secs(mtime))?;
        Ok(Self {
            mtime,
            atime: None,
            ctime: None,
        })
    }

    /// Returns the modification time.
    fn mtime(&self) -> SystemTime {
        self.mtime
    }

    /// Returns the timestamps with the modification time replaced.
    fn with_mtime(self, mtime: SystemTime) -> Self {
        Self { mtime, ..self }
    }

    /// Returns the timestamps with the access time set.
    fn with_atime(self, atime: SystemTime) -> Self {
        Self {
            atime: Some(atime),
            ..self
        }
    }

    /// Returns the timestamps with the change time set.
    fn with_ctime(self, ctime: SystemTime) -> Self {
        Self {
            ctime: Some(ctime),
            ..self
        }
    }
}

impl Default for EntryTimes {
    /// Modification time at the Unix epoch, no access or change time.
    fn default() -> Self {
        let (atime, ctime) = (None, None);
        Self {
            mtime: UNIX_EPOCH,
            atime,
            ctime,
        }
    }
}

/// Adds `duration` to the Unix epoch.
///
/// Fails with `ErrorKind::InvalidData` when the result is not representable.
fn unix_epoch_checked_add(duration: Duration) -> Result<SystemTime> {
    match UNIX_EPOCH.checked_add(duration) {
        Some(time) => Ok(time),
        None => Err(Error::new(
            ErrorKind::InvalidData,
            "timestamp out of range",
        )),
    }
}
1148
/// Converts `time` into whole seconds since the Unix epoch.
///
/// Returns `None` when `time` lies before the epoch or has a sub-second part.
fn header_mtime_value(time: SystemTime) -> Option<u64> {
    time.duration_since(UNIX_EPOCH)
        .ok()
        .filter(|elapsed| elapsed.subsec_nanos() == 0)
        .map(|elapsed| elapsed.as_secs())
}
1156
1157pub struct TarLink {
1159 path_name: Box<str>,
1160 link_name: Box<str>,
1161}
1162impl<R: AsyncRead> From<TarLink> for TarEntry<'_, R> {
1163 fn from(link: TarLink) -> Self {
1164 Self::Link(link)
1165 }
1166}
1167impl TarLink {
1168 pub fn new<N: Into<Box<str>>, L: Into<Box<str>>>(path_name: N, link_name: L) -> TarLink {
1173 TarLink {
1174 path_name: path_name.into(),
1175 link_name: link_name.into(),
1176 }
1177 }
1178 pub fn path(&'_ self) -> &'_ str {
1180 &self.path_name
1181 }
1182 pub fn link(&'_ self) -> &'_ str {
1184 &self.link_name
1185 }
1186 fn write_header(&self, buffer: &mut [u8]) -> std::io::Result<usize> {
1187 write_header(
1188 buffer,
1189 self.path_name.as_ref(),
1190 Some(self.link_name.as_ref()),
1191 Kind::Link,
1192 0, 0, 0, 0, None, None, &EntryTimes::default(),
1199 None, &AttrList::default(),
1201 )
1202 }
1203}
/// Kind of a device entry.
pub enum DeviceKind {
    /// Character device.
    Char,
    /// Block device.
    Block,
}
1211
1212pub struct TarDevice {
1214 path_name: Box<str>,
1215 mode: u32,
1216 uid: u32,
1217 gid: u32,
1218 uname: Option<Box<str>>,
1219 gname: Option<Box<str>>,
1220 times: EntryTimes,
1221 kind: DeviceKind,
1222 major: u32,
1223 minor: u32,
1224 attrs: AttrList,
1225}
1226impl<R: AsyncRead> From<TarDevice> for TarEntry<'_, R> {
1227 fn from(device: TarDevice) -> Self {
1228 Self::Device(device)
1229 }
1230}
1231impl TarDevice {
1232 pub fn new_char<N: Into<Box<str>>>(path_name: N, major: u32, minor: u32) -> TarDevice {
1234 TarDevice {
1235 path_name: path_name.into(),
1236 mode: 0o600,
1237 uid: 0,
1238 gid: 0,
1239 uname: None,
1240 gname: None,
1241 times: EntryTimes::default(),
1242 major,
1243 minor,
1244 kind: DeviceKind::Char,
1245 attrs: AttrList::default(),
1246 }
1247 }
1248 pub fn new_block<N: Into<Box<str>>>(path_name: N, major: u32, minor: u32) -> TarDevice {
1250 TarDevice {
1251 path_name: path_name.into(),
1252 mode: 0o600,
1253 uid: 0,
1254 gid: 0,
1255 uname: None,
1256 gname: None,
1257 times: EntryTimes::default(),
1258 major,
1259 minor,
1260 kind: DeviceKind::Block,
1261 attrs: AttrList::default(),
1262 }
1263 }
1264 pub fn path(&'_ self) -> &'_ str {
1266 &self.path_name
1267 }
1268 pub fn mode(&self) -> u32 {
1270 self.mode
1271 }
1272 pub fn mtime(&self) -> SystemTime {
1274 self.times.mtime()
1275 }
1276 pub fn atime(&self) -> Option<SystemTime> {
1278 self.times.atime
1279 }
1280 pub fn ctime(&self) -> Option<SystemTime> {
1282 self.times.ctime
1283 }
1284 pub fn uid(&self) -> u32 {
1286 self.uid
1287 }
1288 pub fn uname(&self) -> &str {
1290 self.uname.as_deref().unwrap_or("")
1291 }
1292 pub fn with_uid(mut self, uid: u32) -> Self {
1294 self.uid = uid;
1295 self
1296 }
1297 pub fn with_uname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1299 self.uname = Some(name.into());
1300 self
1301 }
1302 pub fn gid(&self) -> u32 {
1304 self.gid
1305 }
1306 pub fn gname(&self) -> &str {
1308 self.gname.as_deref().unwrap_or("")
1309 }
1310 pub fn with_gid(mut self, gid: u32) -> Self {
1312 self.gid = gid;
1313 self
1314 }
1315 pub fn with_gname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1317 self.gname = Some(name.into());
1318 self
1319 }
1320 pub fn is_char(&self) -> bool {
1322 matches!(self.kind, DeviceKind::Char)
1323 }
1324 pub fn is_block(&self) -> bool {
1326 matches!(self.kind, DeviceKind::Block)
1327 }
1328 pub fn major(&self) -> u32 {
1330 self.major
1331 }
1332 pub fn minor(&self) -> u32 {
1334 self.minor
1335 }
1336 pub fn with_mode(mut self, mode: u32) -> Self {
1338 self.mode = mode;
1339 self
1340 }
1341 pub fn with_mtime(mut self, mtime: SystemTime) -> Self {
1343 self.times = self.times.with_mtime(mtime);
1344 self
1345 }
1346 pub fn with_atime(mut self, atime: SystemTime) -> Self {
1348 self.times = self.times.with_atime(atime);
1349 self
1350 }
1351 pub fn with_ctime(mut self, ctime: SystemTime) -> Self {
1353 self.times = self.times.with_ctime(ctime);
1354 self
1355 }
1356 pub fn attrs(&self) -> &AttrList {
1358 &self.attrs
1359 }
1360 pub fn attrs_mut(&mut self) -> &mut AttrList {
1362 &mut self.attrs
1363 }
1364 pub fn with_attrs(mut self, attrs: AttrList) -> Self {
1366 self.attrs = attrs;
1367 self
1368 }
1369 fn write_header(&self, buffer: &mut [u8]) -> std::io::Result<usize> {
1370 write_header(
1371 buffer,
1372 self.path_name.as_ref(),
1373 None,
1374 match self.kind {
1375 DeviceKind::Char => Kind::CharDevice,
1376 DeviceKind::Block => Kind::BlockDevice,
1377 },
1378 0, self.mode,
1380 self.uid,
1381 self.gid,
1382 self.uname.as_deref(),
1383 self.gname.as_deref(),
1384 &self.times,
1385 Some((self.major, self.minor)),
1386 &self.attrs,
1387 )
1388 }
1389}
1390
1391pub struct TarFifo {
1393 path_name: Box<str>,
1394 mode: u32,
1395 uid: u32,
1396 gid: u32,
1397 uname: Option<Box<str>>,
1398 gname: Option<Box<str>>,
1399 times: EntryTimes,
1400 attrs: AttrList,
1401}
1402impl<R: AsyncRead> From<TarFifo> for TarEntry<'_, R> {
1403 fn from(fifo: TarFifo) -> Self {
1404 Self::Fifo(fifo)
1405 }
1406}
1407impl TarFifo {
1408 pub fn new<N: Into<Box<str>>>(path_name: N) -> TarFifo {
1410 TarFifo {
1411 path_name: path_name.into(),
1412 mode: 0o644,
1413 uid: 0,
1414 gid: 0,
1415 uname: None,
1416 gname: None,
1417 times: EntryTimes::default(),
1418 attrs: AttrList::default(),
1419 }
1420 }
1421 pub fn path(&'_ self) -> &'_ str {
1423 &self.path_name
1424 }
1425 pub fn mode(&self) -> u32 {
1427 self.mode
1428 }
1429 pub fn mtime(&self) -> SystemTime {
1431 self.times.mtime()
1432 }
1433 pub fn atime(&self) -> Option<SystemTime> {
1435 self.times.atime
1436 }
1437 pub fn ctime(&self) -> Option<SystemTime> {
1439 self.times.ctime
1440 }
1441 pub fn uid(&self) -> u32 {
1443 self.uid
1444 }
1445 pub fn uname(&self) -> &str {
1447 self.uname.as_deref().unwrap_or("")
1448 }
1449 pub fn with_uid(mut self, uid: u32) -> Self {
1451 self.uid = uid;
1452 self
1453 }
1454 pub fn with_uname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1456 self.uname = Some(name.into());
1457 self
1458 }
1459 pub fn gid(&self) -> u32 {
1461 self.gid
1462 }
1463 pub fn gname(&self) -> &str {
1465 self.gname.as_deref().unwrap_or("")
1466 }
1467 pub fn with_gid(mut self, gid: u32) -> Self {
1469 self.gid = gid;
1470 self
1471 }
1472 pub fn with_gname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1474 self.gname = Some(name.into());
1475 self
1476 }
1477 pub fn with_mode(mut self, mode: u32) -> Self {
1479 self.mode = mode;
1480 self
1481 }
1482 pub fn with_mtime(mut self, mtime: SystemTime) -> Self {
1484 self.times = self.times.with_mtime(mtime);
1485 self
1486 }
1487 pub fn with_atime(mut self, atime: SystemTime) -> Self {
1489 self.times = self.times.with_atime(atime);
1490 self
1491 }
1492 pub fn with_ctime(mut self, ctime: SystemTime) -> Self {
1494 self.times = self.times.with_ctime(ctime);
1495 self
1496 }
1497 pub fn attrs(&self) -> &AttrList {
1499 &self.attrs
1500 }
1501 pub fn attrs_mut(&mut self) -> &mut AttrList {
1503 &mut self.attrs
1504 }
1505 pub fn with_attrs(mut self, attrs: AttrList) -> Self {
1507 self.attrs = attrs;
1508 self
1509 }
1510 fn write_header(&self, buffer: &mut [u8]) -> std::io::Result<usize> {
1511 write_header(
1512 buffer,
1513 self.path_name.as_ref(),
1514 None,
1515 Kind::Fifo,
1516 0, self.mode,
1518 self.uid,
1519 self.gid,
1520 self.uname.as_deref(),
1521 self.gname.as_deref(),
1522 &self.times,
1523 None, &self.attrs,
1525 )
1526 }
1527}
1528
/// A symbolic link entry: a path inside the archive together with the target
/// it points to and the usual ownership, mode and timestamp metadata.
pub struct TarSymlink {
    path_name: Box<str>,
    link_name: Box<str>,
    mode: u32,
    uid: u32,
    gid: u32,
    uname: Option<Box<str>>,
    gname: Option<Box<str>>,
    times: EntryTimes,
    attrs: AttrList,
}
impl<R: AsyncRead> From<TarSymlink> for TarEntry<'_, R> {
    /// Wraps the symlink into the matching `TarEntry` variant.
    fn from(symlink: TarSymlink) -> Self {
        Self::Symlink(symlink)
    }
}
1546impl TarSymlink {
1547 pub fn new<N: Into<Box<str>>, L: Into<Box<str>>>(path_name: N, link_name: L) -> TarSymlink {
1552 TarSymlink {
1553 path_name: path_name.into(),
1554 link_name: link_name.into(),
1555 mode: 0o777,
1556 uid: 0,
1557 gid: 0,
1558 uname: None,
1559 gname: None,
1560 times: EntryTimes::default(),
1561 attrs: AttrList::default(),
1562 }
1563 }
1564 pub fn path(&'_ self) -> &'_ str {
1566 &self.path_name
1567 }
1568 pub fn link(&'_ self) -> &'_ str {
1570 &self.link_name
1571 }
1572 pub fn mode(&self) -> u32 {
1574 self.mode
1575 }
1576 pub fn mtime(&self) -> SystemTime {
1578 self.times.mtime()
1579 }
1580 pub fn atime(&self) -> Option<SystemTime> {
1582 self.times.atime
1583 }
1584 pub fn ctime(&self) -> Option<SystemTime> {
1586 self.times.ctime
1587 }
1588 pub fn uid(&self) -> u32 {
1590 self.uid
1591 }
1592 pub fn uname(&self) -> &str {
1594 self.uname.as_deref().unwrap_or("")
1595 }
1596 pub fn with_uid(mut self, uid: u32) -> Self {
1598 self.uid = uid;
1599 self
1600 }
1601 pub fn with_uname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1603 self.uname = Some(name.into());
1604 self
1605 }
1606 pub fn gid(&self) -> u32 {
1608 self.gid
1609 }
1610 pub fn gname(&self) -> &str {
1612 self.gname.as_deref().unwrap_or("")
1613 }
1614 pub fn with_gid(mut self, gid: u32) -> Self {
1616 self.gid = gid;
1617 self
1618 }
1619 pub fn with_gname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1621 self.gname = Some(name.into());
1622 self
1623 }
1624 pub fn with_mode(mut self, mode: u32) -> Self {
1626 self.mode = mode;
1627 self
1628 }
1629 pub fn with_mtime(mut self, mtime: SystemTime) -> Self {
1631 self.times = self.times.with_mtime(mtime);
1632 self
1633 }
1634 pub fn with_atime(mut self, atime: SystemTime) -> Self {
1636 self.times = self.times.with_atime(atime);
1637 self
1638 }
1639 pub fn with_ctime(mut self, ctime: SystemTime) -> Self {
1641 self.times = self.times.with_ctime(ctime);
1642 self
1643 }
1644 pub fn attrs(&self) -> &AttrList {
1646 &self.attrs
1647 }
1648 pub fn attrs_mut(&mut self) -> &mut AttrList {
1650 &mut self.attrs
1651 }
1652 pub fn with_attrs(mut self, attrs: AttrList) -> Self {
1654 self.attrs = attrs;
1655 self
1656 }
1657 fn write_header(&self, buffer: &mut [u8]) -> std::io::Result<usize> {
1658 write_header(
1659 buffer,
1660 self.path_name.as_ref(),
1661 Some(self.link_name.as_ref()),
1662 Kind::Symlink,
1663 0, self.mode,
1665 self.uid,
1666 self.gid,
1667 self.uname.as_deref(),
1668 self.gname.as_deref(),
1669 &self.times,
1670 None, &self.attrs,
1672 )
1673 }
1674}
1675
/// A directory entry of a tar archive.
///
/// Values are created with `TarDirectory::new` and adjusted through the `with_*` builder
/// methods; the reader also yields them for directory headers.
pub struct TarDirectory {
    // Path of the directory inside the archive.
    path_name: Box<str>,
    // Mode bits written to the header.
    mode: u32,
    // Numeric user and group IDs.
    uid: u32,
    gid: u32,
    // Optional symbolic user and group names.
    uname: Option<Box<str>>,
    gname: Option<Box<str>>,
    // Modification time plus optional access/status-change times.
    times: EntryTimes,
    // Size value carried in the header; `new` initializes it to 0.
    size: u64,
    // Attribute list attached to the entry.
    attrs: AttrList,
}
1688impl<R: AsyncRead> From<TarDirectory> for TarEntry<'_, R> {
1689 fn from(dir: TarDirectory) -> Self {
1690 Self::Directory(dir)
1691 }
1692}
1693impl TarDirectory {
1694 pub fn new<N: Into<Box<str>>>(path_name: N) -> TarDirectory {
1698 TarDirectory {
1699 path_name: path_name.into(),
1700 size: 0,
1701 mode: 0o755,
1702 uid: 0,
1703 gid: 0,
1704 uname: None,
1705 gname: None,
1706 times: EntryTimes::default(),
1707 attrs: AttrList::default(),
1708 }
1709 }
1710 pub fn path(&'_ self) -> &'_ str {
1712 &self.path_name
1713 }
1714 pub fn size(&self) -> u64 {
1716 self.size
1717 }
1718 pub fn mode(&self) -> u32 {
1720 self.mode
1721 }
1722 pub fn mtime(&self) -> SystemTime {
1724 self.times.mtime()
1725 }
1726 pub fn atime(&self) -> Option<SystemTime> {
1728 self.times.atime
1729 }
1730 pub fn ctime(&self) -> Option<SystemTime> {
1732 self.times.ctime
1733 }
1734 pub fn uid(&self) -> u32 {
1736 self.uid
1737 }
1738 pub fn uname(&self) -> &str {
1740 self.uname.as_deref().unwrap_or("")
1741 }
1742 pub fn with_uid(mut self, uid: u32) -> Self {
1744 self.uid = uid;
1745 self
1746 }
1747 pub fn with_uname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1749 self.uname = Some(name.into());
1750 self
1751 }
1752 pub fn gid(&self) -> u32 {
1754 self.gid
1755 }
1756 pub fn gname(&self) -> &str {
1758 self.gname.as_deref().unwrap_or("")
1759 }
1760 pub fn with_gid(mut self, gid: u32) -> Self {
1762 self.gid = gid;
1763 self
1764 }
1765 pub fn with_gname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1767 self.gname = Some(name.into());
1768 self
1769 }
1770 pub fn with_mode(mut self, mode: u32) -> Self {
1772 self.mode = mode;
1773 self
1774 }
1775 pub fn with_mtime(mut self, mtime: SystemTime) -> Self {
1777 self.times = self.times.with_mtime(mtime);
1778 self
1779 }
1780 pub fn with_atime(mut self, atime: SystemTime) -> Self {
1782 self.times = self.times.with_atime(atime);
1783 self
1784 }
1785 pub fn with_ctime(mut self, ctime: SystemTime) -> Self {
1787 self.times = self.times.with_ctime(ctime);
1788 self
1789 }
1790 pub fn with_size(mut self, size: u64) -> Self {
1792 self.size = size;
1793 self
1794 }
1795 pub fn attrs(&self) -> &AttrList {
1797 &self.attrs
1798 }
1799 pub fn attrs_mut(&mut self) -> &mut AttrList {
1801 &mut self.attrs
1802 }
1803 pub fn with_attrs(mut self, attrs: AttrList) -> Self {
1805 self.attrs = attrs;
1806 self
1807 }
1808 fn write_header(&self, buffer: &mut [u8]) -> std::io::Result<usize> {
1809 write_header(
1810 buffer,
1811 self.path_name.as_ref(),
1812 None,
1813 Kind::Directory,
1814 self.size,
1815 self.mode,
1816 self.uid,
1817 self.gid,
1818 self.uname.as_deref(),
1819 self.gname.as_deref(),
1820 &self.times,
1821 None, &self.attrs,
1823 )
1824 }
1825}
1826
/// An ordered list of named attribute values attached to a tar entry.
///
/// Each item pairs a UTF-8 name with an arbitrary byte value. Items keep their insertion
/// order and `push` never replaces an existing item with the same name.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct AttrList {
    // (name, value) pairs in insertion order.
    inner: Vec<(Box<str>, Box<[u8]>)>,
}

impl AttrList {
    /// Creates an empty attribute list.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the number of attributes in the list.
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Returns `true` when the list holds no attributes.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Appends the attribute `name` with the given `value` to the end of the list.
    pub fn push<N, V>(&mut self, name: N, value: V)
    where
        N: Into<Box<str>>,
        V: Into<Box<[u8]>>,
    {
        let item = (name.into(), value.into());
        self.inner.push(item);
    }

    /// Builder form of `push`: appends the attribute and returns the list.
    pub fn with<N, V>(self, name: N, value: V) -> Self
    where
        N: Into<Box<str>>,
        V: Into<Box<[u8]>>,
    {
        let mut list = self;
        list.push(name, value);
        list
    }

    /// Iterates over the attributes as `(name, value)` pairs in insertion order.
    pub fn iter(&self) -> impl Iterator<Item = (&str, &[u8])> {
        self.inner.iter().map(|item| (&*item.0, &*item.1))
    }
}

impl From<Vec<(Box<str>, Box<[u8]>)>> for AttrList {
    /// Takes ownership of the pairs as-is, keeping their order.
    fn from(inner: Vec<(Box<str>, Box<[u8]>)>) -> Self {
        AttrList { inner }
    }
}
1877
pin_project! {
    /// A regular file entry of a tar archive together with the reader that supplies
    /// its contents.
    ///
    /// Values are created with `TarRegularFile::new` and adjusted through the `with_*`
    /// builder methods; the archive reader also yields them for file headers.
    pub struct TarRegularFile<'a, R> {
        // Path of the file inside the archive.
        path_name: Box<str>,
        // Size value written to / read from the header.
        size: u64,
        mode: u32,
        uid: u32,
        gid: u32,
        uname: Option<Box<str>>,
        gname: Option<Box<str>>,
        // Modification time plus optional access/status-change times.
        times: EntryTimes,
        attrs: AttrList,
        // Content source; structurally pinned so it can be polled through the entry.
        #[pin]
        inner: R,
        // Carries the `'a` lifetime without storing any data.
        marker: std::marker::PhantomData<&'a ()>,
    }
}
1899impl<'a, R: AsyncRead + 'a> TarRegularFile<'a, R> {
1900 pub fn new<N: Into<Box<str>>>(path_name: N, size: u64, inner: R) -> TarRegularFile<'a, R> {
1904 TarRegularFile {
1905 path_name: path_name.into(),
1906 size,
1907 mode: 0o644,
1908 uid: 0,
1909 gid: 0,
1910 uname: None,
1911 gname: None,
1912 times: EntryTimes::default(),
1913 attrs: AttrList::default(),
1914 inner,
1915 marker: std::marker::PhantomData,
1916 }
1917 }
1918 pub fn size(&self) -> u64 {
1920 self.size
1921 }
1922 pub fn path(&'_ self) -> &'_ str {
1924 &self.path_name
1925 }
1926 pub fn mode(&self) -> u32 {
1928 self.mode
1929 }
1930 pub fn mtime(&self) -> SystemTime {
1932 self.times.mtime()
1933 }
1934 pub fn atime(&self) -> Option<SystemTime> {
1936 self.times.atime
1937 }
1938 pub fn ctime(&self) -> Option<SystemTime> {
1940 self.times.ctime
1941 }
1942 pub fn uid(&self) -> u32 {
1944 self.uid
1945 }
1946 pub fn uname(&self) -> &str {
1948 self.uname.as_deref().unwrap_or("")
1949 }
1950 pub fn with_uid(mut self, uid: u32) -> Self {
1952 self.uid = uid;
1953 self
1954 }
1955 pub fn with_uname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1957 self.uname = Some(name.into());
1958 self
1959 }
1960 pub fn gid(&self) -> u32 {
1962 self.gid
1963 }
1964 pub fn gname(&self) -> &str {
1966 self.gname.as_deref().unwrap_or("")
1967 }
1968 pub fn with_gid(mut self, gid: u32) -> Self {
1970 self.gid = gid;
1971 self
1972 }
1973 pub fn with_gname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1975 self.gname = Some(name.into());
1976 self
1977 }
1978 pub fn with_mode(mut self, mode: u32) -> Self {
1980 self.mode = mode;
1981 self
1982 }
1983 pub fn with_mtime(mut self, mtime: SystemTime) -> Self {
1985 self.times = self.times.with_mtime(mtime);
1986 self
1987 }
1988 pub fn with_atime(mut self, atime: SystemTime) -> Self {
1990 self.times = self.times.with_atime(atime);
1991 self
1992 }
1993 pub fn with_ctime(mut self, ctime: SystemTime) -> Self {
1995 self.times = self.times.with_ctime(ctime);
1996 self
1997 }
1998 pub fn attrs(&self) -> &AttrList {
2000 &self.attrs
2001 }
2002 pub fn attrs_mut(&mut self) -> &mut AttrList {
2004 &mut self.attrs
2005 }
2006 pub fn with_attrs(mut self, attrs: AttrList) -> Self {
2008 self.attrs = attrs;
2009 self
2010 }
2011 fn write_header(&self, buffer: &mut [u8]) -> std::io::Result<usize> {
2012 write_header(
2013 buffer,
2014 self.path_name.as_ref(),
2015 None,
2016 Kind::File,
2017 self.size,
2018 self.mode,
2019 self.uid,
2020 self.gid,
2021 self.uname.as_deref(),
2022 self.gname.as_deref(),
2023 &self.times,
2024 None, &self.attrs,
2026 )
2027 }
2028}
2029impl<'a, R: AsyncRead + 'a> From<TarRegularFile<'a, R>> for TarEntry<'a, R> {
2030 fn from(file: TarRegularFile<'a, R>) -> Self {
2031 Self::File(file)
2032 }
2033}
/// One entry of a tar archive, as yielded by the reader or handed to the writer.
///
/// `R` is the reader type that supplies the contents of regular files; the other
/// variants carry metadata only.
pub enum TarEntry<'a, R: AsyncRead + 'a> {
    /// A regular file together with its content reader.
    File(TarRegularFile<'a, R>),
    /// A hard link.
    Link(TarLink),
    /// A symbolic link.
    Symlink(TarSymlink),
    /// A directory.
    Directory(TarDirectory),
    /// A character or block device node.
    Device(TarDevice),
    /// A FIFO (named pipe).
    Fifo(TarFifo),
}
2049
2050impl<'a, R: AsyncRead + 'a> TarEntry<'a, R> {
2051 fn write_header(&self, buffer: &mut [u8]) -> std::io::Result<usize> {
2052 match self {
2053 Self::Directory(dir) => dir.write_header(buffer),
2054 Self::Device(device) => device.write_header(buffer),
2055 Self::Fifo(fifo) => fifo.write_header(buffer),
2056 Self::File(file) => file.write_header(buffer),
2057 Self::Link(link) => link.write_header(buffer),
2058 Self::Symlink(symlink) => symlink.write_header(buffer),
2059 }
2060 }
2061}
2062
2063impl<'a, R: AsyncRead + 'a> std::fmt::Debug for TarEntry<'a, R> {
2064 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2065 match self {
2066 Self::File(file) => f
2067 .debug_struct("TarEntry::File")
2068 .field("path_name", &file.path_name)
2069 .field("size", &file.size)
2070 .field("mode", &file.mode)
2071 .field("mtime", &file.mtime())
2072 .field("atime", &file.times.atime)
2073 .field("ctime", &file.times.ctime)
2074 .field("uid", &file.uid)
2075 .field("gid", &file.gid)
2076 .field("uname", &file.uname)
2077 .field("gname", &file.gname)
2078 .field("attrs", &file.attrs.len())
2079 .finish(),
2080 Self::Device(device) => f
2081 .debug_struct("TarEntry::Device")
2082 .field("path_name", &device.path_name)
2083 .field("mode", &device.mode)
2084 .field("mtime", &device.mtime())
2085 .field("atime", &device.times.atime)
2086 .field("ctime", &device.times.ctime)
2087 .field("uid", &device.uid)
2088 .field("gid", &device.gid)
2089 .field("uname", &device.uname)
2090 .field("gname", &device.gname)
2091 .field("attrs", &device.attrs.len())
2092 .field(
2093 "kind",
2094 match device.kind {
2095 DeviceKind::Char => &"char",
2096 DeviceKind::Block => &"block",
2097 },
2098 )
2099 .field("major", &device.major)
2100 .field("minor", &device.minor)
2101 .finish(),
2102 Self::Fifo(fifo) => f
2103 .debug_struct("TarEntry::Fifo")
2104 .field("path_name", &fifo.path_name)
2105 .field("mode", &fifo.mode)
2106 .field("mtime", &fifo.mtime())
2107 .field("atime", &fifo.times.atime)
2108 .field("ctime", &fifo.times.ctime)
2109 .field("uid", &fifo.uid)
2110 .field("gid", &fifo.gid)
2111 .field("uname", &fifo.uname)
2112 .field("gname", &fifo.gname)
2113 .field("attrs", &fifo.attrs.len())
2114 .finish(),
2115 Self::Link(link) => f
2116 .debug_struct("TarEntry::Link")
2117 .field("path_name", &link.path_name)
2118 .field("link_name", &link.link_name)
2119 .finish(),
2120 Self::Symlink(symlink) => f
2121 .debug_struct("TarEntry::Symlink")
2122 .field("path_name", &symlink.path_name)
2123 .field("link_name", &symlink.link_name)
2124 .field("mode", &symlink.mode)
2125 .field("mtime", &symlink.mtime())
2126 .field("atime", &symlink.times.atime)
2127 .field("ctime", &symlink.times.ctime)
2128 .field("uid", &symlink.uid)
2129 .field("gid", &symlink.gid)
2130 .field("uname", &symlink.uname)
2131 .field("gname", &symlink.gname)
2132 .field("attrs", &symlink.attrs.len())
2133 .finish(),
2134 Self::Directory(dir) => f
2135 .debug_struct("TarEntry::Directory")
2136 .field("path_name", &dir.path_name)
2137 .field("size", &dir.size)
2138 .field("mode", &dir.mode)
2139 .field("mtime", &dir.mtime())
2140 .field("atime", &dir.times.atime)
2141 .field("ctime", &dir.times.ctime)
2142 .field("uid", &dir.uid)
2143 .field("gid", &dir.gid)
2144 .field("uname", &dir.uname)
2145 .field("gname", &dir.gname)
2146 .field("attrs", &dir.attrs.len())
2147 .finish(),
2148 }
2149 }
2150}
2151
/// Overrides collected from the extension headers that precede an entry header.
///
/// Every field starts out unset/empty in `take_pax_info` and is filled from global PAX
/// records, GNU long-name/long-link headers and local PAX records.
struct PaxInfo {
    // PAX `path` record or GNU long name.
    path: Option<Box<str>>,
    // PAX `linkpath` record or GNU long link.
    linkpath: Option<Box<str>>,
    // PAX `atime` / `ctime` / `mtime` records.
    atime: Option<SystemTime>,
    ctime: Option<SystemTime>,
    mtime: Option<SystemTime>,
    // PAX `size` record.
    size: Option<u64>,
    // PAX `uid` / `gid` records.
    uid: Option<u32>,
    gid: Option<u32>,
    // PAX `uname` / `gname` records.
    uname: Option<Box<str>>,
    gname: Option<Box<str>>,
    // Values of `SCHILY.xattr.*` records, keyed by the name after that prefix.
    attrs: AttrList,
}
2165
2166fn entry_path(hdr: &Header, path: Option<Box<str>>) -> Result<Box<str>> {
2167 match hdr.kind() {
2168 HeaderKind::Gnu(hdr) => Ok(path.map_or_else(|| hdr.path_name(), Ok)?),
2169 HeaderKind::Ustar(hdr) => Ok(path.map_or_else(|| hdr.path_name(), Ok)?),
2170 HeaderKind::Old(hdr) => path.map_or_else(|| hdr.path_name(), Ok),
2171 }
2172}
2173
2174fn entry_path_link(
2175 hdr: &Header,
2176 path: Option<Box<str>>,
2177 link: Option<Box<str>>,
2178) -> Result<(Box<str>, Box<str>)> {
2179 match hdr.kind() {
2180 HeaderKind::Gnu(hdr) => Ok((
2181 path.map_or_else(|| hdr.path_name(), Ok)?,
2182 link.map_or_else(|| hdr.link_name(), Ok)?,
2183 )),
2184 HeaderKind::Ustar(hdr) => Ok((
2185 path.map_or_else(|| hdr.path_name(), Ok)?,
2186 link.map_or_else(|| hdr.link_name(), Ok)?,
2187 )),
2188 HeaderKind::Old(hdr) => Ok((
2189 path.map_or_else(|| hdr.path_name(), Ok)?,
2190 link.map_or_else(|| hdr.link_name(), Ok)?,
2191 )),
2192 }
2193}
2194
2195fn ext_as_path(hdr: &Header, size: usize, ext: &Option<ExtensionBuffer>) -> Result<Box<str>> {
2196 if size <= BLOCK_SIZE {
2197 hdr.as_null_terminated_str(..size)
2198 } else {
2199 ext.as_ref().unwrap().as_null_terminated_str(..size)
2200 }
2201}
2202fn ext_as_str(hdr: &Header, size: usize, ext: &Option<ExtensionBuffer>) -> Result<Box<str>> {
2203 if size <= BLOCK_SIZE {
2204 hdr.as_str(..size)
2205 } else {
2206 ext.as_ref().unwrap().as_str(..size)
2207 }
2208 .map(|p| p.to_string().into_boxed_str())
2209}
2210
2211fn take_pax_info(exts: &mut Vec<ExtensionHeader>, globs: &[PosixExtension]) -> Result<PaxInfo> {
2212 let mut info = PaxInfo {
2213 path: None,
2214 linkpath: None,
2215 atime: None,
2216 ctime: None,
2217 mtime: None,
2218 size: None,
2219 uid: None,
2220 gid: None,
2221 uname: None,
2222 gname: None,
2223 attrs: AttrList::default(),
2224 };
2225 for glob in globs {
2226 apply_pax_extension(glob, &mut info)?;
2227 }
2228 for ext in exts.drain(..) {
2229 match ext {
2230 ExtensionHeader::LongName(name) => info.path = Some(name),
2231 ExtensionHeader::LongLink(name) => info.linkpath = Some(name),
2232 ExtensionHeader::PosixExtension(ext) => apply_pax_extension(&ext, &mut info)?,
2233 }
2234 }
2235 Ok(info)
2236}
2237
2238fn apply_pax_extension(ext: &PosixExtension, info: &mut PaxInfo) -> Result<()> {
2239 ext.for_each_record(|key, val| {
2240 if key == "path" {
2241 info.path = Some(val.to_string().into_boxed_str());
2242 } else if key == "linkpath" {
2243 info.linkpath = Some(val.to_string().into_boxed_str());
2244 } else if key == "atime" {
2245 info.atime = Some(parse_pax_time(val)?);
2246 } else if key == "ctime" {
2247 info.ctime = Some(parse_pax_time(val)?);
2248 } else if key == "mtime" {
2249 info.mtime = Some(parse_pax_time(val)?);
2250 } else if key == "size" {
2251 info.size = Some(parse_pax_u64(val, "size")?);
2252 } else if key == "uid" {
2253 info.uid = Some(parse_pax_u32(val, "uid")?);
2254 } else if key == "gid" {
2255 info.gid = Some(parse_pax_u32(val, "gid")?);
2256 } else if key == "uname" {
2257 info.uname = Some(val.to_string().into_boxed_str());
2258 } else if key == "gname" {
2259 info.gname = Some(val.to_string().into_boxed_str());
2260 } else if let Some(name) = key.strip_prefix("SCHILY.xattr.") {
2261 attr_set(&mut info.attrs, name, val.as_bytes());
2262 }
2263 Ok(())
2264 })
2265}
2266
/// Parses the value of the PAX record `key` as a decimal `u64`.
///
/// # Errors
///
/// Returns `InvalidData` naming `key` and `val` when `val` is not a valid `u64`.
fn parse_pax_u64(val: &str, key: &str) -> Result<u64> {
    match val.parse::<u64>() {
        Ok(number) => Ok(number),
        Err(_) => Err(Error::new(
            ErrorKind::InvalidData,
            format!("invalid PAX {key} value: {val}"),
        )),
    }
}
2275
2276fn parse_pax_u32(val: &str, key: &str) -> Result<u32> {
2277 let parsed = parse_pax_u64(val, key)?;
2278 parsed.try_into().map_err(|_| {
2279 Error::new(
2280 ErrorKind::InvalidData,
2281 format!("PAX {key} value out of range: {val}"),
2282 )
2283 })
2284}
2285
/// Parses a PAX timestamp value of the form `[-]seconds[.fraction]` into a `SystemTime`.
///
/// The fraction may have more than nine digits as long as every digit past nanosecond
/// precision is `0`. A leading `-` yields a time before the Unix epoch.
///
/// # Errors
///
/// Returns `InvalidData` when the seconds part is not a run of ASCII digits after the
/// optional `-`, when the fraction is empty or not all digits, when the fraction carries
/// sub-nanosecond precision, or when the result cannot be represented as a `SystemTime`.
fn parse_pax_time(val: &str) -> Result<SystemTime> {
    let invalid = || {
        Error::new(
            ErrorKind::InvalidData,
            format!("invalid PAX timestamp value: {val}"),
        )
    };
    let (seconds, fraction) = match val.split_once('.') {
        Some((seconds, fraction)) => (seconds, Some(fraction)),
        None => (val, None),
    };
    let nanos = match fraction {
        None => 0,
        Some(fraction) => {
            if fraction.is_empty() || !fraction.bytes().all(|b| b.is_ascii_digit()) {
                return Err(invalid());
            }
            // The fraction is pure ASCII here, so byte index 9 is a character boundary.
            if fraction.len() > 9 && fraction[9..].bytes().any(|b| b != b'0') {
                return Err(Error::new(
                    ErrorKind::InvalidData,
                    format!("PAX timestamp exceeds nanosecond precision: {val}"),
                ));
            }
            let digits = &fraction[..fraction.len().min(9)];
            let scale = 9 - digits.len();
            let raw = digits.parse::<u32>().map_err(|_| invalid())?;
            // At most nine digits scaled to nine places, so this stays below 10^9.
            raw.saturating_mul(10u32.pow(scale as u32))
        }
    };
    let (negative, magnitude) = match seconds.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, seconds),
    };
    // `u64::from_str` accepts a leading '+', which would let "+5" and "-+5" through;
    // require plain digits so only the documented sign is recognized.
    if magnitude.is_empty() || !magnitude.bytes().all(|b| b.is_ascii_digit()) {
        return Err(invalid());
    }
    let whole = magnitude.parse::<u64>().map_err(|_| invalid())?;
    let offset = Duration::new(whole, nanos);
    let time = if negative {
        UNIX_EPOCH.checked_sub(offset)
    } else {
        UNIX_EPOCH.checked_add(offset)
    };
    time.ok_or_else(invalid)
}
2349
/// Formats `time` as a PAX timestamp: whole seconds relative to the Unix epoch, with a
/// leading `-` for earlier times and a fraction (trailing zeros removed) only when the
/// sub-second part is non-zero.
fn format_pax_time(time: SystemTime) -> String {
    let (sign, delta) = match time.duration_since(UNIX_EPOCH) {
        Ok(after_epoch) => ("", after_epoch),
        Err(_) => ("-", UNIX_EPOCH.duration_since(time).unwrap()),
    };
    let mut text = format!("{sign}{}", delta.as_secs());
    let nanos = delta.subsec_nanos();
    if nanos != 0 {
        // A non-zero value always leaves at least one digit after trimming.
        let fraction = format!("{nanos:09}");
        text.push('.');
        text.push_str(fraction.trim_end_matches('0'));
    }
    text
}
2366
/// Walks the records of a PAX extended header and calls `cb(key, value)` for each one.
///
/// Every record has the form `"<len> <key>=<value>\n"`, where `<len>` is the decimal
/// byte length of the whole record including the length digits and the newline. The
/// value is everything after the first `=`.
///
/// # Errors
///
/// Returns `InvalidData` for a record without a space, with an unparsable or
/// inconsistent length, without a trailing newline or without `=`. An error returned by
/// `cb` stops the walk and is passed through.
fn parse_pax_records<'a>(
    mut ext: &'a str,
    mut cb: impl FnMut(&'a str, &'a str) -> Result<()>,
) -> Result<()> {
    let malformed = |message: &'static str| Error::new(ErrorKind::InvalidData, message);
    while !ext.is_empty() {
        let Some(space_pos) = ext.find(' ') else {
            return Err(malformed(
                "malformed PAX record: missing length separator",
            ));
        };
        let Ok(len) = ext[..space_pos].parse::<usize>() else {
            return Err(malformed("malformed PAX record: invalid length"));
        };
        // The record must fit in the input, hold at least one payload byte besides the
        // newline, and end on a character boundary so it can be sliced.
        let length_ok = len != 0
            && len <= ext.len()
            && len > space_pos + 2
            && ext.is_char_boundary(len);
        if !length_ok {
            return Err(malformed("malformed PAX record: invalid length"));
        }
        let (record, rest) = ext.split_at(len);
        ext = rest;
        let Some(body) = record.strip_suffix('\n') else {
            return Err(malformed(
                "malformed PAX record: missing trailing newline",
            ));
        };
        let payload = &body[space_pos + 1..];
        let Some((key, value)) = payload.split_once('=') else {
            return Err(malformed("malformed PAX record: missing '='"));
        };
        cb(key, value)?;
    }
    Ok(())
}
2406
2407fn attr_set(attrs: &mut AttrList, name: &str, value: &[u8]) {
2408 if let Some(pos) = attrs
2409 .inner
2410 .iter()
2411 .position(|(existing, _)| existing.as_ref() == name)
2412 {
2413 attrs.inner.remove(pos);
2414 }
2415 attrs.inner.push((
2416 name.to_string().into_boxed_str(),
2417 value.to_vec().into_boxed_slice(),
2418 ));
2419}
2420
2421impl<'a, R: AsyncRead + 'a> TarReaderInner<'a, R> {
2422 fn new(r: R) -> Self {
2423 Self {
2424 state: Header,
2425 pos: 0,
2426 nxt: BLOCK_SIZE as u64,
2427 ext: None,
2428 exts: Vec::new(),
2429 globs: Vec::new(),
2430 header: Header::new(),
2431 reader: r,
2432 marker: std::marker::PhantomData,
2433 }
2434 }
2435 fn poll_read_header(
2437 self: Pin<&mut Self>,
2438 ctx: &mut Context<'_>,
2439 ) -> Poll<Option<Result<Entry>>> {
2440 let mut this = self.project();
2441 loop {
2442 match this.state {
2443 Header => {
2444 let remaining = *this.nxt - *this.pos;
2445 let n = {
2446 let filled = BLOCK_SIZE - remaining as usize;
2447 let (hdr, reader) = (&mut this.header, &mut this.reader);
2448 let n =
2449 ready_opt!(poll_read_compat(pin!(reader), ctx, hdr.buf_mut(filled..)));
2450 if n == 0 {
2451 Err(Error::new(
2452 ErrorKind::UnexpectedEof,
2453 "Unexpected EOF while reading tar header",
2454 ))
2455 } else {
2456 Ok(n)
2457 }
2458 }?;
2459 *this.pos += n as u64;
2460 if remaining != n as u64 {
2461 continue;
2462 }
2463 if this.header.is_zero() {
2464 *this.nxt += BLOCK_SIZE as u64;
2465 *this.state = Eof;
2466 continue;
2467 }
2468 this.header.validate_checksum()?;
2469 let kind = this.header.entry_type().map_err(|t| {
2470 Error::new(
2471 ErrorKind::InvalidData,
2472 format!("header type {} is not supported", t),
2473 )
2474 })?;
2475 return Poll::Ready(Some(match kind {
2476 Kind::File | Kind::File0 | Kind::Continous => {
2477 let info = take_pax_info(this.exts, this.globs)?;
2478 let path_name = entry_path(this.header, info.path.clone())?;
2479 let size = effective_size(this.header, info.size)?;
2480 let uid = effective_uid(this.header, info.uid)?;
2481 let gid = effective_gid(this.header, info.gid)?;
2482 let times = effective_times(this.header, &info)?;
2483 let uname = effective_uname(this.header, info.uname)?;
2484 let gname = effective_gname(this.header, info.gname)?;
2485 Ok(if path_name.ends_with('/') && this.header.is_old() {
2486 *this.nxt += BLOCK_SIZE as u64;
2487 *this.state = Header;
2488 Entry::Directory(TarDirectory {
2489 size,
2490 mode: this.header.mode()?,
2491 uid,
2492 gid,
2493 uname,
2494 gname,
2495 times,
2496 path_name,
2497 attrs: info.attrs,
2498 })
2499 } else {
2500 *this.nxt += size;
2501 *this.state = Entry;
2502 Entry::File {
2503 size,
2504 mode: this.header.mode()?,
2505 uid,
2506 gid,
2507 uname,
2508 gname,
2509 times,
2510 eof: *this.nxt,
2511 path_name,
2512 attrs: info.attrs,
2513 }
2514 })
2515 }
2516 Kind::Directory => {
2517 let info = take_pax_info(this.exts, this.globs)?;
2518 let size = effective_size(this.header, info.size)?;
2519 let uid = effective_uid(this.header, info.uid)?;
2520 let gid = effective_gid(this.header, info.gid)?;
2521 let times = effective_times(this.header, &info)?;
2522 let uname = effective_uname(this.header, info.uname)?;
2523 let gname = effective_gname(this.header, info.gname)?;
2524 *this.nxt += BLOCK_SIZE as u64;
2525 *this.state = Header;
2526 let path_name = entry_path(this.header, info.path)?;
2527 Ok(Entry::Directory(TarDirectory {
2528 size,
2529 mode: this.header.mode()?,
2530 uid,
2531 gid,
2532 uname,
2533 gname,
2534 times,
2535 path_name,
2536 attrs: info.attrs,
2537 }))
2538 }
2539 Kind::Fifo => {
2540 let info = take_pax_info(this.exts, this.globs)?;
2541 let uid = effective_uid(this.header, info.uid)?;
2542 let gid = effective_gid(this.header, info.gid)?;
2543 let times = effective_times(this.header, &info)?;
2544 let uname = effective_uname(this.header, info.uname)?;
2545 let gname = effective_gname(this.header, info.gname)?;
2546 *this.nxt += BLOCK_SIZE as u64;
2547 *this.state = Header;
2548 let path_name = entry_path(this.header, info.path)?;
2549 Ok(Entry::Fifo(TarFifo {
2550 path_name,
2551 mode: this.header.mode()?,
2552 uid,
2553 gid,
2554 uname,
2555 gname,
2556 times,
2557 attrs: info.attrs,
2558 }))
2559 }
2560 Kind::CharDevice | Kind::BlockDevice => {
2561 let info = take_pax_info(this.exts, this.globs)?;
2562 let uid = effective_uid(this.header, info.uid)?;
2563 let gid = effective_gid(this.header, info.gid)?;
2564 let times = effective_times(this.header, &info)?;
2565 let uname = effective_uname(this.header, info.uname)?;
2566 let gname = effective_gname(this.header, info.gname)?;
2567 *this.nxt += BLOCK_SIZE as u64;
2568 *this.state = Header;
2569 let path_name = entry_path(this.header, info.path)?;
2570 Ok(Entry::Device(TarDevice {
2571 path_name,
2572 mode: this.header.mode()?,
2573 uid,
2574 gid,
2575 uname,
2576 gname,
2577 times,
2578 kind: match kind {
2579 Kind::CharDevice => DeviceKind::Char,
2580 Kind::BlockDevice => DeviceKind::Block,
2581 _ => unreachable!(),
2582 },
2583 major: this.header.dev_major()?,
2584 minor: this.header.dev_minor()?,
2585 attrs: info.attrs,
2586 }))
2587 }
2588 Kind::Link => {
2589 let info = take_pax_info(this.exts, this.globs)?;
2590 *this.nxt += BLOCK_SIZE as u64;
2591 *this.state = Header;
2592 let (path_name, link_name) =
2593 entry_path_link(this.header, info.path, info.linkpath)?;
2594 let _ = info.attrs;
2595 Ok(Entry::Link(TarLink {
2596 path_name,
2597 link_name,
2598 }))
2599 }
2600 Kind::Symlink => {
2601 let info = take_pax_info(this.exts, this.globs)?;
2602 let uid = effective_uid(this.header, info.uid)?;
2603 let gid = effective_gid(this.header, info.gid)?;
2604 let times = effective_times(this.header, &info)?;
2605 let uname = effective_uname(this.header, info.uname)?;
2606 let gname = effective_gname(this.header, info.gname)?;
2607 *this.nxt += BLOCK_SIZE as u64;
2608 *this.state = Header;
2609 let (path_name, link_name) =
2610 entry_path_link(this.header, info.path, info.linkpath)?;
2611 Ok(Entry::Symlink(TarSymlink {
2612 mode: this.header.mode()?,
2613 uid,
2614 gid,
2615 uname,
2616 gname,
2617 times,
2618 path_name,
2619 link_name,
2620 attrs: info.attrs,
2621 }))
2622 }
2623 Kind::PAXLocal | Kind::PAXGlobal if this.header.is_ustar() => {
2624 let size = this.header.size().and_then(|size| {
2625 if size as usize > PAX_HEADER_MAX_SIZE {
2626 Err(Error::new(
2627 ErrorKind::InvalidData,
2628 format!(
2629 "PAX extension exceeds {PAX_HEADER_MAX_SIZE} bytes"
2630 ),
2631 ))
2632 } else {
2633 Ok(size as usize)
2634 }
2635 })?;
2636 *this.state = Extension((size as u32, kind));
2637 let padded = padded_size(size as u64);
2638 *this.nxt += padded;
2639 if size > BLOCK_SIZE {
2640 this.ext.replace(ExtensionBuffer::new(padded as usize));
2641 }
2642 continue;
2643 }
2644 Kind::GNULongName | Kind::GNULongLink if this.header.is_gnu() => {
2645 let size = this.header.size().and_then(|size| {
2646 if size as usize > PATH_MAX {
2647 Err(Error::new(
2648 ErrorKind::InvalidData,
2649 format!("long filename exceeds {PATH_MAX} bytes"),
2650 ))
2651 } else {
2652 Ok(size as usize)
2653 }
2654 })?;
2655 *this.state = Extension((size as u32, kind));
2656 let padded = padded_size(size as u64);
2657 *this.nxt += padded;
2658 if size > BLOCK_SIZE {
2659 this.ext.replace(ExtensionBuffer::new(padded as usize));
2660 }
2661 continue;
2662 }
2663 kind => Err(Error::new(
2664 ErrorKind::InvalidData,
2665 format!("invalid tar entry header {}", kind),
2666 )),
2667 }));
2668 }
2669 Extension((size, kind)) => {
2670 let (ext, reader) = (&mut this.ext, &mut this.reader);
2671 let n = if *size as usize <= BLOCK_SIZE {
2672 let remaining = *this.nxt - *this.pos;
2673 let filled = BLOCK_SIZE - remaining as usize;
2674 let (hdr, reader) = (&mut this.header, &mut this.reader);
2675 ready_opt!(poll_read_compat(pin!(reader), ctx, hdr.buf_mut(filled..)))
2676 } else {
2677 let buf = ext.as_mut().unwrap();
2678 let n = ready_opt!(poll_read_compat(pin!(reader), ctx, unsafe {
2679 buf.remaining_buf()
2680 }));
2681 unsafe { buf.advance(n) };
2682 n
2683 };
2684 *this.pos += if n == 0 {
2685 Err(Error::new(
2686 ErrorKind::UnexpectedEof,
2687 "unexpected end of tar file",
2688 ))
2689 } else {
2690 Ok(n as u64)
2691 }?;
2692 if *this.pos == *this.nxt {
2693 match kind {
2694 Kind::GNULongName => this.exts.push(ExtensionHeader::LongName(
2695 ext_as_path(this.header, *size as usize, ext)?,
2696 )),
2697 Kind::GNULongLink => this.exts.push(ExtensionHeader::LongLink(
2698 ext_as_path(this.header, *size as usize, ext)?,
2699 )),
2700 Kind::PAXLocal => {
2701 let ext = ext_as_str(this.header, *size as usize, ext)?;
2702 PosixExtension::validate(&ext)?;
2703 this.exts.push(ExtensionHeader::PosixExtension(ext.into()));
2704 }
2705 Kind::PAXGlobal => {
2706 let ext = ext_as_str(this.header, *size as usize, ext)?;
2707 PosixExtension::validate(&ext)?;
2708 this.globs.push(ext.into());
2709 }
2710 _ => unreachable!(),
2711 };
2712 *this.nxt += BLOCK_SIZE as u64;
2713 *this.state = Header;
2714 }
2715 continue;
2716 }
2717 Padding => {
2718 let remaining = *this.nxt - *this.pos;
2719 let (hdr, reader) = (&mut this.header, &mut this.reader);
2720 let n = match ready_opt!(poll_read_compat(
2721 pin!(reader),
2722 ctx,
2723 hdr.buf_mut(..remaining as usize)
2724 )) {
2725 0 => Err(Error::new(
2726 ErrorKind::UnexpectedEof,
2727 "unexpected end of tar file",
2728 )),
2729 n => Ok(n as u64),
2730 }?;
2731 *this.pos += n;
2732 if remaining == n {
2733 *this.nxt = *this.pos + BLOCK_SIZE as u64;
2734 *this.state = Header;
2735 }
2736 continue;
2737 }
2738 Entry => {
2739 return Poll::Pending;
2742 }
2743 SkipEntry => {
2744 let nxt = padded_size(*this.nxt);
2746 let remaining =
2747 std::cmp::min(SKIP_BUFFER_SIZE as u64, nxt - *this.pos) as usize;
2748 let n = if remaining > 0 {
2749 let buf = if let Some(buf) = this.ext.as_mut() {
2750 buf
2751 } else {
2752 this.ext.replace(ExtensionBuffer::new(SKIP_BUFFER_SIZE));
2753 this.ext.as_mut().unwrap()
2754 };
2755 let reader = &mut this.reader;
2756 match ready_opt!(poll_read_compat(pin!(reader), ctx, unsafe {
2757 buf.upto(remaining)
2758 })) {
2759 0 => Err(Error::new(
2760 ErrorKind::UnexpectedEof,
2761 "unexpected end of tar file",
2762 )),
2763
2764 n => Ok(n as u64),
2765 }
2766 } else {
2767 Ok(0)
2768 }?;
2769 *this.pos += n;
2770 if *this.pos == nxt {
2771 this.ext.take();
2772 *this.nxt = *this.pos + BLOCK_SIZE as u64;
2773 *this.state = Header;
2774 }
2775 continue;
2776 }
2777 Eof => {
2778 let remaining = *this.nxt - *this.pos;
2779 let filled = BLOCK_SIZE - remaining as usize;
2780 let (hdr, reader) = (&mut this.header, &mut this.reader);
2781 let n = match ready_opt!(poll_read_compat(
2782 pin!(reader),
2783 ctx,
2784 hdr.buf_mut(filled..)
2785 )) {
2786 0 => Err(Error::new(
2787 ErrorKind::UnexpectedEof,
2788 "unexpected end of tar file",
2789 )),
2790 n => Ok(n as u64),
2791 }?;
2792 *this.pos += n;
2793 if remaining > n {
2794 continue;
2795 }
2796 return Poll::Ready(if hdr.is_zero() {
2797 *this.state = Eoff;
2798 None
2799 } else {
2800 *this.state = Eoff;
2801 Some(Err(Error::new(
2802 ErrorKind::InvalidData,
2803 "unexpected data after first zero block",
2804 )))
2805 });
2806 }
2807 Eoff => {
2808 return Poll::Ready(Some(Err(Error::new(
2809 ErrorKind::InvalidData,
2810 "unexpected read after EOF",
2811 ))));
2812 }
2813 }
2814 }
2815 }
2816}
2817
2818impl<'a, R: AsyncRead + 'a> Stream for TarReader<'a, R> {
2819 type Item = Result<TarEntry<'a, TarRegularFileReader<'a, R>>>;
2820 fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
2821 let this = self.as_mut();
2822 let fut = this.inner.lock();
2823 let mut g = task::ready!(pin!(fut).poll(ctx));
2824 let inner: Pin<&mut TarReaderInner<R>> = g.as_mut();
2825 let entry = {
2826 match inner.poll_next(ctx) {
2827 Poll::Pending => return Poll::Pending,
2828 Poll::Ready(None) => return Poll::Ready(None),
2829 Poll::Ready(Some(Err(err))) => return Poll::Ready(Some(Err(err))),
2830 Poll::Ready(Some(Ok(data))) => data,
2831 }
2832 };
2833 Poll::Ready(Some(Ok(match entry {
2834 Entry::File {
2835 size,
2836 mode,
2837 uid,
2838 gid,
2839 uname,
2840 gname,
2841 times,
2842 eof,
2843 path_name,
2844 attrs,
2845 } => TarEntry::File(TarRegularFile {
2846 path_name,
2847 size,
2848 mode,
2849 uid,
2850 gid,
2851 uname,
2852 gname,
2853 times,
2854 attrs,
2855 inner: TarRegularFileReader {
2856 eof,
2857 inner: Arc::clone(&this.inner),
2858 },
2859 marker: std::marker::PhantomData,
2860 }),
2861 Entry::Directory(d) => TarEntry::Directory(d),
2862 Entry::Link(l) => TarEntry::Link(l),
2863 Entry::Symlink(l) => TarEntry::Symlink(l),
2864 Entry::Device(d) => TarEntry::Device(d),
2865 Entry::Fifo(f) => TarEntry::Fifo(f),
2866 })))
2867 }
2868}
2869
2870impl<'a, R: AsyncRead + 'a> Stream for TarReaderInner<'a, R> {
2871 type Item = Result<Entry>;
2872 fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
2873 self.as_mut().poll_read_header(ctx)
2874 }
2875}
2876impl<'a, R: AsyncRead + 'a> TarEntry<'a, R> {
2877 pub fn path(&'_ self) -> &'_ str {
2879 match self {
2880 Self::File(f) => &f.path_name,
2881 Self::Link(l) => &l.path_name,
2882 Self::Symlink(l) => &l.path_name,
2883 Self::Directory(d) => &d.path_name,
2884 Self::Device(d) => &d.path_name,
2885 Self::Fifo(f) => &f.path_name,
2886 }
2887 }
2888}
2889
#[cfg(feature = "smol")]
impl<'a, R: AsyncRead + 'a> AsyncRead for TarRegularFile<'a, R> {
    /// Reads file contents by forwarding to the wrapped `inner` reader.
    fn poll_read(
        self: Pin<&mut Self>,
        ctx: &mut Context<'_>,
        buf: &mut [u8],
    ) -> Poll<std::io::Result<usize>> {
        let reader = self.project().inner;
        poll_read_compat(pin!(reader), ctx, buf)
    }
}
2901
#[cfg(feature = "tokio")]
impl<'a, R: AsyncRead + 'a> AsyncRead for TarRegularFile<'a, R> {
    /// Reads file contents by forwarding to the wrapped `inner` reader.
    ///
    /// The unfilled part of `buf` is initialized, handed to the reader as a
    /// plain byte slice, and the number of bytes read is then marked as filled.
    fn poll_read(
        self: Pin<&mut Self>,
        ctx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<std::io::Result<()>> {
        let reader = self.project().inner;
        let outcome = poll_read_compat(pin!(reader), ctx, buf.initialize_unfilled());
        outcome.map_ok(|read| buf.advance(read))
    }
}
2920
/// Returns the part of `bytes` that precedes the first NUL byte, or all of
/// `bytes` when it contains no NUL.
fn null_terminated(bytes: &[u8]) -> &[u8] {
    match bytes.iter().position(|&byte| byte == b'\0') {
        Some(end) => &bytes[..end],
        None => bytes,
    }
}
2927
2928fn ustar_path_name(name: &[u8; 100], prefix: &[u8; 155]) -> Result<Box<str>> {
2929 let (mut size, prefix) = if prefix[0] != b'\0' {
2930 let prefix = path_name(prefix)?;
2931 (prefix.len() + 1, Some(prefix))
2932 } else {
2933 (0, None)
2934 };
2935 let name = path_name(name)?;
2936 size += name.len();
2937 let mut path = String::with_capacity(size);
2938 if let Some(prefix) = prefix {
2939 path.push_str(prefix);
2940 path.push('/');
2941 }
2942 path.push_str(name);
2943 Ok(path.into_boxed_str())
2944}
2945fn path_name(name: &'_ [u8]) -> Result<&'_ str> {
2946 from_utf8(null_terminated(name))
2947 .map_err(|_| Error::new(ErrorKind::InvalidData, "invalid utf8 in file path"))
2948}
2949
2950fn parse_name_field(bytes: &[u8]) -> Result<Option<Box<str>>> {
2951 let bytes = null_terminated(bytes);
2952 if bytes.is_empty() {
2953 Ok(None)
2954 } else {
2955 from_utf8(bytes)
2956 .map(|name| Some(name.to_string().into_boxed_str()))
2957 .map_err(|_| Error::new(ErrorKind::InvalidData, "invalid UTF-8"))
2958 }
2959}
2960
/// Reports whether a user or group name is short enough to be stored directly
/// in the header, i.e. is at most 30 bytes long.
// NOTE(review): the ustar uname/gname fields are 32 bytes wide, so this limit
// looks one byte more conservative than a NUL-terminated field requires —
// confirm against `set_uname`/`set_gname` before relaxing it.
fn ustar_name_fits(name: &str) -> bool {
    const EXCLUSIVE_LIMIT: usize = 31;
    name.len() < EXCLUSIVE_LIMIT
}
2964
/// Parses an octal numeric header field.
///
/// Leading spaces are skipped, because some archivers pad numeric fields with
/// spaces instead of zeros. Digits are then read until the first NUL or space
/// (or the end of the field); anything after that terminator is ignored. A
/// field without digits yields `0`.
///
/// # Errors
///
/// Returns the whole `field` as the error value when it contains a byte that
/// is neither an octal digit nor a terminator, or when the value does not fit
/// in a `u64`.
fn parse_octal(field: &'_ [u8]) -> std::result::Result<u64, &'_ [u8]> {
    let start = field
        .iter()
        .position(|&byte| byte != b' ')
        .unwrap_or(field.len());
    let mut value = 0u64;
    for &byte in &field[start..] {
        match byte {
            0 | b' ' => break,
            b'0'..=b'7' => {
                // Shifting would discard the top three bits: report overflow
                // instead of silently wrapping.
                if value >> 61 != 0 {
                    return Err(field);
                }
                value = (value << 3) | u64::from(byte - b'0');
            }
            _ => return Err(field),
        }
    }
    Ok(value)
}
2983
/// Rounds `n` up to the next multiple of 512 bytes.
///
/// Zero stays zero. The addition saturates, so values within 511 of
/// `u64::MAX` are rounded down to the largest multiple of 512 instead of
/// overflowing.
const fn padded_size(n: u64) -> u64 {
    const MASK: u64 = 511;
    n.saturating_add(MASK) & !MASK
}
2991
2992#[allow(clippy::too_many_arguments)]
2993fn write_header(
3001 buffer: &mut [u8],
3002 name: &str,
3003 link_name: Option<&str>,
3004 kind: Kind,
3005 size: u64,
3006 mode: u32,
3007 uid: u32,
3008 gid: u32,
3009 uname: Option<&str>,
3010 gname: Option<&str>,
3011 times: &EntryTimes,
3012 device: Option<(u32, u32)>,
3013 attrs: &AttrList,
3014) -> std::io::Result<usize> {
3015 if buffer.len() < BLOCK_SIZE {
3016 return Err(std::io::Error::other("buffer too small for tar header"));
3017 }
3018 let (header_buf, data_buf) = buffer.split_at_mut(BLOCK_SIZE);
3019 let header = unsafe { UstarHeader::from_buf(header_buf) };
3020 let mut total = BLOCK_SIZE;
3021
3022 let split_pos = header.path_split_point(name);
3023 let path_truncated = if let Some(pos) = split_pos {
3024 name.len() - pos - 1 > NAME_LEN
3025 } else {
3026 name.len() > NAME_LEN
3027 };
3028 let link_path_truncated = link_name
3029 .as_ref()
3030 .is_some_and(|link_name| link_name.len() > NAME_LEN);
3031 let supports_pax_metadata = !matches!(kind, Kind::Link);
3032 let supports_pax_xattrs = !matches!(kind, Kind::Link);
3033
3034 let mut records = Vec::new();
3035 if path_truncated {
3036 records.push(PaxRecord::new("path", name.as_bytes()));
3037 }
3038 if link_path_truncated {
3039 let name = link_name.unwrap();
3040 records.push(PaxRecord::new("linkpath", name.as_bytes()));
3041 }
3042 if supports_pax_metadata {
3043 if let Some(atime) = times.atime {
3044 records.push(PaxRecord::string("atime", format_pax_time(atime)));
3045 }
3046 if let Some(ctime) = times.ctime {
3047 records.push(PaxRecord::string("ctime", format_pax_time(ctime)));
3048 }
3049 }
3050 let header_mtime = header_mtime_value(times.mtime);
3051 let mtime_requires_pax = supports_pax_metadata
3052 && (header_mtime.is_none() || !octal_fits(header_mtime.unwrap_or(0), 12));
3053 let stored_header_mtime = if mtime_requires_pax {
3054 0
3055 } else {
3056 header_mtime.unwrap_or(0)
3057 };
3058 if mtime_requires_pax {
3059 records.push(PaxRecord::string("mtime", format_pax_time(times.mtime)));
3060 }
3061 let uid_requires_pax = supports_pax_metadata && !octal_fits(u64::from(uid), 8);
3062 if uid_requires_pax {
3063 records.push(PaxRecord::string("uid", uid.to_string()));
3064 }
3065 let gid_requires_pax = supports_pax_metadata && !octal_fits(u64::from(gid), 8);
3066 if gid_requires_pax {
3067 records.push(PaxRecord::string("gid", gid.to_string()));
3068 }
3069 let uname_requires_pax =
3070 supports_pax_metadata && uname.is_some_and(|uname| !ustar_name_fits(uname));
3071 if let Some(uname) = uname.filter(|uname| !ustar_name_fits(uname)) {
3072 records.push(PaxRecord::string("uname", uname.to_string()));
3073 }
3074 let gname_requires_pax =
3075 supports_pax_metadata && gname.is_some_and(|gname| !ustar_name_fits(gname));
3076 if let Some(gname) = gname.filter(|gname| !ustar_name_fits(gname)) {
3077 records.push(PaxRecord::string("gname", gname.to_string()));
3078 }
3079 let size_requires_pax = supports_pax_metadata && !octal_fits(size, 12);
3080 if size_requires_pax {
3081 records.push(PaxRecord::string("size", size.to_string()));
3082 }
3083 if supports_pax_xattrs && !attrs.is_empty() {
3084 for (name, value) in attrs.iter() {
3085 records.push(pax_xattr_record(name, value)?);
3086 }
3087 }
3088
3089 if records.is_empty() {
3090 header.set_uid(uid)?;
3091 header.set_gid(gid)?;
3092 if let Some(uname) = uname.filter(|uname| ustar_name_fits(uname)) {
3093 header.set_uname(uname);
3094 }
3095 if let Some(gname) = gname.filter(|gname| ustar_name_fits(gname)) {
3096 header.set_gname(gname);
3097 }
3098 header.set_mode(mode)?;
3099 header.set_mtime(stored_header_mtime)?;
3100 header.set_size(size)?;
3101 header.set_path(name, split_pos);
3102 if let Some(link_name) = link_name {
3103 header.set_link_path(link_name);
3104 }
3105 if let Some((major, minor)) = device {
3106 header.set_dev_major(major)?;
3107 header.set_dev_minor(minor)?;
3108 }
3109 header.set_typeflag(kind);
3110 header.finalize()?;
3111 } else {
3112 header.set_typeflag(Kind::PAXLocal);
3113 header.set_path("././@PaxHeader", None);
3114 header.set_uid(0)?;
3115 header.set_gid(0)?;
3116 header.set_mode(0)?;
3117 header.set_mtime(0)?;
3118 let mut ext_size = 0;
3119 for record in &records {
3120 ext_size += pax_record_len(&record.key, record.value.len());
3121 }
3122 if data_buf.len() < ext_size {
3123 return Err(std::io::Error::other("buffer too small for pax header"));
3124 }
3125 let mut offset = 0;
3126 for record in records {
3127 let rec_len = pax_record_len(&record.key, record.value.len());
3128 if data_buf.len() < offset + rec_len {
3129 return Err(std::io::Error::other("buffer too small for pax header"));
3130 }
3131 write_pax_record(
3132 &mut data_buf[offset..offset + rec_len],
3133 &record.key,
3134 &record.value,
3135 rec_len,
3136 )?;
3137 offset += rec_len;
3138 }
3139 header.set_size(ext_size as u64)?;
3140 header.finalize()?;
3141 let padded = padded_size(ext_size as u64);
3142 if data_buf.len() < padded as usize {
3143 return Err(std::io::Error::other("buffer too small for pax header"));
3144 }
3145 data_buf[ext_size..padded as usize].fill(0);
3146 total += padded as usize;
3147 if data_buf.len() < padded as usize + BLOCK_SIZE {
3148 return Err(std::io::Error::other("buffer too small for pax header"));
3149 }
3150 let header = unsafe {
3151 UstarHeader::from_buf(&mut data_buf[padded as usize..padded as usize + BLOCK_SIZE])
3152 };
3153 total += BLOCK_SIZE;
3154 header.set_uid(if uid_requires_pax { 0 } else { uid })?;
3155 header.set_gid(if gid_requires_pax { 0 } else { gid })?;
3156 if !uname_requires_pax {
3157 if let Some(uname) = uname {
3158 header.set_uname(uname);
3159 }
3160 }
3161 if !gname_requires_pax {
3162 if let Some(gname) = gname {
3163 header.set_gname(gname);
3164 }
3165 }
3166 header.set_mode(mode)?;
3167 header.set_mtime(stored_header_mtime)?;
3168 header.set_size(if size_requires_pax { 0 } else { size })?;
3169 header.set_typeflag(kind);
3170 header.set_path(name, split_pos);
3171 if let Some(link_name) = link_name {
3172 header.set_link_path(link_name);
3173 }
3174 if let Some((major, minor)) = device {
3175 header.set_dev_major(major)?;
3176 header.set_dev_minor(minor)?;
3177 }
3178 header.finalize()?;
3179 }
3180 Ok(total)
3181}
3182
3183fn pax_record_len(key: &str, val_len: usize) -> usize {
3184 let payload_len = key.len() + 1 + val_len + 1;
3186 let mut len = payload_len + 1 + 1;
3187 loop {
3188 let d = num_decimal_digits(len);
3189 let new_len = payload_len + 1 + d;
3190
3191 if new_len == len {
3192 return len;
3193 }
3194 len = new_len;
3195 }
3196}
/// Returns how many decimal digits are needed to print `n` (`0` needs one).
#[inline]
fn num_decimal_digits(n: usize) -> usize {
    match n.checked_ilog10() {
        Some(magnitude) => magnitude as usize + 1,
        None => 1,
    }
}
3206
/// Writes the PAX record `"<rec_len> <key>=<value>\n"` to the start of `buf`.
///
/// `rec_len` must be the exact total length of the record, including its own
/// decimal digits.
///
/// # Errors
///
/// Fails when `buf` is shorter than `rec_len`, or when `rec_len` does not
/// match the length of the record that would be written.
fn write_pax_record(
    buf: &mut [u8],
    key: &str,
    value: &[u8],
    rec_len: usize,
) -> std::io::Result<()> {
    let Some(record) = buf.get_mut(..rec_len) else {
        return Err(std::io::Error::other("buffer too small for pax record"));
    };
    let prefix = rec_len.to_string();
    // Three separator bytes: ' ', '=' and '\n'.
    if prefix.len() + key.len() + value.len() + 3 != rec_len {
        return Err(std::io::Error::other("pax record length mismatch"));
    }
    let pieces: [&[u8]; 6] = [
        prefix.as_bytes(),
        b" ",
        key.as_bytes(),
        b"=",
        value,
        b"\n",
    ];
    let mut at = 0;
    for piece in pieces {
        let end = at + piece.len();
        record[at..end].copy_from_slice(piece);
        at = end;
    }
    Ok(())
}
3235
/// One `key=value` record of a PAX extended header.
struct PaxRecord {
    // Record keyword, e.g. "path".
    key: String,
    // Raw record value; not required to be UTF-8.
    value: Vec<u8>,
}

impl PaxRecord {
    /// Builds a record by copying `key` and the raw `value` bytes.
    fn new(key: &str, value: &[u8]) -> Self {
        let key = key.to_owned();
        let value = value.to_owned();
        Self { key, value }
    }

    /// Builds a record from an owned string value, reusing its allocation.
    fn string(key: &str, value: String) -> Self {
        Self {
            key: String::from(key),
            value: Vec::from(value),
        }
    }
}
3256
3257fn pax_xattr_record(name: &str, value: &[u8]) -> std::io::Result<PaxRecord> {
3258 if !xattr_name_is_pax_safe(name) {
3259 return Err(std::io::Error::other(
3260 "xattr name contains non-portable characters",
3261 ));
3262 }
3263 Ok(PaxRecord::new(&format!("SCHILY.xattr.{name}"), value))
3264}
3265
/// Reports whether an attribute name can be used inside a PAX record key:
/// every byte must be a printable, non-space ASCII character other than `=`.
fn xattr_name_is_pax_safe(name: &str) -> bool {
    !name
        .bytes()
        .any(|byte| byte == b'=' || !byte.is_ascii_graphic())
}
3269
/// Reports whether `value` can be stored as octal digits in a header field of
/// `field_len` bytes, one of which is left for the terminator.
fn octal_fits(value: u64, field_len: usize) -> bool {
    let significant_bits = u64::BITS - value.leading_zeros();
    // Each octal digit carries three bits; zero still needs one digit.
    let digits = significant_bits.div_ceil(3).max(1) as usize;
    digits < field_len
}
3278
/// Writes `val` into `field` as zero-padded octal digits followed by a NUL.
///
/// The last byte of `field` holds the terminator and every other byte holds a
/// digit, most significant first.
///
/// # Errors
///
/// Fails when `val` needs more digits than the field provides; this includes
/// an empty field, which cannot even hold the terminator. On failure the
/// field contents are unspecified.
fn format_octal(val: u64, field: &mut [u8]) -> std::io::Result<()> {
    let field_len = field.len();
    let too_large = || {
        std::io::Error::other(format!(
            "value {} too large to fit in octal field of len {}",
            val, field_len
        ))
    };
    // An empty field used to underflow `len - 1` and panic.
    let Some((terminator, digits)) = field.split_last_mut() else {
        return Err(too_large());
    };
    *terminator = 0;
    let mut value = val;
    for slot in digits.iter_mut().rev() {
        *slot = b'0' + (value & 0o7) as u8;
        value >>= 3;
    }
    if value != 0 {
        return Err(too_large());
    }
    Ok(())
}
3297
pin_project! {
    /// Streaming tar archive writer.
    ///
    /// Accepts `TarEntry` values through its `Sink` implementation (or the
    /// `write` and `finish` convenience methods) and emits the archive into
    /// the wrapped asynchronous writer `W`. The contents of regular files are
    /// copied from each entry's reader of type `R`.
    pub struct TarWriter<'a, 'b, W, R> {
        // Staging buffer holding header blocks, file data, or zero padding
        // that is waiting to be written to `writer`.
        buf: [u8; BLOCK_SIZE * 32],
        // End of the valid data in `buf`.
        len: usize,
        // Offset in `buf` of the next byte to hand to `writer`.
        pos: usize,
        // Number of bytes written to `writer` so far.
        total: u64,
        // Value of `total` at which the current file's data ends.
        eof: u64,
        // Set once the two terminating zero blocks have been queued.
        closed: bool,
        // Source of the file contents currently being copied, if any.
        reader: Option<R>,
        marker_: std::marker::PhantomData<&'b ()>,
        // Destination of the archive bytes.
        #[pin]
        writer: W,
        marker: std::marker::PhantomData<&'a ()>,
    }
}
3369
3370impl<'a, 'b, W: AsyncWrite + 'a, R: AsyncRead + Unpin + 'b> TarWriter<'a, 'b, W, R> {
3371 pub fn new(writer: W) -> Self {
3373 Self {
3374 buf: [0; BLOCK_SIZE * 32],
3375 len: 0,
3376 pos: 0,
3377 total: 0,
3378 eof: 0,
3379 closed: false,
3380 reader: None,
3381 marker_: std::marker::PhantomData,
3382 writer,
3383 marker: std::marker::PhantomData,
3384 }
3385 }
3386
3387 pub async fn write(&mut self, entry: TarEntry<'b, R>) -> std::io::Result<()> {
3389 poll_fn(|cx| {
3390 unsafe { Pin::new_unchecked(&mut *self) }.poll_ready(cx)
3393 })
3394 .await?;
3395
3396 unsafe { Pin::new_unchecked(&mut *self) }.start_send(entry)?;
3398
3399 poll_fn(|cx| {
3400 unsafe { Pin::new_unchecked(&mut *self) }.poll_flush(cx)
3403 })
3404 .await
3405 }
3406
3407 pub async fn finish(&mut self) -> std::io::Result<()> {
3409 poll_fn(|cx| {
3410 unsafe { Pin::new_unchecked(&mut *self) }.poll_close(cx)
3413 })
3414 .await
3415 }
3416
3417 fn poll_drain(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
3423 let mut this = self.project();
3424 loop {
3425 while *this.pos < *this.len {
3426 let n = task::ready!(this
3427 .writer
3428 .as_mut()
3429 .poll_write(cx, &this.buf[*this.pos..*this.len]))?;
3430 if n == 0 {
3431 return Poll::Ready(Err(std::io::Error::new(
3432 std::io::ErrorKind::WriteZero,
3433 "error writing buffer",
3434 )));
3435 }
3436 *this.pos += n;
3437 *this.total += n as u64;
3438 }
3439
3440 *this.pos = 0;
3441 *this.len = 0;
3442
3443 if let Some(reader) = this.reader.as_mut() {
3444 let remain = this.eof.saturating_sub(*this.total);
3445 if remain == 0 {
3446 *this.reader = None;
3447 let padded = padded_size(*this.eof);
3448 let padding = padded.saturating_sub(*this.eof);
3449 if padding > 0 {
3450 *this.len = padding as usize;
3451 this.buf[..*this.len].fill(0);
3452 *this.eof = padded;
3453 continue;
3454 } else {
3455 return Poll::Ready(Ok(()));
3456 }
3457 }
3458 let buf_len = std::cmp::min(this.buf.len() as u64, remain) as usize;
3459 let n = task::ready!(poll_read_compat(pin!(reader), cx, &mut this.buf[..buf_len]))?;
3460 if n == 0 {
3461 return Poll::Ready(Err(std::io::Error::new(
3462 std::io::ErrorKind::UnexpectedEof,
3463 "unexpected EOF while reading file",
3464 )));
3465 }
3466 *this.pos = 0;
3467 *this.len = n;
3468 continue;
3469 } else {
3470 return Poll::Ready(Ok(()));
3471 }
3472 }
3473 }
3474}
3475
3476impl<'a, 'b, W: AsyncWrite + 'a, R: AsyncRead + Unpin + 'b> Sink<TarEntry<'b, R>>
3477 for TarWriter<'a, 'b, W, R>
3478{
3479 type Error = std::io::Error;
3480
3481 fn poll_ready(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
3482 self.poll_drain(cx)
3483 }
3484
3485 fn start_send(self: Pin<&mut Self>, item: TarEntry<'b, R>) -> std::io::Result<()> {
3486 let this = self.project();
3487
3488 if *this.len != 0 || this.reader.is_some() {
3489 return Err(std::io::Error::other(
3490 "start_send called while previous entry still in progress",
3491 ));
3492 }
3493
3494 let header_len = item.write_header(this.buf)?;
3495 *this.len = header_len;
3496
3497 if let TarEntry::File(file) = item {
3498 this.reader.replace(file.inner);
3499 *this.eof = *this.total + (header_len as u64) + file.size;
3500 }
3501
3502 Ok(())
3503 }
3504
3505 fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
3506 task::ready!(self.as_mut().poll_drain(cx))?;
3507 let mut this = self.project();
3508 task::ready!(this.writer.as_mut().poll_flush(cx))?;
3509 Poll::Ready(Ok(()))
3510 }
3511
3512 fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
3513 task::ready!(self.as_mut().poll_drain(cx))?;
3514 {
3515 let this = self.as_mut().project();
3516 if !*this.closed {
3517 this.buf[..BLOCK_SIZE * 2].fill(0);
3518 *this.len = BLOCK_SIZE * 2;
3519 *this.closed = true;
3520 }
3521 }
3522 task::ready!(self.as_mut().poll_drain(cx))?;
3523 let mut this = self.project();
3524 poll_close_compat(this.writer.as_mut(), cx)
3525 }
3526}
3527
#[cfg(test)]
mod tests {
    use super::*;
    use static_assertions::{assert_eq_align, assert_eq_size, assert_impl_all, assert_obj_safe};

    #[cfg(feature = "smol")]
    type AssertFile = smol::fs::File;
    #[cfg(feature = "tokio")]
    type AssertFile = tokio::fs::File;

    // The public reader types must stay thread-safe and object-safe.
    assert_impl_all!(TarReader<AssertFile>: Send, Sync);
    assert_impl_all!(TarEntry<AssertFile>: Send, Sync);
    assert_obj_safe!(TarReader<AssertFile>);
    assert_obj_safe!(TarEntry<AssertFile>);

    // All header views must share the size and alignment of `Header`.
    assert_eq_align!(Header, GnuHeader);
    assert_eq_size!(Header, GnuHeader);
    assert_eq_align!(Header, UstarHeader);
    assert_eq_size!(Header, UstarHeader);
    assert_eq_align!(Header, OldHeader);
    assert_eq_size!(Header, OldHeader);

    /// Edge cases of the padding, octal and PAX record helpers.
    #[test]
    fn low_level_format_helpers_cover_edge_cases() {
        for (input, expected) in [(0, 0), (1, BLOCK_SIZE as u64)] {
            assert_eq!(padded_size(input), expected);
        }

        let mut two_bytes = [0u8; 2];
        let message = format_octal(8, &mut two_bytes).unwrap_err().to_string();
        assert!(message.contains("too large to fit"));

        let rec_len = pax_record_len("path", 3);

        let mut too_short = vec![0u8; rec_len - 1];
        let message = write_pax_record(&mut too_short, "path", b"abc", rec_len)
            .unwrap_err()
            .to_string();
        assert!(message.contains("buffer too small"));

        let mut roomy = vec![0u8; rec_len + 1];
        let message = write_pax_record(&mut roomy, "path", b"abc", rec_len + 1)
            .unwrap_err()
            .to_string();
        assert!(message.contains("length mismatch"));
    }

    /// Deref and display behavior of the extension and kind helper types.
    #[test]
    fn posix_extension_and_kind_helpers_are_exercised() {
        let ext = PosixExtension::from("13 path=file\n".to_string().into_boxed_str());
        assert_eq!(&*ext, "13 path=file\n");

        let long_name = ExtensionHeader::LongName("name".to_string().into_boxed_str());
        assert_eq!(&*long_name, "name");

        let long_link = ExtensionHeader::LongLink("link".to_string().into_boxed_str());
        assert_eq!(&*long_link, "link");

        let pax = ExtensionHeader::PosixExtension(ext);
        assert_eq!(&*pax, "13 path=file\n");

        let display_names = [
            (Kind::File, "regular file"),
            (Kind::Link, "link"),
            (Kind::Symlink, "symlink"),
            (Kind::CharDevice, "character device"),
            (Kind::BlockDevice, "block device"),
            (Kind::Directory, "directory"),
            (Kind::Fifo, "FIFO"),
            (Kind::GNULongName, "GNU long name extension"),
            (Kind::GNULongLink, "GNU long link extension"),
            (Kind::PAXLocal, "PAX next file extension"),
            (Kind::PAXGlobal, "PAX global extension"),
        ];
        for (kind, expected) in display_names {
            assert_eq!(kind.to_string(), expected);
        }
    }

    /// `write_header` must report buffers that cannot hold its output.
    #[test]
    fn write_header_reports_small_buffers() {
        // Encodes a minimal regular-file header and returns the error text.
        fn header_error(buffer: &mut [u8], attrs: &AttrList) -> String {
            write_header(
                buffer,
                "file.txt",
                None,
                Kind::File,
                0,
                0o644,
                0,
                0,
                None,
                None,
                &EntryTimes::default(),
                None,
                attrs,
            )
            .unwrap_err()
            .to_string()
        }

        let message = header_error(&mut [0u8; BLOCK_SIZE - 1], &AttrList::default());
        assert!(message.contains("buffer too small for tar header"));

        let mut small = vec![0u8; BLOCK_SIZE + 10];
        let message = header_error(
            &mut small,
            &AttrList::new().with("user.comment", b"hello".as_slice()),
        );
        assert!(message.contains("buffer too small for pax header"));
    }
}