Skip to main content

smol_tar/
lib.rs

1#![deny(missing_docs)]
2#![deny(rustdoc::broken_intra_doc_links)]
3#![deny(rustdoc::invalid_rust_codeblocks)]
4
5//! A minimal async streaming tar reader and writer.
6//!
7//! The reader is fully streaming: [`TarReader`] yields [`TarEntry`] values from
8//! any [`AsyncRead`] source with very little buffering. Regular file entries
9//! expose their payload through [`TarRegularFile`], which also implements
10//! [`AsyncRead`]. To move on to the next entry, either read the file body to
11//! the end or drop the file reader.
12//!
13//! The writer is a [`Sink`] of [`TarEntry`] values and also
14//! provides inherent [`TarWriter::write`] and [`TarWriter::finish`] helpers for
15//! straightforward sequential writing without [`futures::sink::SinkExt`].
16//!
//! Write entries to the archive with [`TarWriter::write`] or [`futures::sink::SinkExt::send`].
18//! Finish the archive with [`TarWriter::finish`] or [`futures::sink::SinkExt::close`]
19//! so the trailing zero blocks are emitted.
20//!
21//! # Examples
22//!
23//! Runtime selection:
24//!
25//! ```toml
26//! [dependencies]
27//! smol-tar = "0.1"
28//! ```
29//!
30//! ```toml
31//! [dependencies]
32//! smol-tar = { version = "0.1", default-features = false, features = ["tokio"] }
33//! ```
34//!
35//! Reading an archive:
36//!
37//! ```rust
38//! # #[cfg(feature = "smol")]
39//! # use smol::{stream::StreamExt, io::{copy, sink, Cursor}};
40//! # #[cfg(feature = "tokio")]
41//! # use { std::io::Cursor, tokio_stream::StreamExt, tokio::io::{copy, sink} };
42//! use smol_tar::{TarEntry, TarReader};
43//! # #[cfg(feature = "smol")]
44//! # fn block_on<F: std::future::Future>(fut: F) -> F::Output {
45//! #   smol::block_on(fut)
46//! # }
47//! # #[cfg(feature = "tokio")]
48//! # fn block_on<F: std::future::Future>(fut: F) -> F::Output {
49//! #   tokio::runtime::Builder::new_current_thread().build().unwrap().block_on(fut)
50//! # }
51//! # block_on(async {
52//!
53//! let data = Cursor::new(vec![0; 1024]);
54//! let mut tar = TarReader::new(data);
55//!
56//! while let Some(entry) = tar.next().await {
57//!     match entry? {
58//!         TarEntry::File(mut file) => {
59//!             println!("file: {} ({} bytes)", file.path(), file.size());
60//!             copy(&mut file, &mut sink()).await?;
61//!         }
62//!         TarEntry::Directory(dir) => {
63//!             println!("dir: {}", dir.path());
64//!         }
65//!         other => {
66//!             println!("other: {}", other.path());
67//!         }
68//!     }
69//! }
70//! # std::io::Result::Ok(())
71//! # }).unwrap();
72//! ```
73//!
74//! Writing an archive:
75//!
76//! ```rust
77//! # #[cfg(feature = "smol")]
78//! # use smol::io::Cursor;
79//! # #[cfg(feature = "tokio")]
80//! # use { std::io::Cursor, tokio_stream::StreamExt };
81//! use smol_tar::{TarDirectory, TarEntry, TarRegularFile, TarWriter};
82//! # #[cfg(feature = "smol")]
83//! # fn block_on<F: std::future::Future>(fut: F) -> F::Output {
84//! #   smol::future::block_on(fut)
85//! # }
86//! # #[cfg(feature = "tokio")]
87//! # fn block_on<F: std::future::Future>(fut: F) -> F::Output {
88//! #   tokio::runtime::Builder::new_current_thread().build().unwrap().block_on(fut)
89//! # }
90//!
91//! # block_on(async {
92//! let sink = Cursor::new(Vec::<u8>::new());
93//! let mut tar = TarWriter::new(sink);
94//!
95//! tar.write(TarDirectory::new("bin/").into()).await?;
96//!
97//! let body = Cursor::new(b"hello\n");
98//! tar.write(
99//!     TarRegularFile::new(
100//!         "bin/hello.txt", 6, body,
101//!     ).into()
102//! ).await?;
103//!
104//! tar.finish().await?;
105//! # std::io::Result::Ok(())
106//! # }).unwrap();
107//! ```
108//!
109//! Alongside the direct API, the writer also implements the composable
110//! [`Sink`] interface:
111//!
112//! ```rust
113//! # #[cfg(feature = "smol")]
114//! # use smol::io::Cursor;
115//! # #[cfg(feature = "tokio")]
116//! # use std::io::Cursor;
117//! use {
118//!     smol_tar::{TarDirectory, TarEntry, TarReader, TarRegularFile, TarWriter},
119//!     futures::{future, SinkExt, StreamExt, TryStreamExt},
120//! };
121//! # #[cfg(feature = "smol")]
122//! # fn block_on<F: std::future::Future>(fut: F) -> F::Output {
123//! #   smol::future::block_on(fut)
124//! # }
125//! # #[cfg(feature = "tokio")]
126//! # fn block_on<F: std::future::Future>(fut: F) -> F::Output {
127//! #   tokio::runtime::Builder::new_current_thread().build().unwrap().block_on(fut)
128//! # }
129//! # block_on(async {
130//!
131//! let mut input = Cursor::new(Vec::<u8>::new());
132//! let mut source = TarWriter::new(&mut input);
133//! source.send(
134//!     TarDirectory::new("bin/").into()
135//! ).await?;
136//! source.send(
137//!     TarRegularFile::new(
138//!         "bin/keep.txt", 5, Cursor::new(b"keep\n".as_ref())
139//!     ).into()
140//! ).await?;
141//! source.send(
142//!     TarRegularFile::new(
143//!         "share/skip.txt", 5, Cursor::new(b"skip\n".as_ref())
144//!     ).into()
145//! ).await?;
146//! source.send(
147//!     TarRegularFile::new(
148//!         "bin/run.sh", 8, Cursor::new(b"echo hi\n".as_ref())
149//!     ).with_mode(0o755).into()
150//! ).await?;
151//! source.close().await?;
152//! input.set_position(0);
153//!
154//! let mut output = Cursor::new(Vec::<u8>::new());
155//! let mut filtered = TarWriter::new(&mut output);
156//!
157//! TarReader::new(&mut input)
158//!     .try_filter(|entry| {
159//!         future::ready(entry.path().starts_with("bin/"))
160//!     })
161//!     .forward(&mut filtered)
162//!     .await?;
163//!
164//! filtered.close().await?;
165//! output.set_position(0);
166//!
167//! let paths: Vec<String> = TarReader::new(output)
168//!     .map_ok(|entry| entry.path().to_string())
169//!     .try_collect()
170//!     .await?;
171//!
172//! assert_eq!(paths, vec!["bin/", "bin/keep.txt", "bin/run.sh"]);
173//! # std::io::Result::Ok(())
174//! # }).unwrap();
175//! ```
176//!
177//! # Supported formats
178//!
179//! Reads:
180//!
181//! - Old tar
182//! - GNU tar
183//! - POSIX ustar/pax
184//! - GNU long names and long links
185//! - PAX path metadata, timestamps, numeric ids, sizes, and extended attributes
186//!
187//! Writes:
188//!
189//! - POSIX ustar/pax
190//! - PAX records for long paths, long link targets, timestamps, numeric ids,
191//!   symbolic names, sizes, and extended attributes
192//!
193//! # Not supported
194//!
195//! - Sparse files and multi-volume archives
196//!
197
// Exactly one runtime backend must be selected: the `smol` and `tokio`
// features pull in different `AsyncRead`/`AsyncWrite` traits, so enabling
// both (or neither) is rejected at compile time.
#[cfg(all(feature = "smol", feature = "tokio"))]
compile_error!("features `smol` and `tokio` are mutually exclusive");
#[cfg(not(any(feature = "smol", feature = "tokio")))]
compile_error!("either feature `smol` or `tokio` must be enabled");
202
203use {
204    async_lock::Mutex,
205    futures_lite::Stream,
206    futures_sink::Sink,
207    pin_project_lite::pin_project,
208    std::{
209        future::{poll_fn, Future},
210        io::{Error, ErrorKind, Result},
211        pin::{pin, Pin},
212        str::from_utf8,
213        sync::Arc,
214        task::{self, Context, Poll},
215        time::{Duration, SystemTime, UNIX_EPOCH},
216    },
217};
218
219#[cfg(feature = "smol")]
220use futures_lite::io::{AsyncRead, AsyncWrite};
221#[cfg(feature = "tokio")]
222use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};
223
// Size in bytes of one tar block; a header record is exactly one block.
const BLOCK_SIZE: usize = 512;
// NOTE(review): presumably the scratch-buffer size used while skipping
// unread entry data — confirm at the use site.
const SKIP_BUFFER_SIZE: usize = 64 * 1024;
// NOTE(review): presumably the upper bound accepted for path lengths —
// confirm at the use site.
const PATH_MAX: usize = 4096;
// NOTE(review): presumably the largest PAX extended header accepted
// (1 MiB) — confirm at the use site.
const PAX_HEADER_MAX_SIZE: usize = 1024 * 1024;
228
/// Runtime-neutral read shim (`smol` backend).
///
/// Forwards straight to `AsyncRead::poll_read`, which already reports the
/// number of bytes written into `buf`.
#[cfg(feature = "smol")]
fn poll_read_compat<R>(
    reader: Pin<&mut R>,
    cx: &mut Context<'_>,
    buf: &mut [u8],
) -> Poll<Result<usize>>
where
    R: AsyncRead + ?Sized,
{
    AsyncRead::poll_read(reader, cx, buf)
}
237
/// Runtime-neutral read shim (`tokio` backend).
///
/// Wraps `buf` in a `ReadBuf`, polls the reader once and translates the
/// result into the number of bytes that were filled.
#[cfg(feature = "tokio")]
fn poll_read_compat<R>(
    reader: Pin<&mut R>,
    cx: &mut Context<'_>,
    buf: &mut [u8],
) -> Poll<Result<usize>>
where
    R: AsyncRead + ?Sized,
{
    let mut read_buf = ReadBuf::new(buf);
    // `ready!` propagates `Pending`; `?` propagates the I/O error.
    task::ready!(reader.poll_read(cx, &mut read_buf))?;
    Poll::Ready(Ok(read_buf.filled().len()))
}
251
/// Runtime-neutral close shim (`smol` backend): delegates to
/// `AsyncWrite::poll_close`.
#[cfg(feature = "smol")]
fn poll_close_compat<W>(writer: Pin<&mut W>, cx: &mut Context<'_>) -> Poll<Result<()>>
where
    W: AsyncWrite + ?Sized,
{
    AsyncWrite::poll_close(writer, cx)
}
259
/// Runtime-neutral close shim (`tokio` backend): delegates to
/// `AsyncWrite::poll_shutdown`.
#[cfg(feature = "tokio")]
fn poll_close_compat<W>(writer: Pin<&mut W>, cx: &mut Context<'_>) -> Poll<Result<()>>
where
    W: AsyncWrite + ?Sized,
{
    AsyncWrite::poll_shutdown(writer, cx)
}
267
268fn poll_regular_file_reader<'a, R: AsyncRead>(
269    this: Pin<&mut TarRegularFileReader<'a, R>>,
270    ctx: &mut Context<'_>,
271    buf: &mut [u8],
272) -> Poll<Result<usize>> {
273    let this = this.get_mut();
274    let eof = this.eof;
275    let fut = this.inner.lock();
276    let mut g = task::ready!(pin!(fut).poll(ctx));
277    let inner_pin: Pin<&mut TarReaderInner<'a, R>> = g.as_mut();
278    let inner = inner_pin.project();
279    let n;
280    if *inner.pos > eof || (*inner.pos == eof && !matches!(*inner.state, Entry)) {
281        return Poll::Ready(Ok(0));
282    } else if *inner.pos < eof {
283        let remain = *inner.nxt - *inner.pos;
284        n = if remain > 0 {
285            let size = std::cmp::min(remain, buf.len() as u64);
286            let n = task::ready!(poll_read_compat(
287                pin!(inner.reader),
288                ctx,
289                &mut buf[0..size as usize]
290            ))?;
291            if n == 0 {
292                return Poll::Ready(Err(Error::new(
293                    ErrorKind::UnexpectedEof,
294                    "unexpected EOF while reading archive file",
295                )));
296            }
297            n
298        } else {
299            0
300        };
301        *inner.pos += n as u64;
302        if (n as u64) < remain {
303            return Poll::Ready(Ok(n));
304        }
305    } else {
306        n = 0;
307    };
308    ctx.waker().wake_by_ref();
309    let nxt = padded_size(*inner.nxt);
310    if *inner.pos == nxt {
311        *inner.nxt = nxt + BLOCK_SIZE as u64;
312        *inner.state = Header;
313    } else {
314        *inner.nxt = nxt;
315        *inner.state = Padding;
316    }
317    Poll::Ready(Ok(n))
318}
319
// Unwraps a `Poll<Result<T>>` inside a function that returns
// `Poll<Option<Result<_>>>`.
//
// Evaluates to `T` when the operation is ready and succeeded. Otherwise it
// returns early from the enclosing function: an error is forwarded as
// `Poll::Ready(Some(Err(err)))`, and `Poll::Pending` is passed through.
macro_rules! ready_opt {
    ($e:expr $(,)?) => {
        match $e {
            Poll::Ready(Ok(t)) => t,
            Poll::Ready(Err(err)) => return Poll::Ready(Some(Err(err))),
            Poll::Pending => return Poll::Pending,
        }
    };
}
329
/// One raw tar header block.
///
/// The bytes are stored untyped; `Header::cast` reinterprets them as one of
/// the concrete layouts (`OldHeader`, `UstarHeader`, `GnuHeader`).
#[repr(C)]
#[allow(missing_docs)]
struct Header {
    // The raw `BLOCK_SIZE`-byte record exactly as it appears in the archive.
    record: [u8; BLOCK_SIZE],
}
335
/// Typed, borrowed view of a [`Header`], chosen by `Header::kind` from the
/// header's `magic` and `version` fields.
enum HeaderKind<'a> {
    // `magic == "ustar "` and `version == " \0"`.
    Gnu(&'a GnuHeader),
    // `magic == "ustar\0"` and `version == "00"`.
    Ustar(&'a UstarHeader),
    // Anything else.
    Old(&'a OldHeader),
}
341
/// Marker for the header layouts that `Header::cast` is allowed to
/// reinterpret the raw record as.
trait HeaderVariant {}
343
344impl Header {
345    fn new() -> Self {
346        Self {
347            record: [0u8; BLOCK_SIZE],
348        }
349    }
350    unsafe fn cast<U: HeaderVariant>(&self) -> &U {
351        &*(self as *const Self as *const U)
352    }
353    fn buf_mut<I>(&mut self, range: I) -> &mut [u8]
354    where
355        I: core::slice::SliceIndex<[u8], Output = [u8]>,
356    {
357        &mut self.record[range]
358    }
359    fn buf<I>(&self, range: I) -> &[u8]
360    where
361        I: core::slice::SliceIndex<[u8], Output = [u8]>,
362    {
363        &self.record[range]
364    }
365    fn as_str<I>(&self, range: I) -> Result<Box<str>>
366    where
367        I: core::slice::SliceIndex<[u8], Output = [u8]>,
368    {
369        from_utf8(self.buf(range))
370            .map_err(|_| Error::new(ErrorKind::InvalidData, "invalid UTF-8"))
371            .map(|p| p.to_string().into_boxed_str())
372    }
373    fn as_null_terminated_str<I>(&self, range: I) -> Result<Box<str>>
374    where
375        I: core::slice::SliceIndex<[u8], Output = [u8]>,
376    {
377        from_utf8(null_terminated(self.buf(range)))
378            .map_err(|_| Error::new(ErrorKind::InvalidData, "invalid UTF-8"))
379            .map(|p| p.to_string().into_boxed_str())
380    }
381    fn kind(&self) -> HeaderKind<'_> {
382        let gnu = unsafe { self.cast::<GnuHeader>() };
383        if gnu.magic == *b"ustar " && gnu.version == *b" \0" {
384            HeaderKind::Gnu(gnu)
385        } else if gnu.magic == *b"ustar\0" && gnu.version == *b"00" {
386            HeaderKind::Ustar(unsafe { self.cast::<UstarHeader>() })
387        } else {
388            HeaderKind::Old(unsafe { self.cast::<OldHeader>() })
389        }
390    }
391    fn entry_type(&self) -> std::result::Result<Kind, u8> {
392        Kind::from_byte(unsafe { self.cast::<GnuHeader>() }.typeflag[0])
393    }
394    fn checksum(&self) -> Result<u32> {
395        parse_octal(&unsafe { self.cast::<GnuHeader>() }.cksum)
396            .map(|value| value as u32)
397            .map_err(|err| {
398                Error::new(
399                    ErrorKind::InvalidData,
400                    format!(
401                        "invalid tar header checksum field: {:?}",
402                        String::from_utf8_lossy(err)
403                    ),
404                )
405            })
406    }
407    fn calculated_checksum(&self) -> u32 {
408        self.record
409            .iter()
410            .enumerate()
411            .map(|(index, byte)| {
412                if (148..156).contains(&index) {
413                    u32::from(b' ')
414                } else {
415                    u32::from(*byte)
416                }
417            })
418            .sum()
419    }
420    fn validate_checksum(&self) -> Result<()> {
421        let expected = self.checksum()?;
422        let actual = self.calculated_checksum();
423        if expected == actual {
424            Ok(())
425        } else {
426            Err(Error::new(
427                ErrorKind::InvalidData,
428                format!("invalid tar header checksum: expected {expected}, got {actual}"),
429            ))
430        }
431    }
432    fn is_gnu(&self) -> bool {
433        let gnu = unsafe { self.cast::<GnuHeader>() };
434        gnu.magic == *b"ustar " && gnu.version == *b" \0"
435    }
436    #[allow(dead_code)]
437    fn is_ustar(&self) -> bool {
438        let ustar = unsafe { self.cast::<UstarHeader>() };
439        ustar.magic == *b"ustar\0" && ustar.version == *b"00"
440    }
441    fn is_old(&self) -> bool {
442        let gnu = unsafe { self.cast::<GnuHeader>() };
443        gnu.magic[..5] != *b"ustar"
444    }
445    #[inline]
446    fn mode(&self) -> Result<u32> {
447        parse_octal(&unsafe { self.cast::<GnuHeader>() }.mode)
448            .map(|r| r as u32)
449            .map_err(|err| {
450                Error::new(
451                    ErrorKind::InvalidData,
452                    format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
453                )
454            })
455    }
456    #[inline]
457    fn mtime(&self) -> Result<u64> {
458        parse_octal(&unsafe { self.cast::<GnuHeader>() }.mtime).map_err(|err| {
459            Error::new(
460                ErrorKind::InvalidData,
461                format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
462            )
463        })
464    }
465    #[inline]
466    fn size(&self) -> Result<u64> {
467        parse_octal(&unsafe { self.cast::<GnuHeader>() }.size).map_err(|err| {
468            Error::new(
469                ErrorKind::InvalidData,
470                format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
471            )
472        })
473    }
474    #[inline]
475    fn uid(&self) -> Result<u32> {
476        parse_octal(&unsafe { self.cast::<GnuHeader>() }.uid)
477            .map(|r| r as u32)
478            .map_err(|err| {
479                Error::new(
480                    ErrorKind::InvalidData,
481                    format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
482                )
483            })
484    }
485    #[inline]
486    fn gid(&self) -> Result<u32> {
487        parse_octal(&unsafe { self.cast::<GnuHeader>() }.gid)
488            .map(|r| r as u32)
489            .map_err(|err| {
490                Error::new(
491                    ErrorKind::InvalidData,
492                    format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
493                )
494            })
495    }
496    #[inline]
497    fn dev_major(&self) -> Result<u32> {
498        parse_octal(&unsafe { self.cast::<GnuHeader>() }.dev_major)
499            .map(|r| r as u32)
500            .map_err(|err| {
501                Error::new(
502                    ErrorKind::InvalidData,
503                    format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
504                )
505            })
506    }
507    #[inline]
508    fn dev_minor(&self) -> Result<u32> {
509        parse_octal(&unsafe { self.cast::<GnuHeader>() }.dev_minor)
510            .map(|r| r as u32)
511            .map_err(|err| {
512                Error::new(
513                    ErrorKind::InvalidData,
514                    format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
515                )
516            })
517    }
518    fn uname(&self) -> Result<Option<Box<str>>> {
519        match self.kind() {
520            HeaderKind::Gnu(gnu) => parse_name_field(&gnu.uname),
521            HeaderKind::Ustar(ustar) => parse_name_field(&ustar.uname),
522            HeaderKind::Old(_) => Ok(None),
523        }
524    }
525    fn gname(&self) -> Result<Option<Box<str>>> {
526        match self.kind() {
527            HeaderKind::Gnu(gnu) => parse_name_field(&gnu.gname),
528            HeaderKind::Ustar(ustar) => parse_name_field(&ustar.gname),
529            HeaderKind::Old(_) => Ok(None),
530        }
531    }
532    fn is_zero(&self) -> bool {
533        self.record.iter().all(|b| *b == b'\0')
534    }
535}
536
/// Entry type as encoded in the header's one-byte type flag.
///
/// Each discriminant is the flag byte itself.
#[derive(Debug, PartialEq, Eq)]
#[repr(u8)]
enum Kind {
    File0 = b'\0',
    File = b'0',
    Link = b'1',
    Symlink = b'2',
    CharDevice = b'3',
    BlockDevice = b'4',
    Directory = b'5',
    Fifo = b'6',
    #[allow(dead_code)]
    Continous = b'7',
    GNULongLink = b'K',
    GNULongName = b'L',
    PAXLocal = b'x',
    PAXGlobal = b'g',
}

impl std::fmt::Display for Kind {
    /// Writes a short human-readable description of the entry type.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let label = match self {
            Self::File0 | Self::File | Self::Continous => "regular file",
            Self::Link => "link",
            Self::Symlink => "symlink",
            Self::CharDevice => "character device",
            Self::BlockDevice => "block device",
            Self::Directory => "directory",
            Self::Fifo => "FIFO",
            Self::GNULongLink => "GNU long link extension",
            Self::GNULongName => "GNU long name extension",
            Self::PAXLocal => "PAX next file extension",
            Self::PAXGlobal => "PAX global extension",
        };
        f.write_str(label)
    }
}

impl Kind {
    /// The type-flag byte that represents this kind.
    fn byte(self) -> u8 {
        self as u8
    }

    /// Decodes a type-flag byte; an unknown byte is handed back as `Err`.
    fn from_byte(b: u8) -> std::result::Result<Self, u8> {
        let kind = match b {
            b'\0' => Kind::File0,
            b'0' => Kind::File,
            b'1' => Kind::Link,
            b'2' => Kind::Symlink,
            b'3' => Kind::CharDevice,
            b'4' => Kind::BlockDevice,
            b'5' => Kind::Directory,
            b'6' => Kind::Fifo,
            b'7' => Kind::Continous,
            b'K' => Kind::GNULongLink,
            b'L' => Kind::GNULongName,
            b'x' => Kind::PAXLocal,
            b'g' => Kind::PAXGlobal,
            unknown => return Err(unknown),
        };
        Ok(kind)
    }
}
596
/// Header layout used when the record carries neither the GNU nor the ustar
/// magic (see `Header::kind`).
///
/// `repr(C)` with byte-array fields only; the fields add up to exactly
/// 512 bytes, so the struct can overlay a raw header record.
#[repr(C)]
#[allow(missing_docs)]
struct OldHeader {
    name: [u8; 100],
    mode: [u8; 8],
    uid: [u8; 8],
    gid: [u8; 8],
    size: [u8; 12],
    mtime: [u8; 12],
    cksum: [u8; 8],
    linkflag: [u8; 1],
    linkname: [u8; 100],
    // Remainder of the block; not interpreted in this layout.
    pad: [u8; 255],
}
// Allows `Header::cast::<OldHeader>()`.
impl HeaderVariant for OldHeader {}
612impl OldHeader {
613    fn path_name(&self) -> Result<Box<str>> {
614        path_name(&self.name).map(|p| p.to_string().into_boxed_str())
615    }
616    fn link_name(&self) -> Result<Box<str>> {
617        path_name(&self.linkname).map(|p| p.to_string().into_boxed_str())
618    }
619}
620
// Byte length of the `name` and `linkname` fields of a ustar header.
const NAME_LEN: usize = 100;
// Byte length of the `prefix` field of a ustar header.
const PREFIX_LEN: usize = 155;
623
/// POSIX ustar header layout (`magic == "ustar\0"`, `version == "00"`).
///
/// `repr(C)` with byte-array fields only; the fields add up to exactly
/// 512 bytes, so the struct can overlay a raw header record.
#[repr(C)]
#[allow(missing_docs)]
struct UstarHeader {
    name: [u8; NAME_LEN],
    mode: [u8; 8],
    uid: [u8; 8],
    gid: [u8; 8],
    size: [u8; 12],
    mtime: [u8; 12],
    cksum: [u8; 8],
    typeflag: [u8; 1],
    linkname: [u8; NAME_LEN],
    magic: [u8; 6],
    version: [u8; 2],
    uname: [u8; 32],
    gname: [u8; 32],
    dev_major: [u8; 8],
    dev_minor: [u8; 8],
    // Leading part of a path that was split at a `/` (see `set_path`).
    prefix: [u8; PREFIX_LEN],
    pad: [u8; 12],
}
// Allows `Header::cast::<UstarHeader>()`.
impl HeaderVariant for UstarHeader {}
646impl UstarHeader {
647    fn path_name(&self) -> Result<Box<str>> {
648        ustar_path_name(&self.name, &self.prefix)
649    }
650    fn link_name(&self) -> Result<Box<str>> {
651        path_name(&self.linkname).map(|p| p.to_string().into_boxed_str())
652    }
653    unsafe fn from_buf(buf: &mut [u8]) -> &mut Self {
654        buf[..BLOCK_SIZE].fill(0);
655        let hdr = &mut *(buf.as_mut_ptr() as *mut Self);
656        hdr.magic = *b"ustar\0";
657        hdr.version = *b"00";
658        hdr
659    }
660    fn set_dev_major(&mut self, major: u32) -> std::io::Result<()> {
661        format_octal(major as u64, &mut self.dev_major)
662    }
663    fn set_dev_minor(&mut self, minor: u32) -> std::io::Result<()> {
664        format_octal(minor as u64, &mut self.dev_minor)
665    }
666    fn set_uid(&mut self, uid: u32) -> std::io::Result<()> {
667        format_octal(uid as u64, &mut self.uid)
668    }
669    fn set_gid(&mut self, gid: u32) -> std::io::Result<()> {
670        format_octal(gid as u64, &mut self.gid)
671    }
672    fn set_uname(&mut self, uname: &str) {
673        self.uname.fill(0);
674        self.uname[..uname.len()].copy_from_slice(uname.as_bytes());
675    }
676    fn set_gname(&mut self, gname: &str) {
677        self.gname.fill(0);
678        self.gname[..gname.len()].copy_from_slice(gname.as_bytes());
679    }
680    fn set_mode(&mut self, mode: u32) -> std::io::Result<()> {
681        format_octal(mode as u64, &mut self.mode)
682    }
683    fn set_mtime(&mut self, mtime: u64) -> std::io::Result<()> {
684        format_octal(mtime, &mut self.mtime)
685    }
686    fn set_size(&mut self, size: u64) -> std::io::Result<()> {
687        format_octal(size, &mut self.size)
688    }
689    fn set_typeflag(&mut self, kind: Kind) {
690        self.typeflag[0] = kind.byte();
691    }
692    fn path_split_point(&mut self, path: &str) -> Option<usize> {
693        let bytes = path.as_bytes();
694        if bytes.len() <= self.name.len() {
695            return None;
696        }
697        bytes
698            .iter()
699            .enumerate()
700            .rfind(|(i, b)| **b == b'/' && i <= &self.prefix.len())
701            .map(|(i, _)| i)
702    }
703    fn set_path(&mut self, path: &str, split_pos: Option<usize>) {
704        if let Some(pos) = split_pos {
705            self.prefix[..pos].copy_from_slice(&path.as_bytes()[..pos]);
706            copy_utf8_truncate(&mut self.name, unsafe {
707                // SAFETY: the source string was an str, and a break, if any, was made at '/',
708                // which is a valid codepoint
709                std::str::from_utf8_unchecked(&path.as_bytes()[pos + 1..])
710            });
711        } else {
712            copy_utf8_truncate(&mut self.name, path);
713        }
714    }
715    fn set_link_path(&mut self, name: &str) {
716        copy_utf8_truncate(&mut self.linkname, name);
717    }
718    fn finalize(&mut self) -> std::io::Result<()> {
719        self.cksum.fill(b' ');
720        let buf =
721            unsafe { std::slice::from_raw_parts(self as *const Self as *const u8, BLOCK_SIZE) };
722        let checksum: u32 = buf.iter().map(|b| *b as u32).sum();
723        format_octal(checksum as u64, &mut self.cksum)
724    }
725}
726
/// Copies `bytes` into the front of `field`, truncating when it is too long.
///
/// A string that fits is copied whole. Otherwise the copy stops at the last
/// character boundary that is not beyond the end of `field` and whose
/// following character is not `/`; nothing is copied if no such boundary
/// exists. Bytes of `field` past the copied prefix are left untouched.
fn copy_utf8_truncate(field: &mut [u8], bytes: &str) {
    let src = bytes.as_bytes();
    let capacity = field.len();
    if src.len() <= capacity {
        field[..src.len()].copy_from_slice(src);
        return;
    }
    let cut = bytes
        .char_indices()
        .take_while(|(index, _)| *index <= capacity)
        .filter(|(_, ch)| *ch != '/')
        .map(|(index, _)| index)
        .last()
        .unwrap_or(0);
    field[..cut].copy_from_slice(&src[..cut]);
}
744
/// GNU tar header layout (`magic == "ustar "`, `version == " \0"`).
///
/// Identical to the ustar layout up to and including `dev_minor`; the
/// remaining bytes hold GNU-specific fields. `repr(C)` with byte-array
/// fields only; the fields add up to exactly 512 bytes, so the struct can
/// overlay a raw header record.
#[repr(C)]
#[allow(missing_docs)]
struct GnuHeader {
    name: [u8; 100],
    mode: [u8; 8],
    uid: [u8; 8],
    gid: [u8; 8],
    size: [u8; 12],
    mtime: [u8; 12],
    cksum: [u8; 8],
    typeflag: [u8; 1],
    linkname: [u8; 100],
    magic: [u8; 6],
    version: [u8; 2],
    uname: [u8; 32],
    gname: [u8; 32],
    dev_major: [u8; 8],
    dev_minor: [u8; 8],
    // Octal timestamps, parsed by `GnuHeader::atime` / `GnuHeader::ctime`.
    atime: [u8; 12],
    ctime: [u8; 12],
    offset: [u8; 12],
    longnames: [u8; 4],
    unused: [u8; 1],
    sparse: [u8; 96],
    isextended: [u8; 1],
    realsize: [u8; 12],
    pad: [u8; 17],
}
// Allows `Header::cast::<GnuHeader>()`.
impl HeaderVariant for GnuHeader {}
774impl GnuHeader {
775    fn path_name(&self) -> Result<Box<str>> {
776        path_name(&self.name).map(|p| p.to_string().into_boxed_str())
777    }
778    fn link_name(&self) -> Result<Box<str>> {
779        path_name(&self.linkname).map(|p| p.to_string().into_boxed_str())
780    }
781    fn atime(&self) -> Result<u64> {
782        parse_octal(&self.atime).map_err(|err| {
783            Error::new(
784                ErrorKind::InvalidData,
785                format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
786            )
787        })
788    }
789    fn ctime(&self) -> Result<u64> {
790        parse_octal(&self.ctime).map_err(|err| {
791            Error::new(
792                ErrorKind::InvalidData,
793                format!("invalid octal digit: {:?}", String::from_utf8_lossy(err)),
794            )
795        })
796    }
797}
798
// Internal representation of one decoded archive member.
// NOTE(review): presumably converted into the public `TarEntry` before being
// handed to the caller — confirm where this enum is consumed.
enum Entry {
    // A regular file: its metadata plus the position bookkeeping needed to
    // read the body from the shared stream.
    File {
        path_name: Box<str>,
        size: u64,
        // NOTE(review): presumably the stream position at which the file
        // body ends (compare `TarRegularFileReader::eof`) — confirm.
        eof: u64,
        mode: u32,
        uid: u32,
        gid: u32,
        uname: Option<Box<str>>,
        gname: Option<Box<str>>,
        times: EntryTimes,
        attrs: AttrList,
    },
    Link(TarLink),
    Symlink(TarSymlink),
    Directory(TarDirectory),
    Device(TarDevice),
    Fifo(TarFifo),
}
818
819fn effective_size(hdr: &Header, size: Option<u64>) -> Result<u64> {
820    Ok(size.unwrap_or(hdr.size()?))
821}
822
823fn effective_uid(hdr: &Header, uid: Option<u32>) -> Result<u32> {
824    Ok(uid.unwrap_or(hdr.uid()?))
825}
826
827fn effective_gid(hdr: &Header, gid: Option<u32>) -> Result<u32> {
828    Ok(gid.unwrap_or(hdr.gid()?))
829}
830
831fn effective_uname(hdr: &Header, uname: Option<Box<str>>) -> Result<Option<Box<str>>> {
832    uname.map_or_else(|| hdr.uname(), |uname| Ok(Some(uname)))
833}
834
835fn effective_gname(hdr: &Header, gname: Option<Box<str>>) -> Result<Option<Box<str>>> {
836    gname.map_or_else(|| hdr.gname(), |gname| Ok(Some(gname)))
837}
838
839fn effective_times(hdr: &Header, info: &PaxInfo) -> Result<EntryTimes> {
840    let mut times = EntryTimes::from_mtime(hdr.mtime()?)?;
841    if let HeaderKind::Gnu(gnu) = hdr.kind() {
842        let atime = gnu.atime()?;
843        if atime != 0 {
844            times.atime = Some(unix_epoch_checked_add(Duration::from_secs(atime))?);
845        }
846        let ctime = gnu.ctime()?;
847        if ctime != 0 {
848            times.ctime = Some(unix_epoch_checked_add(Duration::from_secs(ctime))?);
849        }
850    }
851    if let Some(mtime) = info.mtime {
852        times.mtime = mtime;
853    }
854    if let Some(atime) = info.atime {
855        times.atime = Some(atime);
856    }
857    if let Some(ctime) = info.ctime {
858        times.ctime = Some(ctime);
859    }
860    Ok(times)
861}
862
// What the reader state machine expects next in the stream.
#[derive(Debug, PartialEq, Eq)]
enum State {
    // A header block comes next.
    Header,
    // NOTE(review): presumably the payload of an extension record of the
    // given kind is being read; the meaning of the `u32` is not visible
    // here — confirm.
    Extension((u32, Kind)),
    // The body of a regular file is being read.
    Entry,
    // The rest of the current file body is to be skipped (set when a file
    // reader is dropped before reaching the end of the body).
    SkipEntry,
    // Padding up to the next block boundary is to be consumed.
    Padding,
    // NOTE(review): `Eof` and `Eoff` presumably represent stages of the
    // end-of-archive detection — confirm at the use site.
    Eof,
    Eoff,
}
use State::*;
874
875struct PosixExtension {
876    inner: Box<str>,
877}
878impl PosixExtension {
879    fn validate(ext: &str) -> Result<()> {
880        parse_pax_records(ext, |_, _| Ok(()))
881    }
882    fn for_each_record(&self, cb: impl FnMut(&str, &str) -> Result<()>) -> Result<()> {
883        parse_pax_records(&self.inner, cb)
884    }
885}
886
887impl From<Box<str>> for PosixExtension {
888    fn from(s: Box<str>) -> Self {
889        Self { inner: s }
890    }
891}
892impl std::ops::Deref for PosixExtension {
893    type Target = str;
894    fn deref(&self) -> &Self::Target {
895        &self.inner
896    }
897}
898
899enum ExtensionHeader {
900    LongName(Box<str>),
901    LongLink(Box<str>),
902    PosixExtension(PosixExtension),
903}
904
905impl std::ops::Deref for ExtensionHeader {
906    type Target = str;
907    fn deref(&self) -> &Self::Target {
908        match self {
909            ExtensionHeader::LongName(name) => name,
910            ExtensionHeader::LongLink(name) => name,
911            ExtensionHeader::PosixExtension(pax) => pax,
912        }
913    }
914}
915
916struct ExtensionBuffer {
917    buf: Vec<u8>,
918}
919
920impl ExtensionBuffer {
921    fn new(size: usize) -> Self {
922        ExtensionBuffer {
923            buf: Vec::<u8>::with_capacity(size),
924        }
925    }
926    fn as_str<I>(&self, range: I) -> Result<Box<str>>
927    where
928        I: core::slice::SliceIndex<[u8], Output = [u8]>,
929    {
930        from_utf8(&self.buf[range])
931            .map_err(|_| Error::new(ErrorKind::InvalidData, "invalid UTF-8"))
932            .map(|p| p.to_string().into_boxed_str())
933    }
934    fn as_null_terminated_str<I>(&self, range: I) -> Result<Box<str>>
935    where
936        I: core::slice::SliceIndex<[u8], Output = [u8]>,
937    {
938        from_utf8(null_terminated(&self.buf[range]))
939            .map_err(|_| Error::new(ErrorKind::InvalidData, "invalid UTF-8"))
940            .map(|p| p.to_string().into_boxed_str())
941    }
942    unsafe fn upto(&mut self, n: usize) -> &mut [u8] {
943        std::slice::from_raw_parts_mut(self.buf.as_mut_ptr(), n)
944    }
945    unsafe fn remaining_buf(&mut self) -> &mut [u8] {
946        let remaining = self.buf.spare_capacity_mut();
947        std::slice::from_raw_parts_mut(remaining.as_mut_ptr() as *mut u8, remaining.len())
948    }
949    unsafe fn advance(&mut self, n: usize) {
950        self.buf.set_len(self.buf.len() + n)
951    }
952}
953
pin_project! {
    /// Core state machine that walks the tar stream block-by-block.
    ///
    /// `pos` tracks how much of the current block has been consumed while
    /// `nxt` marks the boundary at which the next transition happens (end of
    /// header, file body, padding, etc.). The `state` enum plus the optional
    /// `ext` buffer describe what the reader is currently expecting: extension
    /// payloads, entry data that must be skipped, or archive EOF.
    struct TarReaderInner<'a, R> {
        // current position in the stream
        pos: u64,
        // end of the current record being processed
        nxt: u64,
        // current state
        state: State,
        // the buffer for the current extended header or for skipping an entry
        ext: Option<ExtensionBuffer>,
        // list of the current extended headers
        exts: Vec<ExtensionHeader>,
        // list of the global extended headers
        globs: Vec<PosixExtension>,
        // the current record buffer
        header: Header,
        // the underlying byte source; structurally pinned
        #[pin]
        reader: R,
        // carries the otherwise unused lifetime parameter `'a`
        marker: std::marker::PhantomData<&'a ()>,
    }
}
982
/// Async reader for the body of the current regular-file entry.
///
/// Instances are produced by [`TarReader`] while iterating
/// [`TarEntry::File`] values. If you drop the reader before reaching EOF, the
/// archive reader skips the remaining bytes for that file so iteration can
/// carry on with the next entry.
pub struct TarRegularFileReader<'a, R: AsyncRead + 'a> {
    // stream position that the shared reader's `pos` is compared against on
    // drop to decide whether the body was fully consumed
    eof: u64,
    // state machine shared with the `TarReader` that produced this reader
    inner: Arc<Mutex<Pin<Box<TarReaderInner<'a, R>>>>>,
}
993
994impl<R: AsyncRead> Drop for TarRegularFileReader<'_, R> {
995    fn drop(&mut self) {
996        let inner = self.inner.clone();
997        let eof = self.eof;
998        let mut g = inner.lock_blocking();
999        let this_pin = g.as_mut();
1000        let this = this_pin.project();
1001        if *this.pos < eof {
1002            *this.state = SkipEntry;
1003        } else if *this.pos == eof && matches!(*this.state, Entry) {
1004            let nxt = padded_size(*this.nxt);
1005            if *this.pos == nxt {
1006                *this.nxt = nxt + BLOCK_SIZE as u64;
1007                *this.state = Header;
1008            } else {
1009                *this.nxt = nxt;
1010                *this.state = Padding;
1011            }
1012        }
1013    }
1014}
1015
#[cfg(feature = "smol")]
impl<R: AsyncRead> AsyncRead for TarRegularFileReader<'_, R> {
    /// Poll for body bytes of the current file entry, delegating to
    /// `poll_regular_file_reader`.
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        out: &mut [u8],
    ) -> Poll<Result<usize>> {
        poll_regular_file_reader(self, cx, out)
    }
}
1026
#[cfg(feature = "tokio")]
impl<R: AsyncRead> AsyncRead for TarRegularFileReader<'_, R> {
    /// Poll for body bytes of the current file entry.
    ///
    /// The unfilled part of `buf` is initialized and handed to
    /// `poll_regular_file_reader`; on success the reported byte count is
    /// recorded in `buf`.
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<Result<()>> {
        let polled = poll_regular_file_reader(self, cx, buf.initialize_unfilled());
        // Errors and `Pending` pass through untouched; only `Ready(Ok(n))`
        // advances the buffer.
        polled.map_ok(|filled| buf.advance(filled))
    }
}
1044
/// Streaming tar reader over an [`AsyncRead`] source.
///
/// The reader implements [`Stream`] and yields [`TarEntry`] values one at a
/// time. File entries remain streaming; to continue past a
/// [`TarEntry::File`] entry, either read the file body to the end or drop it.
///
/// # Example
///
/// ```rust
/// # #[cfg(feature = "smol")]
/// # use { smol::{ stream::StreamExt, io::{copy, sink, Cursor} } };
/// # #[cfg(feature = "tokio")]
/// # use { std::io::Cursor, tokio_stream::StreamExt, tokio::io::{copy, sink} };
/// use smol_tar::{TarEntry, TarReader};
/// # #[cfg(feature = "smol")]
/// # fn block_on<F: std::future::Future>(fut: F) -> F::Output {
/// #   smol::block_on(fut)
/// # }
/// # #[cfg(feature = "tokio")]
/// # fn block_on<F: std::future::Future>(fut: F) -> F::Output {
/// #   tokio::runtime::Builder::new_current_thread().build().unwrap().block_on(fut)
/// # }
/// # block_on(async {
///
/// let data = Cursor::new(vec![0; 1024]);
/// let mut tar = TarReader::new(data);
///
/// while let Some(entry) = tar.next().await {
///     match entry? {
///         TarEntry::File(mut file) => {
///             println!("file: {}", file.path());
///             copy(&mut file, &mut sink()).await?;
///         },
///         other => println!("entry: {}", other.path()),
///     }
/// }
/// # std::io::Result::Ok(())
/// # }).unwrap();
/// ```
pub struct TarReader<'a, R: AsyncRead + 'a> {
    // reader state machine, behind `Arc<Mutex<..>>` (the same type that
    // `TarRegularFileReader` holds)
    inner: Arc<Mutex<Pin<Box<TarReaderInner<'a, R>>>>>,
}
1087
1088impl<'a, R: AsyncRead + 'a> TarReader<'a, R> {
1089    /// Construct a streaming reader that yields [`TarEntry`] values.
1090    pub fn new(r: R) -> Self {
1091        Self {
1092            inner: Arc::new(Mutex::new(Box::pin(TarReaderInner::new(r)))),
1093        }
1094    }
1095}
1096
/// Timestamps attached to an archive entry: a mandatory modification time
/// plus optional access and status-change times.
#[derive(Clone, Debug, PartialEq, Eq)]
struct EntryTimes {
    mtime: SystemTime,
    atime: Option<SystemTime>,
    ctime: Option<SystemTime>,
}

impl EntryTimes {
    /// Build a value whose modification time is `mtime` seconds after the
    /// Unix epoch and whose other timestamps are unset.
    ///
    /// Fails with `ErrorKind::InvalidData` if the time is not representable.
    fn from_mtime(mtime: u64) -> Result<Self> {
        let mtime = unix_epoch_checked_add(Duration::from_secs(mtime))?;
        Ok(Self {
            mtime,
            ..Self::default()
        })
    }

    /// Modification time.
    fn mtime(&self) -> SystemTime {
        self.mtime
    }

    /// Replace the modification time.
    fn with_mtime(self, mtime: SystemTime) -> Self {
        Self { mtime, ..self }
    }

    /// Set the access time.
    fn with_atime(self, atime: SystemTime) -> Self {
        Self {
            atime: Some(atime),
            ..self
        }
    }

    /// Set the status-change time.
    fn with_ctime(self, ctime: SystemTime) -> Self {
        Self {
            ctime: Some(ctime),
            ..self
        }
    }
}

impl Default for EntryTimes {
    /// Modification time at the Unix epoch; no access or status-change time.
    fn default() -> Self {
        let mtime = UNIX_EPOCH;
        Self {
            mtime,
            atime: None,
            ctime: None,
        }
    }
}

/// Add `duration` to the Unix epoch, reporting overflow as
/// `ErrorKind::InvalidData` ("timestamp out of range").
fn unix_epoch_checked_add(duration: Duration) -> Result<SystemTime> {
    match UNIX_EPOCH.checked_add(duration) {
        Some(instant) => Ok(instant),
        None => Err(Error::new(ErrorKind::InvalidData, "timestamp out of range")),
    }
}
1148
/// Convert `time` to whole seconds since the Unix epoch.
///
/// Returns `None` when `time` lies before the epoch or carries a sub-second
/// component.
fn header_mtime_value(time: SystemTime) -> Option<u64> {
    match time.duration_since(UNIX_EPOCH) {
        Ok(elapsed) if elapsed.subsec_nanos() == 0 => Some(elapsed.as_secs()),
        _ => None,
    }
}
1156
1157/// Hard-link entry stored in a tar archive.
1158pub struct TarLink {
1159    path_name: Box<str>,
1160    link_name: Box<str>,
1161}
1162impl<R: AsyncRead> From<TarLink> for TarEntry<'_, R> {
1163    fn from(link: TarLink) -> Self {
1164        Self::Link(link)
1165    }
1166}
1167impl TarLink {
1168    /// Create a hard-link entry.
1169    ///
1170    /// `path_name` is the path of the link entry in the archive and
1171    /// `link_name` is the target path stored in the header.
1172    pub fn new<N: Into<Box<str>>, L: Into<Box<str>>>(path_name: N, link_name: L) -> TarLink {
1173        TarLink {
1174            path_name: path_name.into(),
1175            link_name: link_name.into(),
1176        }
1177    }
1178    /// Return the path stored for this entry.
1179    pub fn path(&'_ self) -> &'_ str {
1180        &self.path_name
1181    }
1182    /// Return the hard-link target path stored for this entry.
1183    pub fn link(&'_ self) -> &'_ str {
1184        &self.link_name
1185    }
1186    fn write_header(&self, buffer: &mut [u8]) -> std::io::Result<usize> {
1187        write_header(
1188            buffer,
1189            self.path_name.as_ref(),
1190            Some(self.link_name.as_ref()),
1191            Kind::Link,
1192            0,    // size
1193            0,    // mode
1194            0,    // uid
1195            0,    // gid
1196            None, // uname
1197            None, // gname
1198            &EntryTimes::default(),
1199            None, // device
1200            &AttrList::default(),
1201        )
1202    }
1203}
/// Device-node kind for [`TarDevice`].
///
/// The kind is what [`TarDevice::is_char`] and [`TarDevice::is_block`] test,
/// and it selects the entry kind used when the device header is written.
pub enum DeviceKind {
    /// Character device entry.
    Char,
    /// Block device entry.
    Block,
}
1211
1212/// Device-node metadata stored in a tar archive.
1213pub struct TarDevice {
1214    path_name: Box<str>,
1215    mode: u32,
1216    uid: u32,
1217    gid: u32,
1218    uname: Option<Box<str>>,
1219    gname: Option<Box<str>>,
1220    times: EntryTimes,
1221    kind: DeviceKind,
1222    major: u32,
1223    minor: u32,
1224    attrs: AttrList,
1225}
1226impl<R: AsyncRead> From<TarDevice> for TarEntry<'_, R> {
1227    fn from(device: TarDevice) -> Self {
1228        Self::Device(device)
1229    }
1230}
1231impl TarDevice {
1232    /// Create a character-device entry.
1233    pub fn new_char<N: Into<Box<str>>>(path_name: N, major: u32, minor: u32) -> TarDevice {
1234        TarDevice {
1235            path_name: path_name.into(),
1236            mode: 0o600,
1237            uid: 0,
1238            gid: 0,
1239            uname: None,
1240            gname: None,
1241            times: EntryTimes::default(),
1242            major,
1243            minor,
1244            kind: DeviceKind::Char,
1245            attrs: AttrList::default(),
1246        }
1247    }
1248    /// Create a block-device entry.
1249    pub fn new_block<N: Into<Box<str>>>(path_name: N, major: u32, minor: u32) -> TarDevice {
1250        TarDevice {
1251            path_name: path_name.into(),
1252            mode: 0o600,
1253            uid: 0,
1254            gid: 0,
1255            uname: None,
1256            gname: None,
1257            times: EntryTimes::default(),
1258            major,
1259            minor,
1260            kind: DeviceKind::Block,
1261            attrs: AttrList::default(),
1262        }
1263    }
1264    /// Return the path stored for this entry.
1265    pub fn path(&'_ self) -> &'_ str {
1266        &self.path_name
1267    }
1268    /// Return the raw permission bits stored in the tar header.
1269    pub fn mode(&self) -> u32 {
1270        self.mode
1271    }
1272    /// Return the modification time stored for this entry.
1273    pub fn mtime(&self) -> SystemTime {
1274        self.times.mtime()
1275    }
1276    /// Return the access time stored for this entry, if present.
1277    pub fn atime(&self) -> Option<SystemTime> {
1278        self.times.atime
1279    }
1280    /// Return the status-change time stored for this entry, if present.
1281    pub fn ctime(&self) -> Option<SystemTime> {
1282        self.times.ctime
1283    }
1284    /// Return the raw user id stored in the tar header.
1285    pub fn uid(&self) -> u32 {
1286        self.uid
1287    }
1288    /// Return the symbolic user name stored in the tar header, if present.
1289    pub fn uname(&self) -> &str {
1290        self.uname.as_deref().unwrap_or("")
1291    }
1292    /// Replace the raw user id stored in the tar header.
1293    pub fn with_uid(mut self, uid: u32) -> Self {
1294        self.uid = uid;
1295        self
1296    }
1297    /// Replace the symbolic user name stored in the tar header.
1298    pub fn with_uname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1299        self.uname = Some(name.into());
1300        self
1301    }
1302    /// Return the raw group id stored in the tar header.
1303    pub fn gid(&self) -> u32 {
1304        self.gid
1305    }
1306    /// Return the symbolic group name stored in the tar header, if present.
1307    pub fn gname(&self) -> &str {
1308        self.gname.as_deref().unwrap_or("")
1309    }
1310    /// Replace the raw group id stored in the tar header.
1311    pub fn with_gid(mut self, gid: u32) -> Self {
1312        self.gid = gid;
1313        self
1314    }
1315    /// Replace the symbolic group name stored in the tar header.
1316    pub fn with_gname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1317        self.gname = Some(name.into());
1318        self
1319    }
1320    /// Return `true` if this device entry is a character device.
1321    pub fn is_char(&self) -> bool {
1322        matches!(self.kind, DeviceKind::Char)
1323    }
1324    /// Return `true` if this device entry is a block device.
1325    pub fn is_block(&self) -> bool {
1326        matches!(self.kind, DeviceKind::Block)
1327    }
1328    /// Return the raw major device number stored in the tar header.
1329    pub fn major(&self) -> u32 {
1330        self.major
1331    }
1332    /// Return the raw minor device number stored in the tar header.
1333    pub fn minor(&self) -> u32 {
1334        self.minor
1335    }
1336    /// Replace the raw permission bits stored in the tar header.
1337    pub fn with_mode(mut self, mode: u32) -> Self {
1338        self.mode = mode;
1339        self
1340    }
1341    /// Replace the modification time stored for this entry.
1342    pub fn with_mtime(mut self, mtime: SystemTime) -> Self {
1343        self.times = self.times.with_mtime(mtime);
1344        self
1345    }
1346    /// Replace the access time stored for this entry.
1347    pub fn with_atime(mut self, atime: SystemTime) -> Self {
1348        self.times = self.times.with_atime(atime);
1349        self
1350    }
1351    /// Replace the status-change time stored for this entry.
1352    pub fn with_ctime(mut self, ctime: SystemTime) -> Self {
1353        self.times = self.times.with_ctime(ctime);
1354        self
1355    }
1356    /// Return the extended attributes that will be encoded in PAX records.
1357    pub fn attrs(&self) -> &AttrList {
1358        &self.attrs
1359    }
1360    /// Return a mutable reference to the extended attributes.
1361    pub fn attrs_mut(&mut self) -> &mut AttrList {
1362        &mut self.attrs
1363    }
1364    /// Replace the extended-attribute list and return the updated entry.
1365    pub fn with_attrs(mut self, attrs: AttrList) -> Self {
1366        self.attrs = attrs;
1367        self
1368    }
1369    fn write_header(&self, buffer: &mut [u8]) -> std::io::Result<usize> {
1370        write_header(
1371            buffer,
1372            self.path_name.as_ref(),
1373            None,
1374            match self.kind {
1375                DeviceKind::Char => Kind::CharDevice,
1376                DeviceKind::Block => Kind::BlockDevice,
1377            },
1378            0, // size
1379            self.mode,
1380            self.uid,
1381            self.gid,
1382            self.uname.as_deref(),
1383            self.gname.as_deref(),
1384            &self.times,
1385            Some((self.major, self.minor)),
1386            &self.attrs,
1387        )
1388    }
1389}
1390
1391/// FIFO (named pipe) metadata stored in a tar archive.
1392pub struct TarFifo {
1393    path_name: Box<str>,
1394    mode: u32,
1395    uid: u32,
1396    gid: u32,
1397    uname: Option<Box<str>>,
1398    gname: Option<Box<str>>,
1399    times: EntryTimes,
1400    attrs: AttrList,
1401}
1402impl<R: AsyncRead> From<TarFifo> for TarEntry<'_, R> {
1403    fn from(fifo: TarFifo) -> Self {
1404        Self::Fifo(fifo)
1405    }
1406}
1407impl TarFifo {
1408    /// Create a FIFO entry.
1409    pub fn new<N: Into<Box<str>>>(path_name: N) -> TarFifo {
1410        TarFifo {
1411            path_name: path_name.into(),
1412            mode: 0o644,
1413            uid: 0,
1414            gid: 0,
1415            uname: None,
1416            gname: None,
1417            times: EntryTimes::default(),
1418            attrs: AttrList::default(),
1419        }
1420    }
1421    /// Return the path stored for this entry.
1422    pub fn path(&'_ self) -> &'_ str {
1423        &self.path_name
1424    }
1425    /// Return the raw permission bits stored in the tar header.
1426    pub fn mode(&self) -> u32 {
1427        self.mode
1428    }
1429    /// Return the modification time stored for this entry.
1430    pub fn mtime(&self) -> SystemTime {
1431        self.times.mtime()
1432    }
1433    /// Return the access time stored for this entry, if present.
1434    pub fn atime(&self) -> Option<SystemTime> {
1435        self.times.atime
1436    }
1437    /// Return the status-change time stored for this entry, if present.
1438    pub fn ctime(&self) -> Option<SystemTime> {
1439        self.times.ctime
1440    }
1441    /// Return the raw user id stored in the tar header.
1442    pub fn uid(&self) -> u32 {
1443        self.uid
1444    }
1445    /// Return the symbolic user name stored in the tar header, if present.
1446    pub fn uname(&self) -> &str {
1447        self.uname.as_deref().unwrap_or("")
1448    }
1449    /// Replace the raw user id stored in the tar header.
1450    pub fn with_uid(mut self, uid: u32) -> Self {
1451        self.uid = uid;
1452        self
1453    }
1454    /// Replace the symbolic user name stored in the tar header.
1455    pub fn with_uname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1456        self.uname = Some(name.into());
1457        self
1458    }
1459    /// Return the raw group id stored in the tar header.
1460    pub fn gid(&self) -> u32 {
1461        self.gid
1462    }
1463    /// Return the symbolic group name stored in the tar header, if present.
1464    pub fn gname(&self) -> &str {
1465        self.gname.as_deref().unwrap_or("")
1466    }
1467    /// Replace the raw group id stored in the tar header.
1468    pub fn with_gid(mut self, gid: u32) -> Self {
1469        self.gid = gid;
1470        self
1471    }
1472    /// Replace the symbolic group name stored in the tar header.
1473    pub fn with_gname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1474        self.gname = Some(name.into());
1475        self
1476    }
1477    /// Replace the raw permission bits stored in the tar header.
1478    pub fn with_mode(mut self, mode: u32) -> Self {
1479        self.mode = mode;
1480        self
1481    }
1482    /// Replace the modification time stored for this entry.
1483    pub fn with_mtime(mut self, mtime: SystemTime) -> Self {
1484        self.times = self.times.with_mtime(mtime);
1485        self
1486    }
1487    /// Replace the access time stored for this entry.
1488    pub fn with_atime(mut self, atime: SystemTime) -> Self {
1489        self.times = self.times.with_atime(atime);
1490        self
1491    }
1492    /// Replace the status-change time stored for this entry.
1493    pub fn with_ctime(mut self, ctime: SystemTime) -> Self {
1494        self.times = self.times.with_ctime(ctime);
1495        self
1496    }
1497    /// Return the extended attributes that will be encoded in PAX records.
1498    pub fn attrs(&self) -> &AttrList {
1499        &self.attrs
1500    }
1501    /// Return a mutable reference to the extended attributes.
1502    pub fn attrs_mut(&mut self) -> &mut AttrList {
1503        &mut self.attrs
1504    }
1505    /// Replace the extended-attribute list and return the updated entry.
1506    pub fn with_attrs(mut self, attrs: AttrList) -> Self {
1507        self.attrs = attrs;
1508        self
1509    }
1510    fn write_header(&self, buffer: &mut [u8]) -> std::io::Result<usize> {
1511        write_header(
1512            buffer,
1513            self.path_name.as_ref(),
1514            None,
1515            Kind::Fifo,
1516            0, // size
1517            self.mode,
1518            self.uid,
1519            self.gid,
1520            self.uname.as_deref(),
1521            self.gname.as_deref(),
1522            &self.times,
1523            None, // device
1524            &self.attrs,
1525        )
1526    }
1527}
1528
1529/// Symbolic-link metadata stored in a tar archive.
1530pub struct TarSymlink {
1531    path_name: Box<str>,
1532    link_name: Box<str>,
1533    mode: u32,
1534    uid: u32,
1535    gid: u32,
1536    uname: Option<Box<str>>,
1537    gname: Option<Box<str>>,
1538    times: EntryTimes,
1539    attrs: AttrList,
1540}
1541impl<R: AsyncRead> From<TarSymlink> for TarEntry<'_, R> {
1542    fn from(symlink: TarSymlink) -> Self {
1543        Self::Symlink(symlink)
1544    }
1545}
1546impl TarSymlink {
1547    /// Create a symbolic-link entry.
1548    ///
1549    /// `path_name` is the link path stored in the archive and `link_name` is
1550    /// the target path.
1551    pub fn new<N: Into<Box<str>>, L: Into<Box<str>>>(path_name: N, link_name: L) -> TarSymlink {
1552        TarSymlink {
1553            path_name: path_name.into(),
1554            link_name: link_name.into(),
1555            mode: 0o777,
1556            uid: 0,
1557            gid: 0,
1558            uname: None,
1559            gname: None,
1560            times: EntryTimes::default(),
1561            attrs: AttrList::default(),
1562        }
1563    }
1564    /// Return the path stored for this entry.
1565    pub fn path(&'_ self) -> &'_ str {
1566        &self.path_name
1567    }
1568    /// Return the symbolic-link target path stored for this entry.
1569    pub fn link(&'_ self) -> &'_ str {
1570        &self.link_name
1571    }
1572    /// Return the raw permission bits stored in the tar header.
1573    pub fn mode(&self) -> u32 {
1574        self.mode
1575    }
1576    /// Return the modification time stored for this entry.
1577    pub fn mtime(&self) -> SystemTime {
1578        self.times.mtime()
1579    }
1580    /// Return the access time stored for this entry, if present.
1581    pub fn atime(&self) -> Option<SystemTime> {
1582        self.times.atime
1583    }
1584    /// Return the status-change time stored for this entry, if present.
1585    pub fn ctime(&self) -> Option<SystemTime> {
1586        self.times.ctime
1587    }
1588    /// Return the raw user id stored in the tar header.
1589    pub fn uid(&self) -> u32 {
1590        self.uid
1591    }
1592    /// Return the symbolic user name stored in the tar header, if present.
1593    pub fn uname(&self) -> &str {
1594        self.uname.as_deref().unwrap_or("")
1595    }
1596    /// Replace the raw user id stored in the tar header.
1597    pub fn with_uid(mut self, uid: u32) -> Self {
1598        self.uid = uid;
1599        self
1600    }
1601    /// Replace the symbolic user name stored in the tar header.
1602    pub fn with_uname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1603        self.uname = Some(name.into());
1604        self
1605    }
1606    /// Return the raw group id stored in the tar header.
1607    pub fn gid(&self) -> u32 {
1608        self.gid
1609    }
1610    /// Return the symbolic group name stored in the tar header, if present.
1611    pub fn gname(&self) -> &str {
1612        self.gname.as_deref().unwrap_or("")
1613    }
1614    /// Replace the raw group id stored in the tar header.
1615    pub fn with_gid(mut self, gid: u32) -> Self {
1616        self.gid = gid;
1617        self
1618    }
1619    /// Replace the symbolic group name stored in the tar header.
1620    pub fn with_gname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1621        self.gname = Some(name.into());
1622        self
1623    }
1624    /// Replace the raw permission bits stored in the tar header.
1625    pub fn with_mode(mut self, mode: u32) -> Self {
1626        self.mode = mode;
1627        self
1628    }
1629    /// Replace the modification time stored for this entry.
1630    pub fn with_mtime(mut self, mtime: SystemTime) -> Self {
1631        self.times = self.times.with_mtime(mtime);
1632        self
1633    }
1634    /// Replace the access time stored for this entry.
1635    pub fn with_atime(mut self, atime: SystemTime) -> Self {
1636        self.times = self.times.with_atime(atime);
1637        self
1638    }
1639    /// Replace the status-change time stored for this entry.
1640    pub fn with_ctime(mut self, ctime: SystemTime) -> Self {
1641        self.times = self.times.with_ctime(ctime);
1642        self
1643    }
1644    /// Return the extended attributes that will be encoded in PAX records.
1645    pub fn attrs(&self) -> &AttrList {
1646        &self.attrs
1647    }
1648    /// Return a mutable reference to the extended attributes.
1649    pub fn attrs_mut(&mut self) -> &mut AttrList {
1650        &mut self.attrs
1651    }
1652    /// Replace the extended-attribute list and return the updated entry.
1653    pub fn with_attrs(mut self, attrs: AttrList) -> Self {
1654        self.attrs = attrs;
1655        self
1656    }
1657    fn write_header(&self, buffer: &mut [u8]) -> std::io::Result<usize> {
1658        write_header(
1659            buffer,
1660            self.path_name.as_ref(),
1661            Some(self.link_name.as_ref()),
1662            Kind::Symlink,
1663            0, // size
1664            self.mode,
1665            self.uid,
1666            self.gid,
1667            self.uname.as_deref(),
1668            self.gname.as_deref(),
1669            &self.times,
1670            None, // device
1671            &self.attrs,
1672        )
1673    }
1674}
1675
1676/// Directory metadata stored in a tar archive.
1677pub struct TarDirectory {
1678    path_name: Box<str>,
1679    mode: u32,
1680    uid: u32,
1681    gid: u32,
1682    uname: Option<Box<str>>,
1683    gname: Option<Box<str>>,
1684    times: EntryTimes,
1685    size: u64,
1686    attrs: AttrList,
1687}
1688impl<R: AsyncRead> From<TarDirectory> for TarEntry<'_, R> {
1689    fn from(dir: TarDirectory) -> Self {
1690        Self::Directory(dir)
1691    }
1692}
1693impl TarDirectory {
1694    /// Create a directory entry.
1695    ///
1696    /// New directory entries are created with a stored size of `0`.
1697    pub fn new<N: Into<Box<str>>>(path_name: N) -> TarDirectory {
1698        TarDirectory {
1699            path_name: path_name.into(),
1700            size: 0,
1701            mode: 0o755,
1702            uid: 0,
1703            gid: 0,
1704            uname: None,
1705            gname: None,
1706            times: EntryTimes::default(),
1707            attrs: AttrList::default(),
1708        }
1709    }
1710    /// Return the path stored for this entry.
1711    pub fn path(&'_ self) -> &'_ str {
1712        &self.path_name
1713    }
1714    /// Return the size stored in the tar header.
1715    pub fn size(&self) -> u64 {
1716        self.size
1717    }
1718    /// Return the raw permission bits stored in the tar header.
1719    pub fn mode(&self) -> u32 {
1720        self.mode
1721    }
1722    /// Return the modification time stored for this entry.
1723    pub fn mtime(&self) -> SystemTime {
1724        self.times.mtime()
1725    }
1726    /// Return the access time stored for this entry, if present.
1727    pub fn atime(&self) -> Option<SystemTime> {
1728        self.times.atime
1729    }
1730    /// Return the status-change time stored for this entry, if present.
1731    pub fn ctime(&self) -> Option<SystemTime> {
1732        self.times.ctime
1733    }
1734    /// Return the raw user id stored in the tar header.
1735    pub fn uid(&self) -> u32 {
1736        self.uid
1737    }
1738    /// Return the symbolic user name stored in the tar header, if present.
1739    pub fn uname(&self) -> &str {
1740        self.uname.as_deref().unwrap_or("")
1741    }
1742    /// Replace the raw user id stored in the tar header.
1743    pub fn with_uid(mut self, uid: u32) -> Self {
1744        self.uid = uid;
1745        self
1746    }
1747    /// Replace the symbolic user name stored in the tar header.
1748    pub fn with_uname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1749        self.uname = Some(name.into());
1750        self
1751    }
1752    /// Return the raw group id stored in the tar header.
1753    pub fn gid(&self) -> u32 {
1754        self.gid
1755    }
1756    /// Return the symbolic group name stored in the tar header, if present.
1757    pub fn gname(&self) -> &str {
1758        self.gname.as_deref().unwrap_or("")
1759    }
1760    /// Replace the raw group id stored in the tar header.
1761    pub fn with_gid(mut self, gid: u32) -> Self {
1762        self.gid = gid;
1763        self
1764    }
1765    /// Replace the symbolic group name stored in the tar header.
1766    pub fn with_gname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1767        self.gname = Some(name.into());
1768        self
1769    }
1770    /// Replace the raw permission bits stored in the tar header.
1771    pub fn with_mode(mut self, mode: u32) -> Self {
1772        self.mode = mode;
1773        self
1774    }
1775    /// Replace the modification time stored for this entry.
1776    pub fn with_mtime(mut self, mtime: SystemTime) -> Self {
1777        self.times = self.times.with_mtime(mtime);
1778        self
1779    }
1780    /// Replace the access time stored for this entry.
1781    pub fn with_atime(mut self, atime: SystemTime) -> Self {
1782        self.times = self.times.with_atime(atime);
1783        self
1784    }
1785    /// Replace the status-change time stored for this entry.
1786    pub fn with_ctime(mut self, ctime: SystemTime) -> Self {
1787        self.times = self.times.with_ctime(ctime);
1788        self
1789    }
1790    /// Replace the size stored in the tar header.
1791    pub fn with_size(mut self, size: u64) -> Self {
1792        self.size = size;
1793        self
1794    }
1795    /// Return the extended attributes that will be encoded in PAX records.
1796    pub fn attrs(&self) -> &AttrList {
1797        &self.attrs
1798    }
1799    /// Return a mutable reference to the extended attributes.
1800    pub fn attrs_mut(&mut self) -> &mut AttrList {
1801        &mut self.attrs
1802    }
1803    /// Replace the extended-attribute list and return the updated entry.
1804    pub fn with_attrs(mut self, attrs: AttrList) -> Self {
1805        self.attrs = attrs;
1806        self
1807    }
1808    fn write_header(&self, buffer: &mut [u8]) -> std::io::Result<usize> {
1809        write_header(
1810            buffer,
1811            self.path_name.as_ref(),
1812            None,
1813            Kind::Directory,
1814            self.size,
1815            self.mode,
1816            self.uid,
1817            self.gid,
1818            self.uname.as_deref(),
1819            self.gname.as_deref(),
1820            &self.times,
1821            None, // device
1822            &self.attrs,
1823        )
1824    }
1825}
1826
/// List of extended attributes stored in PAX records.
///
/// Each attribute is a name paired with a raw byte value. When serialized,
/// attributes are emitted as `SCHILY.xattr.*` PAX records. Replacing
/// attributes with [`TarRegularFile::with_attrs`] or similar helpers
/// overwrites the whole list.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct AttrList {
    // name/value pairs in insertion order
    inner: Vec<(Box<str>, Box<[u8]>)>,
}

impl AttrList {
    /// Create an empty attribute list.
    pub fn new() -> Self {
        Self::default()
    }

    /// Number of attributes stored.
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// `true` when no attributes are stored.
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }

    /// Append an attribute name/value pair at the end of the list.
    pub fn push<N, V>(&mut self, name: N, value: V)
    where
        N: Into<Box<str>>,
        V: Into<Box<[u8]>>,
    {
        let pair = (name.into(), value.into());
        self.inner.push(pair);
    }

    /// Append an attribute pair and hand the list back, builder-style.
    pub fn with<N, V>(mut self, name: N, value: V) -> Self
    where
        N: Into<Box<str>>,
        V: Into<Box<[u8]>>,
    {
        self.inner.push((name.into(), value.into()));
        self
    }

    /// Iterate over the stored attributes in insertion order.
    pub fn iter(&self) -> impl Iterator<Item = (&str, &[u8])> {
        self.inner.iter().map(|pair| (&*pair.0, &*pair.1))
    }
}

impl From<Vec<(Box<str>, Box<[u8]>)>> for AttrList {
    /// Take ownership of ready-made name/value pairs.
    fn from(pairs: Vec<(Box<str>, Box<[u8]>)>) -> Self {
        AttrList { inner: pairs }
    }
}
1877
pin_project! {
    /// Regular-file metadata paired with a reader for its payload bytes.
    ///
    /// Values of this type are yielded by [`TarReader`] and can also be built
    /// manually for [`TarWriter`]. The type implements [`AsyncRead`] so callers
    /// can stream file contents.
    pub struct TarRegularFile<'a, R> {
        // path of the entry inside the archive
        path_name: Box<str>,
        // size stored in the tar header; must match the number of bytes that
        // `inner` yields
        size: u64,
        // raw permission bits
        mode: u32,
        // numeric owner / group ids
        uid: u32,
        gid: u32,
        // symbolic owner / group names, when set
        uname: Option<Box<str>>,
        gname: Option<Box<str>>,
        // modification, access and status-change times
        times: EntryTimes,
        // extended attributes
        attrs: AttrList,
        // reader that supplies the file body (structurally pinned)
        #[pin]
        inner: R,
        // carries the `'a` lifetime parameter without storing a borrow
        marker: std::marker::PhantomData<&'a ()>,
    }
}
1899impl<'a, R: AsyncRead + 'a> TarRegularFile<'a, R> {
1900    /// Build a regular-file entry with the provided body reader.
1901    ///
1902    /// `size` must match the exact number of bytes that `inner` will yield.
1903    pub fn new<N: Into<Box<str>>>(path_name: N, size: u64, inner: R) -> TarRegularFile<'a, R> {
1904        TarRegularFile {
1905            path_name: path_name.into(),
1906            size,
1907            mode: 0o644,
1908            uid: 0,
1909            gid: 0,
1910            uname: None,
1911            gname: None,
1912            times: EntryTimes::default(),
1913            attrs: AttrList::default(),
1914            inner,
1915            marker: std::marker::PhantomData,
1916        }
1917    }
1918    /// Return the size stored in the tar header.
1919    pub fn size(&self) -> u64 {
1920        self.size
1921    }
1922    /// Return the path stored for this entry.
1923    pub fn path(&'_ self) -> &'_ str {
1924        &self.path_name
1925    }
1926    /// Return the raw permission bits stored in the tar header.
1927    pub fn mode(&self) -> u32 {
1928        self.mode
1929    }
1930    /// Return the modification time stored for this entry.
1931    pub fn mtime(&self) -> SystemTime {
1932        self.times.mtime()
1933    }
1934    /// Return the access time stored for this entry, if present.
1935    pub fn atime(&self) -> Option<SystemTime> {
1936        self.times.atime
1937    }
1938    /// Return the status-change time stored for this entry, if present.
1939    pub fn ctime(&self) -> Option<SystemTime> {
1940        self.times.ctime
1941    }
1942    /// Return the raw user id stored in the tar header.
1943    pub fn uid(&self) -> u32 {
1944        self.uid
1945    }
1946    /// Return the symbolic user name stored in the tar header, if present.
1947    pub fn uname(&self) -> &str {
1948        self.uname.as_deref().unwrap_or("")
1949    }
1950    /// Replace the raw user id stored in the tar header.
1951    pub fn with_uid(mut self, uid: u32) -> Self {
1952        self.uid = uid;
1953        self
1954    }
1955    /// Replace the symbolic user name stored in the tar header.
1956    pub fn with_uname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1957        self.uname = Some(name.into());
1958        self
1959    }
1960    /// Return the raw group id stored in the tar header.
1961    pub fn gid(&self) -> u32 {
1962        self.gid
1963    }
1964    /// Return the symbolic group name stored in the tar header, if present.
1965    pub fn gname(&self) -> &str {
1966        self.gname.as_deref().unwrap_or("")
1967    }
1968    /// Replace the raw group id stored in the tar header.
1969    pub fn with_gid(mut self, gid: u32) -> Self {
1970        self.gid = gid;
1971        self
1972    }
1973    /// Replace the symbolic group name stored in the tar header.
1974    pub fn with_gname<S: Into<Box<str>>>(mut self, name: S) -> Self {
1975        self.gname = Some(name.into());
1976        self
1977    }
1978    /// Replace the raw permission bits stored in the tar header.
1979    pub fn with_mode(mut self, mode: u32) -> Self {
1980        self.mode = mode;
1981        self
1982    }
1983    /// Replace the modification time stored for this entry.
1984    pub fn with_mtime(mut self, mtime: SystemTime) -> Self {
1985        self.times = self.times.with_mtime(mtime);
1986        self
1987    }
1988    /// Replace the access time stored for this entry.
1989    pub fn with_atime(mut self, atime: SystemTime) -> Self {
1990        self.times = self.times.with_atime(atime);
1991        self
1992    }
1993    /// Replace the status-change time stored for this entry.
1994    pub fn with_ctime(mut self, ctime: SystemTime) -> Self {
1995        self.times = self.times.with_ctime(ctime);
1996        self
1997    }
1998    /// Return the extended attributes that will be encoded in PAX records.
1999    pub fn attrs(&self) -> &AttrList {
2000        &self.attrs
2001    }
2002    /// Return a mutable reference to the extended attributes.
2003    pub fn attrs_mut(&mut self) -> &mut AttrList {
2004        &mut self.attrs
2005    }
2006    /// Replace the extended-attribute list and return the updated entry.
2007    pub fn with_attrs(mut self, attrs: AttrList) -> Self {
2008        self.attrs = attrs;
2009        self
2010    }
2011    fn write_header(&self, buffer: &mut [u8]) -> std::io::Result<usize> {
2012        write_header(
2013            buffer,
2014            self.path_name.as_ref(),
2015            None,
2016            Kind::File,
2017            self.size,
2018            self.mode,
2019            self.uid,
2020            self.gid,
2021            self.uname.as_deref(),
2022            self.gname.as_deref(),
2023            &self.times,
2024            None, // device
2025            &self.attrs,
2026        )
2027    }
2028}
2029impl<'a, R: AsyncRead + 'a> From<TarRegularFile<'a, R>> for TarEntry<'a, R> {
2030    fn from(file: TarRegularFile<'a, R>) -> Self {
2031        Self::File(file)
2032    }
2033}
2034/// High-level tar entry representation used by [`TarReader`] and [`TarWriter`].
2035pub enum TarEntry<'a, R: AsyncRead + 'a> {
2036    /// A regular file entry with a streaming body reader.
2037    File(TarRegularFile<'a, R>),
2038    /// A hard-link entry.
2039    Link(TarLink),
2040    /// A symbolic-link entry.
2041    Symlink(TarSymlink),
2042    /// A directory entry.
2043    Directory(TarDirectory),
2044    /// A character or block device entry.
2045    Device(TarDevice),
2046    /// A FIFO entry.
2047    Fifo(TarFifo),
2048}
2049
2050impl<'a, R: AsyncRead + 'a> TarEntry<'a, R> {
2051    fn write_header(&self, buffer: &mut [u8]) -> std::io::Result<usize> {
2052        match self {
2053            Self::Directory(dir) => dir.write_header(buffer),
2054            Self::Device(device) => device.write_header(buffer),
2055            Self::Fifo(fifo) => fifo.write_header(buffer),
2056            Self::File(file) => file.write_header(buffer),
2057            Self::Link(link) => link.write_header(buffer),
2058            Self::Symlink(symlink) => symlink.write_header(buffer),
2059        }
2060    }
2061}
2062
2063impl<'a, R: AsyncRead + 'a> std::fmt::Debug for TarEntry<'a, R> {
2064    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2065        match self {
2066            Self::File(file) => f
2067                .debug_struct("TarEntry::File")
2068                .field("path_name", &file.path_name)
2069                .field("size", &file.size)
2070                .field("mode", &file.mode)
2071                .field("mtime", &file.mtime())
2072                .field("atime", &file.times.atime)
2073                .field("ctime", &file.times.ctime)
2074                .field("uid", &file.uid)
2075                .field("gid", &file.gid)
2076                .field("uname", &file.uname)
2077                .field("gname", &file.gname)
2078                .field("attrs", &file.attrs.len())
2079                .finish(),
2080            Self::Device(device) => f
2081                .debug_struct("TarEntry::Device")
2082                .field("path_name", &device.path_name)
2083                .field("mode", &device.mode)
2084                .field("mtime", &device.mtime())
2085                .field("atime", &device.times.atime)
2086                .field("ctime", &device.times.ctime)
2087                .field("uid", &device.uid)
2088                .field("gid", &device.gid)
2089                .field("uname", &device.uname)
2090                .field("gname", &device.gname)
2091                .field("attrs", &device.attrs.len())
2092                .field(
2093                    "kind",
2094                    match device.kind {
2095                        DeviceKind::Char => &"char",
2096                        DeviceKind::Block => &"block",
2097                    },
2098                )
2099                .field("major", &device.major)
2100                .field("minor", &device.minor)
2101                .finish(),
2102            Self::Fifo(fifo) => f
2103                .debug_struct("TarEntry::Fifo")
2104                .field("path_name", &fifo.path_name)
2105                .field("mode", &fifo.mode)
2106                .field("mtime", &fifo.mtime())
2107                .field("atime", &fifo.times.atime)
2108                .field("ctime", &fifo.times.ctime)
2109                .field("uid", &fifo.uid)
2110                .field("gid", &fifo.gid)
2111                .field("uname", &fifo.uname)
2112                .field("gname", &fifo.gname)
2113                .field("attrs", &fifo.attrs.len())
2114                .finish(),
2115            Self::Link(link) => f
2116                .debug_struct("TarEntry::Link")
2117                .field("path_name", &link.path_name)
2118                .field("link_name", &link.link_name)
2119                .finish(),
2120            Self::Symlink(symlink) => f
2121                .debug_struct("TarEntry::Symlink")
2122                .field("path_name", &symlink.path_name)
2123                .field("link_name", &symlink.link_name)
2124                .field("mode", &symlink.mode)
2125                .field("mtime", &symlink.mtime())
2126                .field("atime", &symlink.times.atime)
2127                .field("ctime", &symlink.times.ctime)
2128                .field("uid", &symlink.uid)
2129                .field("gid", &symlink.gid)
2130                .field("uname", &symlink.uname)
2131                .field("gname", &symlink.gname)
2132                .field("attrs", &symlink.attrs.len())
2133                .finish(),
2134            Self::Directory(dir) => f
2135                .debug_struct("TarEntry::Directory")
2136                .field("path_name", &dir.path_name)
2137                .field("size", &dir.size)
2138                .field("mode", &dir.mode)
2139                .field("mtime", &dir.mtime())
2140                .field("atime", &dir.times.atime)
2141                .field("ctime", &dir.times.ctime)
2142                .field("uid", &dir.uid)
2143                .field("gid", &dir.gid)
2144                .field("uname", &dir.uname)
2145                .field("gname", &dir.gname)
2146                .field("attrs", &dir.attrs.len())
2147                .finish(),
2148        }
2149    }
2150}
2151
/// Metadata overrides gathered from global PAX headers and from the pending
/// extension headers (GNU long name/link and PAX records).
///
/// Every field that is `None` was not supplied by any extension.
struct PaxInfo {
    /// Entry path from a PAX `path` record or a `LongName` extension header.
    path: Option<Box<str>>,
    /// Link target from a PAX `linkpath` record or a `LongLink` extension header.
    linkpath: Option<Box<str>>,
    /// Access time from a PAX `atime` record.
    atime: Option<SystemTime>,
    /// Status-change time from a PAX `ctime` record.
    ctime: Option<SystemTime>,
    /// Modification time from a PAX `mtime` record.
    mtime: Option<SystemTime>,
    /// Value of a PAX `size` record.
    size: Option<u64>,
    /// Value of a PAX `uid` record.
    uid: Option<u32>,
    /// Value of a PAX `gid` record.
    gid: Option<u32>,
    /// Value of a PAX `uname` record.
    uname: Option<Box<str>>,
    /// Value of a PAX `gname` record.
    gname: Option<Box<str>>,
    /// Extended attributes collected from `SCHILY.xattr.*` records.
    attrs: AttrList,
}
2165
2166fn entry_path(hdr: &Header, path: Option<Box<str>>) -> Result<Box<str>> {
2167    match hdr.kind() {
2168        HeaderKind::Gnu(hdr) => Ok(path.map_or_else(|| hdr.path_name(), Ok)?),
2169        HeaderKind::Ustar(hdr) => Ok(path.map_or_else(|| hdr.path_name(), Ok)?),
2170        HeaderKind::Old(hdr) => path.map_or_else(|| hdr.path_name(), Ok),
2171    }
2172}
2173
2174fn entry_path_link(
2175    hdr: &Header,
2176    path: Option<Box<str>>,
2177    link: Option<Box<str>>,
2178) -> Result<(Box<str>, Box<str>)> {
2179    match hdr.kind() {
2180        HeaderKind::Gnu(hdr) => Ok((
2181            path.map_or_else(|| hdr.path_name(), Ok)?,
2182            link.map_or_else(|| hdr.link_name(), Ok)?,
2183        )),
2184        HeaderKind::Ustar(hdr) => Ok((
2185            path.map_or_else(|| hdr.path_name(), Ok)?,
2186            link.map_or_else(|| hdr.link_name(), Ok)?,
2187        )),
2188        HeaderKind::Old(hdr) => Ok((
2189            path.map_or_else(|| hdr.path_name(), Ok)?,
2190            link.map_or_else(|| hdr.link_name(), Ok)?,
2191        )),
2192    }
2193}
2194
2195fn ext_as_path(hdr: &Header, size: usize, ext: &Option<ExtensionBuffer>) -> Result<Box<str>> {
2196    if size <= BLOCK_SIZE {
2197        hdr.as_null_terminated_str(..size)
2198    } else {
2199        ext.as_ref().unwrap().as_null_terminated_str(..size)
2200    }
2201}
2202fn ext_as_str(hdr: &Header, size: usize, ext: &Option<ExtensionBuffer>) -> Result<Box<str>> {
2203    if size <= BLOCK_SIZE {
2204        hdr.as_str(..size)
2205    } else {
2206        ext.as_ref().unwrap().as_str(..size)
2207    }
2208    .map(|p| p.to_string().into_boxed_str())
2209}
2210
2211fn take_pax_info(exts: &mut Vec<ExtensionHeader>, globs: &[PosixExtension]) -> Result<PaxInfo> {
2212    let mut info = PaxInfo {
2213        path: None,
2214        linkpath: None,
2215        atime: None,
2216        ctime: None,
2217        mtime: None,
2218        size: None,
2219        uid: None,
2220        gid: None,
2221        uname: None,
2222        gname: None,
2223        attrs: AttrList::default(),
2224    };
2225    for glob in globs {
2226        apply_pax_extension(glob, &mut info)?;
2227    }
2228    for ext in exts.drain(..) {
2229        match ext {
2230            ExtensionHeader::LongName(name) => info.path = Some(name),
2231            ExtensionHeader::LongLink(name) => info.linkpath = Some(name),
2232            ExtensionHeader::PosixExtension(ext) => apply_pax_extension(&ext, &mut info)?,
2233        }
2234    }
2235    Ok(info)
2236}
2237
2238fn apply_pax_extension(ext: &PosixExtension, info: &mut PaxInfo) -> Result<()> {
2239    ext.for_each_record(|key, val| {
2240        if key == "path" {
2241            info.path = Some(val.to_string().into_boxed_str());
2242        } else if key == "linkpath" {
2243            info.linkpath = Some(val.to_string().into_boxed_str());
2244        } else if key == "atime" {
2245            info.atime = Some(parse_pax_time(val)?);
2246        } else if key == "ctime" {
2247            info.ctime = Some(parse_pax_time(val)?);
2248        } else if key == "mtime" {
2249            info.mtime = Some(parse_pax_time(val)?);
2250        } else if key == "size" {
2251            info.size = Some(parse_pax_u64(val, "size")?);
2252        } else if key == "uid" {
2253            info.uid = Some(parse_pax_u32(val, "uid")?);
2254        } else if key == "gid" {
2255            info.gid = Some(parse_pax_u32(val, "gid")?);
2256        } else if key == "uname" {
2257            info.uname = Some(val.to_string().into_boxed_str());
2258        } else if key == "gname" {
2259            info.gname = Some(val.to_string().into_boxed_str());
2260        } else if let Some(name) = key.strip_prefix("SCHILY.xattr.") {
2261            attr_set(&mut info.attrs, name, val.as_bytes());
2262        }
2263        Ok(())
2264    })
2265}
2266
/// Parse the value of PAX record `key` as a `u64`.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidData` naming `key` and `val` when `val` is not
/// a valid `u64`.
fn parse_pax_u64(val: &str, key: &str) -> Result<u64> {
    match val.parse::<u64>() {
        Ok(number) => Ok(number),
        Err(_) => {
            let message = format!("invalid PAX {key} value: {val}");
            Err(Error::new(ErrorKind::InvalidData, message))
        }
    }
}
2275
2276fn parse_pax_u32(val: &str, key: &str) -> Result<u32> {
2277    let parsed = parse_pax_u64(val, key)?;
2278    parsed.try_into().map_err(|_| {
2279        Error::new(
2280            ErrorKind::InvalidData,
2281            format!("PAX {key} value out of range: {val}"),
2282        )
2283    })
2284}
2285
/// Parse a PAX timestamp of the form `[-]seconds[.fraction]` into a
/// [`SystemTime`] relative to the Unix epoch.
///
/// Both `seconds` and `fraction` must consist solely of ASCII digits. The
/// fraction is kept at nanosecond resolution; digits beyond the ninth are
/// tolerated only when they are all `0`. A leading `-` yields a time before
/// the epoch.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidData` when
/// * the seconds or fraction part is empty or contains a non-digit (this
///   includes an explicit `+` sign, which integer parsing alone would accept),
/// * the fraction carries non-zero digits past nanosecond precision, or
/// * the resulting time cannot be represented as a `SystemTime`.
fn parse_pax_time(val: &str) -> Result<SystemTime> {
    let invalid = || {
        Error::new(
            ErrorKind::InvalidData,
            format!("invalid PAX timestamp value: {val}"),
        )
    };
    let is_decimal = |text: &str| !text.is_empty() && text.bytes().all(|b| b.is_ascii_digit());

    let (seconds, fraction) = match val.split_once('.') {
        Some((seconds, fraction)) => (seconds, Some(fraction)),
        None => (val, None),
    };

    let nanos = match fraction {
        None => 0,
        Some(fraction) => {
            if !is_decimal(fraction) {
                return Err(invalid());
            }
            // `fraction` is pure ASCII here, so splitting at a byte offset is safe.
            let (digits, excess) = fraction.split_at(fraction.len().min(9));
            if excess.bytes().any(|b| b != b'0') {
                return Err(Error::new(
                    ErrorKind::InvalidData,
                    format!("PAX timestamp exceeds nanosecond precision: {val}"),
                ));
            }
            let raw = digits.parse::<u32>().map_err(|_| invalid())?;
            // Scale e.g. ".5" (one digit) up to 500_000_000 ns.
            let scale = 9 - digits.len();
            raw.saturating_mul(10u32.pow(scale as u32))
        }
    };

    let (negative, magnitude) = match seconds.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, seconds),
    };
    // `u64::from_str` accepts a leading `+`; insist on plain digits so that
    // values such as `+5` or `-+5` are rejected like any other malformed input.
    if !is_decimal(magnitude) {
        return Err(invalid());
    }
    let whole = magnitude.parse::<u64>().map_err(|_| invalid())?;

    let offset = Duration::new(whole, nanos);
    let time = if negative {
        UNIX_EPOCH.checked_sub(offset)
    } else {
        UNIX_EPOCH.checked_add(offset)
    };
    time.ok_or_else(invalid)
}
2349
/// Render `time` as a PAX timestamp string relative to the Unix epoch.
///
/// The output is `[-]seconds` when the time falls on a whole second, and
/// `[-]seconds.fraction` otherwise, with trailing zeros of the nanosecond
/// fraction removed. Times before the epoch get a leading `-`.
fn format_pax_time(time: SystemTime) -> String {
    let (sign, delta) = match time.duration_since(UNIX_EPOCH) {
        Ok(after) => ("", after),
        // The error carries the distance by which `time` precedes the epoch.
        Err(before) => ("-", before.duration()),
    };

    let mut rendered = format!("{sign}{}", delta.as_secs());
    let nanos = delta.subsec_nanos();
    if nanos != 0 {
        let fraction = format!("{nanos:09}");
        rendered.push('.');
        rendered.push_str(fraction.trim_end_matches('0'));
    }
    rendered
}
2366
/// Walk the records of a PAX extended header and hand each `key`/`value`
/// pair to `cb`.
///
/// Every record has the shape `"<len> <key>=<value>\n"`, where `<len>` is the
/// byte length of the whole record including the length digits and the
/// trailing newline. The key ends at the first `=`; the value may itself
/// contain `=`. Parsing stops at the first malformed record or the first
/// error returned by `cb`.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidData` for a malformed record, or whatever error
/// `cb` produced.
fn parse_pax_records<'a>(
    mut ext: &'a str,
    mut cb: impl FnMut(&'a str, &'a str) -> Result<()>,
) -> Result<()> {
    fn malformed(message: &'static str) -> Error {
        Error::new(ErrorKind::InvalidData, message)
    }

    while !ext.is_empty() {
        let (len_text, _) = ext
            .split_once(' ')
            .ok_or_else(|| malformed("malformed PAX record: missing length separator"))?;
        let len: usize = len_text
            .parse()
            .map_err(|_| malformed("malformed PAX record: invalid length"))?;

        // Bytes taken up by the length digits plus the separating space.
        let prefix_len = len_text.len() + 1;
        // A record needs room for the prefix, a non-empty payload and the
        // newline, must fit into the remaining input and must not cut a
        // multi-byte character in half.
        let length_ok = len != 0
            && len <= ext.len()
            && len > prefix_len + 1
            && ext.is_char_boundary(len);
        if !length_ok {
            return Err(malformed("malformed PAX record: invalid length"));
        }

        let (record, rest) = ext.split_at(len);
        ext = rest;

        let body = record
            .strip_suffix('\n')
            .ok_or_else(|| malformed("malformed PAX record: missing trailing newline"))?;
        let payload = &body[prefix_len..];
        let (key, value) = payload
            .split_once('=')
            .ok_or_else(|| malformed("malformed PAX record: missing '='"))?;
        cb(key, value)?;
    }
    Ok(())
}
2406
2407fn attr_set(attrs: &mut AttrList, name: &str, value: &[u8]) {
2408    if let Some(pos) = attrs
2409        .inner
2410        .iter()
2411        .position(|(existing, _)| existing.as_ref() == name)
2412    {
2413        attrs.inner.remove(pos);
2414    }
2415    attrs.inner.push((
2416        name.to_string().into_boxed_str(),
2417        value.to_vec().into_boxed_slice(),
2418    ));
2419}
2420
2421impl<'a, R: AsyncRead + 'a> TarReaderInner<'a, R> {
2422    fn new(r: R) -> Self {
2423        Self {
2424            state: Header,
2425            pos: 0,
2426            nxt: BLOCK_SIZE as u64,
2427            ext: None,
2428            exts: Vec::new(),
2429            globs: Vec::new(),
2430            header: Header::new(),
2431            reader: r,
2432            marker: std::marker::PhantomData,
2433        }
2434    }
2435    /// Advance the state machine until the next entry or EOF marker is decoded.
2436    fn poll_read_header(
2437        self: Pin<&mut Self>,
2438        ctx: &mut Context<'_>,
2439    ) -> Poll<Option<Result<Entry>>> {
2440        let mut this = self.project();
2441        loop {
2442            match this.state {
2443                Header => {
2444                    let remaining = *this.nxt - *this.pos;
2445                    let n = {
2446                        let filled = BLOCK_SIZE - remaining as usize;
2447                        let (hdr, reader) = (&mut this.header, &mut this.reader);
2448                        let n =
2449                            ready_opt!(poll_read_compat(pin!(reader), ctx, hdr.buf_mut(filled..)));
2450                        if n == 0 {
2451                            Err(Error::new(
2452                                ErrorKind::UnexpectedEof,
2453                                "Unexpected EOF while reading tar header",
2454                            ))
2455                        } else {
2456                            Ok(n)
2457                        }
2458                    }?;
2459                    *this.pos += n as u64;
2460                    if remaining != n as u64 {
2461                        continue;
2462                    }
2463                    if this.header.is_zero() {
2464                        *this.nxt += BLOCK_SIZE as u64;
2465                        *this.state = Eof;
2466                        continue;
2467                    }
2468                    this.header.validate_checksum()?;
2469                    let kind = this.header.entry_type().map_err(|t| {
2470                        Error::new(
2471                            ErrorKind::InvalidData,
2472                            format!("header type {} is not supported", t),
2473                        )
2474                    })?;
2475                    return Poll::Ready(Some(match kind {
2476                        Kind::File | Kind::File0 | Kind::Continous => {
2477                            let info = take_pax_info(this.exts, this.globs)?;
2478                            let path_name = entry_path(this.header, info.path.clone())?;
2479                            let size = effective_size(this.header, info.size)?;
2480                            let uid = effective_uid(this.header, info.uid)?;
2481                            let gid = effective_gid(this.header, info.gid)?;
2482                            let times = effective_times(this.header, &info)?;
2483                            let uname = effective_uname(this.header, info.uname)?;
2484                            let gname = effective_gname(this.header, info.gname)?;
2485                            Ok(if path_name.ends_with('/') && this.header.is_old() {
2486                                *this.nxt += BLOCK_SIZE as u64;
2487                                *this.state = Header;
2488                                Entry::Directory(TarDirectory {
2489                                    size,
2490                                    mode: this.header.mode()?,
2491                                    uid,
2492                                    gid,
2493                                    uname,
2494                                    gname,
2495                                    times,
2496                                    path_name,
2497                                    attrs: info.attrs,
2498                                })
2499                            } else {
2500                                *this.nxt += size;
2501                                *this.state = Entry;
2502                                Entry::File {
2503                                    size,
2504                                    mode: this.header.mode()?,
2505                                    uid,
2506                                    gid,
2507                                    uname,
2508                                    gname,
2509                                    times,
2510                                    eof: *this.nxt,
2511                                    path_name,
2512                                    attrs: info.attrs,
2513                                }
2514                            })
2515                        }
2516                        Kind::Directory => {
2517                            let info = take_pax_info(this.exts, this.globs)?;
2518                            let size = effective_size(this.header, info.size)?;
2519                            let uid = effective_uid(this.header, info.uid)?;
2520                            let gid = effective_gid(this.header, info.gid)?;
2521                            let times = effective_times(this.header, &info)?;
2522                            let uname = effective_uname(this.header, info.uname)?;
2523                            let gname = effective_gname(this.header, info.gname)?;
2524                            *this.nxt += BLOCK_SIZE as u64;
2525                            *this.state = Header;
2526                            let path_name = entry_path(this.header, info.path)?;
2527                            Ok(Entry::Directory(TarDirectory {
2528                                size,
2529                                mode: this.header.mode()?,
2530                                uid,
2531                                gid,
2532                                uname,
2533                                gname,
2534                                times,
2535                                path_name,
2536                                attrs: info.attrs,
2537                            }))
2538                        }
2539                        Kind::Fifo => {
2540                            let info = take_pax_info(this.exts, this.globs)?;
2541                            let uid = effective_uid(this.header, info.uid)?;
2542                            let gid = effective_gid(this.header, info.gid)?;
2543                            let times = effective_times(this.header, &info)?;
2544                            let uname = effective_uname(this.header, info.uname)?;
2545                            let gname = effective_gname(this.header, info.gname)?;
2546                            *this.nxt += BLOCK_SIZE as u64;
2547                            *this.state = Header;
2548                            let path_name = entry_path(this.header, info.path)?;
2549                            Ok(Entry::Fifo(TarFifo {
2550                                path_name,
2551                                mode: this.header.mode()?,
2552                                uid,
2553                                gid,
2554                                uname,
2555                                gname,
2556                                times,
2557                                attrs: info.attrs,
2558                            }))
2559                        }
2560                        Kind::CharDevice | Kind::BlockDevice => {
2561                            let info = take_pax_info(this.exts, this.globs)?;
2562                            let uid = effective_uid(this.header, info.uid)?;
2563                            let gid = effective_gid(this.header, info.gid)?;
2564                            let times = effective_times(this.header, &info)?;
2565                            let uname = effective_uname(this.header, info.uname)?;
2566                            let gname = effective_gname(this.header, info.gname)?;
2567                            *this.nxt += BLOCK_SIZE as u64;
2568                            *this.state = Header;
2569                            let path_name = entry_path(this.header, info.path)?;
2570                            Ok(Entry::Device(TarDevice {
2571                                path_name,
2572                                mode: this.header.mode()?,
2573                                uid,
2574                                gid,
2575                                uname,
2576                                gname,
2577                                times,
2578                                kind: match kind {
2579                                    Kind::CharDevice => DeviceKind::Char,
2580                                    Kind::BlockDevice => DeviceKind::Block,
2581                                    _ => unreachable!(),
2582                                },
2583                                major: this.header.dev_major()?,
2584                                minor: this.header.dev_minor()?,
2585                                attrs: info.attrs,
2586                            }))
2587                        }
2588                        Kind::Link => {
2589                            let info = take_pax_info(this.exts, this.globs)?;
2590                            *this.nxt += BLOCK_SIZE as u64;
2591                            *this.state = Header;
2592                            let (path_name, link_name) =
2593                                entry_path_link(this.header, info.path, info.linkpath)?;
2594                            let _ = info.attrs;
2595                            Ok(Entry::Link(TarLink {
2596                                path_name,
2597                                link_name,
2598                            }))
2599                        }
2600                        Kind::Symlink => {
2601                            let info = take_pax_info(this.exts, this.globs)?;
2602                            let uid = effective_uid(this.header, info.uid)?;
2603                            let gid = effective_gid(this.header, info.gid)?;
2604                            let times = effective_times(this.header, &info)?;
2605                            let uname = effective_uname(this.header, info.uname)?;
2606                            let gname = effective_gname(this.header, info.gname)?;
2607                            *this.nxt += BLOCK_SIZE as u64;
2608                            *this.state = Header;
2609                            let (path_name, link_name) =
2610                                entry_path_link(this.header, info.path, info.linkpath)?;
2611                            Ok(Entry::Symlink(TarSymlink {
2612                                mode: this.header.mode()?,
2613                                uid,
2614                                gid,
2615                                uname,
2616                                gname,
2617                                times,
2618                                path_name,
2619                                link_name,
2620                                attrs: info.attrs,
2621                            }))
2622                        }
2623                        Kind::PAXLocal | Kind::PAXGlobal if this.header.is_ustar() => {
2624                            let size = this.header.size().and_then(|size| {
2625                                if size as usize > PAX_HEADER_MAX_SIZE {
2626                                    Err(Error::new(
2627                                        ErrorKind::InvalidData,
2628                                        format!(
2629                                            "PAX extension exceeds {PAX_HEADER_MAX_SIZE} bytes"
2630                                        ),
2631                                    ))
2632                                } else {
2633                                    Ok(size as usize)
2634                                }
2635                            })?;
2636                            *this.state = Extension((size as u32, kind));
2637                            let padded = padded_size(size as u64);
2638                            *this.nxt += padded;
2639                            if size > BLOCK_SIZE {
2640                                this.ext.replace(ExtensionBuffer::new(padded as usize));
2641                            }
2642                            continue;
2643                        }
2644                        Kind::GNULongName | Kind::GNULongLink if this.header.is_gnu() => {
2645                            let size = this.header.size().and_then(|size| {
2646                                if size as usize > PATH_MAX {
2647                                    Err(Error::new(
2648                                        ErrorKind::InvalidData,
2649                                        format!("long filename exceeds {PATH_MAX} bytes"),
2650                                    ))
2651                                } else {
2652                                    Ok(size as usize)
2653                                }
2654                            })?;
2655                            *this.state = Extension((size as u32, kind));
2656                            let padded = padded_size(size as u64);
2657                            *this.nxt += padded;
2658                            if size > BLOCK_SIZE {
2659                                this.ext.replace(ExtensionBuffer::new(padded as usize));
2660                            }
2661                            continue;
2662                        }
2663                        kind => Err(Error::new(
2664                            ErrorKind::InvalidData,
2665                            format!("invalid tar entry header {}", kind),
2666                        )),
2667                    }));
2668                }
2669                Extension((size, kind)) => {
2670                    let (ext, reader) = (&mut this.ext, &mut this.reader);
2671                    let n = if *size as usize <= BLOCK_SIZE {
2672                        let remaining = *this.nxt - *this.pos;
2673                        let filled = BLOCK_SIZE - remaining as usize;
2674                        let (hdr, reader) = (&mut this.header, &mut this.reader);
2675                        ready_opt!(poll_read_compat(pin!(reader), ctx, hdr.buf_mut(filled..)))
2676                    } else {
2677                        let buf = ext.as_mut().unwrap();
2678                        let n = ready_opt!(poll_read_compat(pin!(reader), ctx, unsafe {
2679                            buf.remaining_buf()
2680                        }));
2681                        unsafe { buf.advance(n) };
2682                        n
2683                    };
2684                    *this.pos += if n == 0 {
2685                        Err(Error::new(
2686                            ErrorKind::UnexpectedEof,
2687                            "unexpected end of tar file",
2688                        ))
2689                    } else {
2690                        Ok(n as u64)
2691                    }?;
2692                    if *this.pos == *this.nxt {
2693                        match kind {
2694                            Kind::GNULongName => this.exts.push(ExtensionHeader::LongName(
2695                                ext_as_path(this.header, *size as usize, ext)?,
2696                            )),
2697                            Kind::GNULongLink => this.exts.push(ExtensionHeader::LongLink(
2698                                ext_as_path(this.header, *size as usize, ext)?,
2699                            )),
2700                            Kind::PAXLocal => {
2701                                let ext = ext_as_str(this.header, *size as usize, ext)?;
2702                                PosixExtension::validate(&ext)?;
2703                                this.exts.push(ExtensionHeader::PosixExtension(ext.into()));
2704                            }
2705                            Kind::PAXGlobal => {
2706                                let ext = ext_as_str(this.header, *size as usize, ext)?;
2707                                PosixExtension::validate(&ext)?;
2708                                this.globs.push(ext.into());
2709                            }
2710                            _ => unreachable!(),
2711                        };
2712                        *this.nxt += BLOCK_SIZE as u64;
2713                        *this.state = Header;
2714                    }
2715                    continue;
2716                }
2717                Padding => {
2718                    let remaining = *this.nxt - *this.pos;
2719                    let (hdr, reader) = (&mut this.header, &mut this.reader);
2720                    let n = match ready_opt!(poll_read_compat(
2721                        pin!(reader),
2722                        ctx,
2723                        hdr.buf_mut(..remaining as usize)
2724                    )) {
2725                        0 => Err(Error::new(
2726                            ErrorKind::UnexpectedEof,
2727                            "unexpected end of tar file",
2728                        )),
2729                        n => Ok(n as u64),
2730                    }?;
2731                    *this.pos += n;
2732                    if remaining == n {
2733                        *this.nxt = *this.pos + BLOCK_SIZE as u64;
2734                        *this.state = Header;
2735                    }
2736                    continue;
2737                }
2738                Entry => {
2739                    // there is a file entry that must be either dropped or
2740                    // read fully to move further
2741                    return Poll::Pending;
2742                }
2743                SkipEntry => {
2744                    // skipping a entry
2745                    let nxt = padded_size(*this.nxt);
2746                    let remaining =
2747                        std::cmp::min(SKIP_BUFFER_SIZE as u64, nxt - *this.pos) as usize;
2748                    let n = if remaining > 0 {
2749                        let buf = if let Some(buf) = this.ext.as_mut() {
2750                            buf
2751                        } else {
2752                            this.ext.replace(ExtensionBuffer::new(SKIP_BUFFER_SIZE));
2753                            this.ext.as_mut().unwrap()
2754                        };
2755                        let reader = &mut this.reader;
2756                        match ready_opt!(poll_read_compat(pin!(reader), ctx, unsafe {
2757                            buf.upto(remaining)
2758                        })) {
2759                            0 => Err(Error::new(
2760                                ErrorKind::UnexpectedEof,
2761                                "unexpected end of tar file",
2762                            )),
2763
2764                            n => Ok(n as u64),
2765                        }
2766                    } else {
2767                        Ok(0)
2768                    }?;
2769                    *this.pos += n;
2770                    if *this.pos == nxt {
2771                        this.ext.take();
2772                        *this.nxt = *this.pos + BLOCK_SIZE as u64;
2773                        *this.state = Header;
2774                    }
2775                    continue;
2776                }
2777                Eof => {
2778                    let remaining = *this.nxt - *this.pos;
2779                    let filled = BLOCK_SIZE - remaining as usize;
2780                    let (hdr, reader) = (&mut this.header, &mut this.reader);
2781                    let n = match ready_opt!(poll_read_compat(
2782                        pin!(reader),
2783                        ctx,
2784                        hdr.buf_mut(filled..)
2785                    )) {
2786                        0 => Err(Error::new(
2787                            ErrorKind::UnexpectedEof,
2788                            "unexpected end of tar file",
2789                        )),
2790                        n => Ok(n as u64),
2791                    }?;
2792                    *this.pos += n;
2793                    if remaining > n {
2794                        continue;
2795                    }
2796                    return Poll::Ready(if hdr.is_zero() {
2797                        *this.state = Eoff;
2798                        None
2799                    } else {
2800                        *this.state = Eoff;
2801                        Some(Err(Error::new(
2802                            ErrorKind::InvalidData,
2803                            "unexpected data after first zero block",
2804                        )))
2805                    });
2806                }
2807                Eoff => {
2808                    return Poll::Ready(Some(Err(Error::new(
2809                        ErrorKind::InvalidData,
2810                        "unexpected read after EOF",
2811                    ))));
2812                }
2813            }
2814        }
2815    }
2816}
2817
2818impl<'a, R: AsyncRead + 'a> Stream for TarReader<'a, R> {
2819    type Item = Result<TarEntry<'a, TarRegularFileReader<'a, R>>>;
2820    fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
2821        let this = self.as_mut();
2822        let fut = this.inner.lock();
2823        let mut g = task::ready!(pin!(fut).poll(ctx));
2824        let inner: Pin<&mut TarReaderInner<R>> = g.as_mut();
2825        let entry = {
2826            match inner.poll_next(ctx) {
2827                Poll::Pending => return Poll::Pending,
2828                Poll::Ready(None) => return Poll::Ready(None),
2829                Poll::Ready(Some(Err(err))) => return Poll::Ready(Some(Err(err))),
2830                Poll::Ready(Some(Ok(data))) => data,
2831            }
2832        };
2833        Poll::Ready(Some(Ok(match entry {
2834            Entry::File {
2835                size,
2836                mode,
2837                uid,
2838                gid,
2839                uname,
2840                gname,
2841                times,
2842                eof,
2843                path_name,
2844                attrs,
2845            } => TarEntry::File(TarRegularFile {
2846                path_name,
2847                size,
2848                mode,
2849                uid,
2850                gid,
2851                uname,
2852                gname,
2853                times,
2854                attrs,
2855                inner: TarRegularFileReader {
2856                    eof,
2857                    inner: Arc::clone(&this.inner),
2858                },
2859                marker: std::marker::PhantomData,
2860            }),
2861            Entry::Directory(d) => TarEntry::Directory(d),
2862            Entry::Link(l) => TarEntry::Link(l),
2863            Entry::Symlink(l) => TarEntry::Symlink(l),
2864            Entry::Device(d) => TarEntry::Device(d),
2865            Entry::Fifo(f) => TarEntry::Fifo(f),
2866        })))
2867    }
2868}
2869
2870impl<'a, R: AsyncRead + 'a> Stream for TarReaderInner<'a, R> {
2871    type Item = Result<Entry>;
2872    fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
2873        self.as_mut().poll_read_header(ctx)
2874    }
2875}
2876impl<'a, R: AsyncRead + 'a> TarEntry<'a, R> {
2877    /// Path of the entry regardless of concrete variant.
2878    pub fn path(&'_ self) -> &'_ str {
2879        match self {
2880            Self::File(f) => &f.path_name,
2881            Self::Link(l) => &l.path_name,
2882            Self::Symlink(l) => &l.path_name,
2883            Self::Directory(d) => &d.path_name,
2884            Self::Device(d) => &d.path_name,
2885            Self::Fifo(f) => &f.path_name,
2886        }
2887    }
2888}
2889
#[cfg(feature = "smol")]
impl<'a, R: AsyncRead + 'a> AsyncRead for TarRegularFile<'a, R> {
    /// Reads file payload bytes by forwarding to the inner body reader.
    fn poll_read(
        self: Pin<&mut Self>,
        ctx: &mut Context<'_>,
        buf: &mut [u8],
    ) -> Poll<std::io::Result<usize>> {
        let projected = self.project();
        let body = pin!(projected.inner);
        poll_read_compat(body, ctx, buf)
    }
}
2901
#[cfg(feature = "tokio")]
impl<'a, R: AsyncRead + 'a> AsyncRead for TarRegularFile<'a, R> {
    /// Reads file payload bytes into the unfilled part of `buf`.
    ///
    /// The unfilled region is initialized, handed to the inner body reader as
    /// a plain slice, and `buf` is then advanced by the number of bytes read.
    fn poll_read(
        self: Pin<&mut Self>,
        ctx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<std::io::Result<()>> {
        let projected = self.project();
        let read = task::ready!(poll_read_compat(
            pin!(projected.inner),
            ctx,
            buf.initialize_unfilled()
        ))?;
        buf.advance(read);
        Poll::Ready(Ok(()))
    }
}
2920
/// Returns the part of `bytes` before the first NUL byte, or all of `bytes`
/// when it contains no NUL.
fn null_terminated(bytes: &[u8]) -> &[u8] {
    match bytes.iter().position(|&byte| byte == 0) {
        Some(end) => &bytes[..end],
        None => bytes,
    }
}
2927
2928fn ustar_path_name(name: &[u8; 100], prefix: &[u8; 155]) -> Result<Box<str>> {
2929    let (mut size, prefix) = if prefix[0] != b'\0' {
2930        let prefix = path_name(prefix)?;
2931        (prefix.len() + 1, Some(prefix))
2932    } else {
2933        (0, None)
2934    };
2935    let name = path_name(name)?;
2936    size += name.len();
2937    let mut path = String::with_capacity(size);
2938    if let Some(prefix) = prefix {
2939        path.push_str(prefix);
2940        path.push('/');
2941    }
2942    path.push_str(name);
2943    Ok(path.into_boxed_str())
2944}
2945fn path_name(name: &'_ [u8]) -> Result<&'_ str> {
2946    from_utf8(null_terminated(name))
2947        .map_err(|_| Error::new(ErrorKind::InvalidData, "invalid utf8 in file path"))
2948}
2949
2950fn parse_name_field(bytes: &[u8]) -> Result<Option<Box<str>>> {
2951    let bytes = null_terminated(bytes);
2952    if bytes.is_empty() {
2953        Ok(None)
2954    } else {
2955        from_utf8(bytes)
2956            .map(|name| Some(name.to_string().into_boxed_str()))
2957            .map_err(|_| Error::new(ErrorKind::InvalidData, "invalid UTF-8"))
2958    }
2959}
2960
/// Reports whether a user/group name is short enough (at most 30 bytes) to be
/// stored directly in the ustar header rather than in a PAX record.
fn ustar_name_fits(name: &str) -> bool {
    const MAX_INLINE_NAME_LEN: usize = 30;
    name.len() <= MAX_INLINE_NAME_LEN
}
2964
/// Parses an octal numeric header field.
///
/// Digits are consumed until the first NUL or space byte (or the end of the
/// field). Any other non-octal byte before that terminator makes the call fail
/// and hand back the whole field as the error value.
fn parse_octal(field: &[u8]) -> std::result::Result<u64, &[u8]> {
    let mut value = 0u64;
    for &byte in field {
        match byte {
            0 | b' ' => break,
            b'0'..=b'7' => value = (value << 3) | u64::from(byte - b'0'),
            _ => return Err(field),
        }
    }
    Ok(value)
}
2983
/// Rounds `n` up to the next multiple of 512 (zero stays zero).
///
/// The addition saturates, so values within 511 of `u64::MAX` round down to
/// the largest multiple of 512 instead of overflowing.
const fn padded_size(n: u64) -> u64 {
    const BLOCK_MASK: u64 = 512 - 1;
    n.saturating_add(BLOCK_MASK) & !BLOCK_MASK
}
2991
2992#[allow(clippy::too_many_arguments)]
2993/// Write a single header, plus an optional PAX prefix, into `buffer`.
2994///
2995/// The function encodes metadata using the ustar layout and, if either the path
2996/// or link name is too long, or if PAX attributes are supplied (for example
2997/// extended attributes), prepends a PAX header containing the full values
2998/// before duplicating the original header so downstream tools can still read
2999/// the entry.
3000fn write_header(
3001    buffer: &mut [u8],
3002    name: &str,
3003    link_name: Option<&str>,
3004    kind: Kind,
3005    size: u64,
3006    mode: u32,
3007    uid: u32,
3008    gid: u32,
3009    uname: Option<&str>,
3010    gname: Option<&str>,
3011    times: &EntryTimes,
3012    device: Option<(u32, u32)>,
3013    attrs: &AttrList,
3014) -> std::io::Result<usize> {
3015    if buffer.len() < BLOCK_SIZE {
3016        return Err(std::io::Error::other("buffer too small for tar header"));
3017    }
3018    let (header_buf, data_buf) = buffer.split_at_mut(BLOCK_SIZE);
3019    let header = unsafe { UstarHeader::from_buf(header_buf) };
3020    let mut total = BLOCK_SIZE;
3021
3022    let split_pos = header.path_split_point(name);
3023    let path_truncated = if let Some(pos) = split_pos {
3024        name.len() - pos - 1 > NAME_LEN
3025    } else {
3026        name.len() > NAME_LEN
3027    };
3028    let link_path_truncated = link_name
3029        .as_ref()
3030        .is_some_and(|link_name| link_name.len() > NAME_LEN);
3031    let supports_pax_metadata = !matches!(kind, Kind::Link);
3032    let supports_pax_xattrs = !matches!(kind, Kind::Link);
3033
3034    let mut records = Vec::new();
3035    if path_truncated {
3036        records.push(PaxRecord::new("path", name.as_bytes()));
3037    }
3038    if link_path_truncated {
3039        let name = link_name.unwrap();
3040        records.push(PaxRecord::new("linkpath", name.as_bytes()));
3041    }
3042    if supports_pax_metadata {
3043        if let Some(atime) = times.atime {
3044            records.push(PaxRecord::string("atime", format_pax_time(atime)));
3045        }
3046        if let Some(ctime) = times.ctime {
3047            records.push(PaxRecord::string("ctime", format_pax_time(ctime)));
3048        }
3049    }
3050    let header_mtime = header_mtime_value(times.mtime);
3051    let mtime_requires_pax = supports_pax_metadata
3052        && (header_mtime.is_none() || !octal_fits(header_mtime.unwrap_or(0), 12));
3053    let stored_header_mtime = if mtime_requires_pax {
3054        0
3055    } else {
3056        header_mtime.unwrap_or(0)
3057    };
3058    if mtime_requires_pax {
3059        records.push(PaxRecord::string("mtime", format_pax_time(times.mtime)));
3060    }
3061    let uid_requires_pax = supports_pax_metadata && !octal_fits(u64::from(uid), 8);
3062    if uid_requires_pax {
3063        records.push(PaxRecord::string("uid", uid.to_string()));
3064    }
3065    let gid_requires_pax = supports_pax_metadata && !octal_fits(u64::from(gid), 8);
3066    if gid_requires_pax {
3067        records.push(PaxRecord::string("gid", gid.to_string()));
3068    }
3069    let uname_requires_pax =
3070        supports_pax_metadata && uname.is_some_and(|uname| !ustar_name_fits(uname));
3071    if let Some(uname) = uname.filter(|uname| !ustar_name_fits(uname)) {
3072        records.push(PaxRecord::string("uname", uname.to_string()));
3073    }
3074    let gname_requires_pax =
3075        supports_pax_metadata && gname.is_some_and(|gname| !ustar_name_fits(gname));
3076    if let Some(gname) = gname.filter(|gname| !ustar_name_fits(gname)) {
3077        records.push(PaxRecord::string("gname", gname.to_string()));
3078    }
3079    let size_requires_pax = supports_pax_metadata && !octal_fits(size, 12);
3080    if size_requires_pax {
3081        records.push(PaxRecord::string("size", size.to_string()));
3082    }
3083    if supports_pax_xattrs && !attrs.is_empty() {
3084        for (name, value) in attrs.iter() {
3085            records.push(pax_xattr_record(name, value)?);
3086        }
3087    }
3088
3089    if records.is_empty() {
3090        header.set_uid(uid)?;
3091        header.set_gid(gid)?;
3092        if let Some(uname) = uname.filter(|uname| ustar_name_fits(uname)) {
3093            header.set_uname(uname);
3094        }
3095        if let Some(gname) = gname.filter(|gname| ustar_name_fits(gname)) {
3096            header.set_gname(gname);
3097        }
3098        header.set_mode(mode)?;
3099        header.set_mtime(stored_header_mtime)?;
3100        header.set_size(size)?;
3101        header.set_path(name, split_pos);
3102        if let Some(link_name) = link_name {
3103            header.set_link_path(link_name);
3104        }
3105        if let Some((major, minor)) = device {
3106            header.set_dev_major(major)?;
3107            header.set_dev_minor(minor)?;
3108        }
3109        header.set_typeflag(kind);
3110        header.finalize()?;
3111    } else {
3112        header.set_typeflag(Kind::PAXLocal);
3113        header.set_path("././@PaxHeader", None);
3114        header.set_uid(0)?;
3115        header.set_gid(0)?;
3116        header.set_mode(0)?;
3117        header.set_mtime(0)?;
3118        let mut ext_size = 0;
3119        for record in &records {
3120            ext_size += pax_record_len(&record.key, record.value.len());
3121        }
3122        if data_buf.len() < ext_size {
3123            return Err(std::io::Error::other("buffer too small for pax header"));
3124        }
3125        let mut offset = 0;
3126        for record in records {
3127            let rec_len = pax_record_len(&record.key, record.value.len());
3128            if data_buf.len() < offset + rec_len {
3129                return Err(std::io::Error::other("buffer too small for pax header"));
3130            }
3131            write_pax_record(
3132                &mut data_buf[offset..offset + rec_len],
3133                &record.key,
3134                &record.value,
3135                rec_len,
3136            )?;
3137            offset += rec_len;
3138        }
3139        header.set_size(ext_size as u64)?;
3140        header.finalize()?;
3141        let padded = padded_size(ext_size as u64);
3142        if data_buf.len() < padded as usize {
3143            return Err(std::io::Error::other("buffer too small for pax header"));
3144        }
3145        data_buf[ext_size..padded as usize].fill(0);
3146        total += padded as usize;
3147        if data_buf.len() < padded as usize + BLOCK_SIZE {
3148            return Err(std::io::Error::other("buffer too small for pax header"));
3149        }
3150        let header = unsafe {
3151            UstarHeader::from_buf(&mut data_buf[padded as usize..padded as usize + BLOCK_SIZE])
3152        };
3153        total += BLOCK_SIZE;
3154        header.set_uid(if uid_requires_pax { 0 } else { uid })?;
3155        header.set_gid(if gid_requires_pax { 0 } else { gid })?;
3156        if !uname_requires_pax {
3157            if let Some(uname) = uname {
3158                header.set_uname(uname);
3159            }
3160        }
3161        if !gname_requires_pax {
3162            if let Some(gname) = gname {
3163                header.set_gname(gname);
3164            }
3165        }
3166        header.set_mode(mode)?;
3167        header.set_mtime(stored_header_mtime)?;
3168        header.set_size(if size_requires_pax { 0 } else { size })?;
3169        header.set_typeflag(kind);
3170        header.set_path(name, split_pos);
3171        if let Some(link_name) = link_name {
3172            header.set_link_path(link_name);
3173        }
3174        if let Some((major, minor)) = device {
3175            header.set_dev_major(major)?;
3176            header.set_dev_minor(minor)?;
3177        }
3178        header.finalize()?;
3179    }
3180    Ok(total)
3181}
3182
/// Computes the total length of a PAX record `<LEN> SP <KEY>=<VALUE>\n`.
///
/// The length prefix counts its own decimal digits, so the result is found by
/// iterating until the digit count of the total stops changing.
fn pax_record_len(key: &str, val_len: usize) -> usize {
    // Everything except the length digits: SP, key, '=', value, '\n'.
    let fixed = 1 + key.len() + 1 + val_len + 1;
    let mut total = fixed + 1;
    loop {
        let candidate = fixed + num_decimal_digits(total);
        if candidate == total {
            break total;
        }
        total = candidate;
    }
}

/// Counts the decimal digits needed to print `n` (zero needs one digit).
#[inline]
fn num_decimal_digits(n: usize) -> usize {
    std::iter::successors(Some(n), |&rest| (rest >= 10).then(|| rest / 10)).count()
}
3206
/// Serializes one PAX record (`<rec_len> <key>=<value>\n`) into the start of
/// `buf`.
///
/// `rec_len` must be the exact total record length, including its own decimal
/// digits. Fails if `buf` is shorter than `rec_len` or if `rec_len` does not
/// match the pieces being written.
fn write_pax_record(
    buf: &mut [u8],
    key: &str,
    value: &[u8],
    rec_len: usize,
) -> std::io::Result<()> {
    if rec_len > buf.len() {
        return Err(std::io::Error::other("buffer too small for pax record"));
    }
    let digits = rec_len.to_string();
    // digits + SP + key + '=' + value + '\n'
    if digits.len() + key.len() + value.len() + 3 != rec_len {
        return Err(std::io::Error::other("pax record length mismatch"));
    }

    let pieces: [&[u8]; 5] = [digits.as_bytes(), b" ", key.as_bytes(), b"=", value];
    let mut cursor = 0;
    for piece in pieces.iter() {
        let end = cursor + piece.len();
        buf[cursor..end].copy_from_slice(piece);
        cursor = end;
    }
    buf[cursor] = b'\n';
    Ok(())
}
3235
/// A single PAX extended-header record: a keyword and its raw value bytes.
struct PaxRecord {
    key: String,
    value: Vec<u8>,
}

impl PaxRecord {
    /// Builds a record by copying `key` and the raw `value` bytes.
    fn new(key: &str, value: &[u8]) -> Self {
        PaxRecord {
            key: String::from(key),
            value: Vec::from(value),
        }
    }

    /// Builds a record from an owned string value, reusing its allocation.
    fn string(key: &str, value: String) -> Self {
        PaxRecord {
            key: String::from(key),
            value: Vec::from(value),
        }
    }
}
3256
3257fn pax_xattr_record(name: &str, value: &[u8]) -> std::io::Result<PaxRecord> {
3258    if !xattr_name_is_pax_safe(name) {
3259        return Err(std::io::Error::other(
3260            "xattr name contains non-portable characters",
3261        ));
3262    }
3263    Ok(PaxRecord::new(&format!("SCHILY.xattr.{name}"), value))
3264}
3265
/// Reports whether every byte of `name` is a printable, non-space ASCII
/// character other than `=`, which separates key and value in a PAX record.
fn xattr_name_is_pax_safe(name: &str) -> bool {
    !name
        .bytes()
        .any(|byte| byte == b'=' || !byte.is_ascii_graphic())
}
3269
/// Reports whether `value`, printed in octal, fits a header field of
/// `field_len` bytes while leaving one byte for the terminator.
fn octal_fits(value: u64, field_len: usize) -> bool {
    let significant_bits = u64::BITS - value.leading_zeros();
    // Three bits per octal digit; zero still needs one digit.
    let digits = significant_bits.div_ceil(3).max(1) as usize;
    digits < field_len
}
3278
/// Writes `val` into `field` as zero-padded octal digits followed by a NUL
/// terminator.
///
/// The last byte of `field` always receives the terminator; the remaining
/// bytes receive the digits, most significant first.
///
/// # Errors
///
/// Fails when `field` is empty (there is no room for the terminator) or when
/// `val` needs more octal digits than `field.len() - 1`. In the latter case
/// `field` has already been overwritten with the low-order digits.
fn format_octal(val: u64, field: &mut [u8]) -> std::io::Result<()> {
    let field_len = field.len();
    // An empty field previously underflowed `field.len() - 1` and panicked.
    let Some((terminator, digits)) = field.split_last_mut() else {
        return Err(std::io::Error::other(
            "octal field is empty: no room for the NUL terminator",
        ));
    };
    *terminator = 0;

    let mut remaining = val;
    for digit in digits.iter_mut().rev() {
        *digit = b'0' + (remaining & 0o7) as u8;
        remaining >>= 3;
    }
    if remaining != 0 {
        return Err(std::io::Error::other(format!(
            "value {} too large to fit in octal field of len {}",
            val, field_len
        )));
    }
    Ok(())
}
3297
3298pin_project! {
3299    /// Streaming tar writer that implements `future_sink::Sink<TarEntry<...>>`.
3300    ///
3301    /// Headers and file payloads are staged inside `buf` until downstream I/O
3302    /// makes progress, which keeps memory usage predictable while preserving
3303    /// proper block alignment.
3304    ///
3305    /// The composable `Sink` interface is handy, but if bringing in the
3306    /// `futures` crate feels a bit much, the writer also provides inherent
3307    /// methods. Use [`TarWriter::write`] to send entries to the tar stream, and
3308    /// [`TarWriter::finish`] once all entries have been written so the
3309    /// terminating zero blocks required by the format are emitted.
3310    /// [`futures::sink:SinkExt::close`] performs the same operation.
3311    ///
3312    /// Type inference usually determines `R` from the file entries you send. If
3313    /// you only send metadata-only entries such as directories, you may need an
3314    /// explicit `TarWriter::<_, SomeReaderType>::new(...)` annotation.
3315    ///
3316    /// # Example
3317    ///
3318    /// ```rust
3319    /// # #[cfg(feature = "smol")]
3320    /// # use {
3321    /// #     smol::io::{Cursor, Empty},
3322    /// # };
3323    /// # #[cfg(feature = "tokio")]
3324    /// # use {
3325    /// #     std::io::Cursor,
3326    /// #     tokio::io::Empty,
3327    /// # };
3328    /// use smol_tar::{TarDirectory, TarEntry, TarWriter};
3329    ///
3330    /// # #[cfg(feature = "smol")]
3331    /// # fn block_on<F: std::future::Future>(fut: F) -> F::Output {
3332    /// #   smol::future::block_on(fut)
3333    /// # }
3334    /// # #[cfg(feature = "tokio")]
3335    /// # fn block_on<F: std::future::Future>(fut: F) -> F::Output {
3336    /// #   tokio::runtime::Builder::new_current_thread().build().unwrap().block_on(fut)
3337    /// # }
3338    /// # block_on(async {
3339    /// let sink = Cursor::new(Vec::<u8>::new());
3340    /// let mut tar = TarWriter::<_, Empty>::new(sink);
3341    ///
3342    /// tar.write(TarEntry::from(TarDirectory::new("bin/"))).await?;
3343    /// tar.finish().await?;
3344    /// # std::io::Result::Ok(())
3345    /// # }).unwrap();
3346    /// ```
3347    pub struct TarWriter<'a, 'b, W, R> {
3348        // internal buffer for writing headers and file data
3349        buf: [u8; BLOCK_SIZE * 32],
3350        // length of valid data in the buffer
3351        len: usize,
3352        // current position in the buffer
3353        pos: usize,
3354        // current global position (number of bytes written)
3355        total: u64,
3356        // the end position of the current entry being written
3357        eof: u64,
3358        // closed
3359        closed: bool,
3360        // reader for the current file being written
3361        reader: Option<R>,
3362        marker_: std::marker::PhantomData<&'b ()>,
3363        // the underlying writer
3364        #[pin]
3365        writer: W,
3366        marker: std::marker::PhantomData<&'a ()>,
3367    }
3368}
3369
3370impl<'a, 'b, W: AsyncWrite + 'a, R: AsyncRead + Unpin + 'b> TarWriter<'a, 'b, W, R> {
3371    /// Create a writer that targets the provided [`AsyncWrite`] sink.
3372    pub fn new(writer: W) -> Self {
3373        Self {
3374            buf: [0; BLOCK_SIZE * 32],
3375            len: 0,
3376            pos: 0,
3377            total: 0,
3378            eof: 0,
3379            closed: false,
3380            reader: None,
3381            marker_: std::marker::PhantomData,
3382            writer,
3383            marker: std::marker::PhantomData,
3384        }
3385    }
3386
3387    /// Write a single tar entry and flush it to the underlying writer.
3388    pub async fn write(&mut self, entry: TarEntry<'b, R>) -> std::io::Result<()> {
3389        poll_fn(|cx| {
3390            // SAFETY: the mutable borrow of `self` is held for the duration of
3391            // this async method, so the writer cannot be moved while polled.
3392            unsafe { Pin::new_unchecked(&mut *self) }.poll_ready(cx)
3393        })
3394        .await?;
3395
3396        // SAFETY: same as above; `start_send` does not move out of `self`.
3397        unsafe { Pin::new_unchecked(&mut *self) }.start_send(entry)?;
3398
3399        poll_fn(|cx| {
3400            // SAFETY: the mutable borrow of `self` is held for the duration of
3401            // this async method, so the writer cannot be moved while polled.
3402            unsafe { Pin::new_unchecked(&mut *self) }.poll_flush(cx)
3403        })
3404        .await
3405    }
3406
3407    /// Finish the archive by emitting the trailing zero blocks and closing.
3408    pub async fn finish(&mut self) -> std::io::Result<()> {
3409        poll_fn(|cx| {
3410            // SAFETY: the mutable borrow of `self` is held for the duration of
3411            // this async method, so the writer cannot be moved while polled.
3412            unsafe { Pin::new_unchecked(&mut *self) }.poll_close(cx)
3413        })
3414        .await
3415    }
3416
3417    /// Drain internal buffer and current file reader to the underlying writer.
3418    ///
3419    /// The method alternates between flushing buffered headers, consuming the
3420    /// reader for the active file, and emitting zero padding so the next entry
3421    /// always starts on a block boundary.
3422    fn poll_drain(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
3423        let mut this = self.project();
3424        loop {
3425            while *this.pos < *this.len {
3426                let n = task::ready!(this
3427                    .writer
3428                    .as_mut()
3429                    .poll_write(cx, &this.buf[*this.pos..*this.len]))?;
3430                if n == 0 {
3431                    return Poll::Ready(Err(std::io::Error::new(
3432                        std::io::ErrorKind::WriteZero,
3433                        "error writing buffer",
3434                    )));
3435                }
3436                *this.pos += n;
3437                *this.total += n as u64;
3438            }
3439
3440            *this.pos = 0;
3441            *this.len = 0;
3442
3443            if let Some(reader) = this.reader.as_mut() {
3444                let remain = this.eof.saturating_sub(*this.total);
3445                if remain == 0 {
3446                    *this.reader = None;
3447                    let padded = padded_size(*this.eof);
3448                    let padding = padded.saturating_sub(*this.eof);
3449                    if padding > 0 {
3450                        *this.len = padding as usize;
3451                        this.buf[..*this.len].fill(0);
3452                        *this.eof = padded;
3453                        continue;
3454                    } else {
3455                        return Poll::Ready(Ok(()));
3456                    }
3457                }
3458                let buf_len = std::cmp::min(this.buf.len() as u64, remain) as usize;
3459                let n = task::ready!(poll_read_compat(pin!(reader), cx, &mut this.buf[..buf_len]))?;
3460                if n == 0 {
3461                    return Poll::Ready(Err(std::io::Error::new(
3462                        std::io::ErrorKind::UnexpectedEof,
3463                        "unexpected EOF while reading file",
3464                    )));
3465                }
3466                *this.pos = 0;
3467                *this.len = n;
3468                continue;
3469            } else {
3470                return Poll::Ready(Ok(()));
3471            }
3472        }
3473    }
3474}
3475
3476impl<'a, 'b, W: AsyncWrite + 'a, R: AsyncRead + Unpin + 'b> Sink<TarEntry<'b, R>>
3477    for TarWriter<'a, 'b, W, R>
3478{
3479    type Error = std::io::Error;
3480
3481    fn poll_ready(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
3482        self.poll_drain(cx)
3483    }
3484
3485    fn start_send(self: Pin<&mut Self>, item: TarEntry<'b, R>) -> std::io::Result<()> {
3486        let this = self.project();
3487
3488        if *this.len != 0 || this.reader.is_some() {
3489            return Err(std::io::Error::other(
3490                "start_send called while previous entry still in progress",
3491            ));
3492        }
3493
3494        let header_len = item.write_header(this.buf)?;
3495        *this.len = header_len;
3496
3497        if let TarEntry::File(file) = item {
3498            this.reader.replace(file.inner);
3499            *this.eof = *this.total + (header_len as u64) + file.size;
3500        }
3501
3502        Ok(())
3503    }
3504
3505    fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
3506        task::ready!(self.as_mut().poll_drain(cx))?;
3507        let mut this = self.project();
3508        task::ready!(this.writer.as_mut().poll_flush(cx))?;
3509        Poll::Ready(Ok(()))
3510    }
3511
3512    fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
3513        task::ready!(self.as_mut().poll_drain(cx))?;
3514        {
3515            let this = self.as_mut().project();
3516            if !*this.closed {
3517                this.buf[..BLOCK_SIZE * 2].fill(0);
3518                *this.len = BLOCK_SIZE * 2;
3519                *this.closed = true;
3520            }
3521        }
3522        task::ready!(self.as_mut().poll_drain(cx))?;
3523        let mut this = self.project();
3524        poll_close_compat(this.writer.as_mut(), cx)
3525    }
3526}
3527
#[cfg(test)]
mod tests {
    use super::*;
    use static_assertions::{assert_eq_align, assert_eq_size, assert_impl_all, assert_obj_safe};

    // Concrete async file type used by the compile-time assertions below.
    #[cfg(feature = "smol")]
    type AssertFile = smol::fs::File;
    #[cfg(feature = "tokio")]
    type AssertFile = tokio::fs::File;

    // Compile-time checks on the reader and entry types.
    assert_impl_all!(TarReader<AssertFile>: Send, Sync);
    assert_obj_safe!(TarReader<AssertFile>);
    assert_impl_all!(TarEntry<AssertFile>: Send, Sync);
    assert_obj_safe!(TarEntry<AssertFile>);

    // Every header view must have the same size and alignment as `Header`.
    assert_eq_size!(Header, GnuHeader);
    assert_eq_size!(Header, UstarHeader);
    assert_eq_size!(Header, OldHeader);
    assert_eq_align!(Header, GnuHeader);
    assert_eq_align!(Header, UstarHeader);
    assert_eq_align!(Header, OldHeader);

    /// Edge cases of the padding, octal and pax-record helpers.
    #[test]
    fn low_level_format_helpers_cover_edge_cases() {
        // Padding: zero stays zero, anything else rounds up to a full block.
        assert_eq!(padded_size(0), 0);
        assert_eq!(padded_size(1), BLOCK_SIZE as u64);

        // Octal formatting: the value does not fit into a two-byte field.
        let mut two_bytes = [0u8; 2];
        let octal_err = format_octal(8, &mut two_bytes).unwrap_err();
        assert!(octal_err.to_string().contains("too large to fit"));

        let exact_len = pax_record_len("path", 3);

        // Pax record: destination one byte shorter than the record.
        let mut short_buf = vec![0u8; exact_len - 1];
        let short_err = write_pax_record(&mut short_buf, "path", b"abc", exact_len).unwrap_err();
        assert!(short_err.to_string().contains("buffer too small"));

        // Pax record: declared length disagrees with the actual record length.
        let mut long_buf = vec![0u8; exact_len + 1];
        let mismatch_err =
            write_pax_record(&mut long_buf, "path", b"abc", exact_len + 1).unwrap_err();
        assert!(mismatch_err.to_string().contains("length mismatch"));
    }

    /// Deref behaviour of the extension wrappers and `Kind` display names.
    #[test]
    fn posix_extension_and_kind_helpers_are_exercised() {
        const PAX_RECORD: &str = "13 path=file\n";

        let pax_ext = PosixExtension::from(Box::<str>::from(PAX_RECORD));
        assert_eq!(&*pax_ext, PAX_RECORD);

        let gnu_name = ExtensionHeader::LongName(Box::<str>::from("name"));
        assert_eq!(&*gnu_name, "name");

        let gnu_link = ExtensionHeader::LongLink(Box::<str>::from("link"));
        assert_eq!(&*gnu_link, "link");

        let wrapped_pax = ExtensionHeader::PosixExtension(pax_ext);
        assert_eq!(&*wrapped_pax, PAX_RECORD);

        let display_names = [
            (Kind::File, "regular file"),
            (Kind::Link, "link"),
            (Kind::Symlink, "symlink"),
            (Kind::CharDevice, "character device"),
            (Kind::BlockDevice, "block device"),
            (Kind::Directory, "directory"),
            (Kind::Fifo, "FIFO"),
            (Kind::GNULongName, "GNU long name extension"),
            (Kind::GNULongLink, "GNU long link extension"),
            (Kind::PAXLocal, "PAX next file extension"),
            (Kind::PAXGlobal, "PAX global extension"),
        ];
        for (kind, expected) in display_names {
            assert_eq!(kind.to_string(), expected);
        }
    }

    /// `write_header` must reject destinations that are too small.
    #[test]
    fn write_header_reports_small_buffers() {
        let times = EntryTimes::default();

        // Destination smaller than a single header block.
        let mut below_block = [0u8; BLOCK_SIZE - 1];
        let no_attrs = AttrList::default();
        let header_err = write_header(
            &mut below_block,
            "file.txt",
            None,
            Kind::File,
            0,
            0o644,
            0,
            0,
            None,
            None,
            &times,
            None,
            &no_attrs,
        )
        .unwrap_err();
        assert!(header_err
            .to_string()
            .contains("buffer too small for tar header"));

        // Slightly more than one block, with an attribute set on the entry.
        let mut just_over_block = vec![0u8; BLOCK_SIZE + 10];
        let pax_err = write_header(
            &mut just_over_block,
            "file.txt",
            None,
            Kind::File,
            0,
            0o644,
            0,
            0,
            None,
            None,
            &times,
            None,
            &AttrList::new().with("user.comment", b"hello".as_slice()),
        )
        .unwrap_err();
        assert!(pax_err
            .to_string()
            .contains("buffer too small for pax header"));
    }
}