async_tar/
entry.rs

1use std::{
2    borrow::Cow,
3    cmp, fmt, marker,
4    pin::Pin,
5    task::{Context, Poll},
6};
7
8#[cfg(feature = "runtime-async-std")]
9use async_std::{
10    fs,
11    fs::{OpenOptions, Permissions},
12    io::{self, Error, ErrorKind, SeekFrom, prelude::*},
13    path::{Component, Path, PathBuf},
14};
15use futures_core::ready;
16#[cfg(all(unix, feature = "runtime-tokio"))]
17use std::fs::Permissions;
18#[cfg(feature = "runtime-tokio")]
19use std::path::{Component, Path, PathBuf};
20#[cfg(feature = "runtime-tokio")]
21use tokio::{
22    fs,
23    fs::OpenOptions,
24    io::{self, AsyncRead as Read, AsyncReadExt, AsyncSeekExt, Error, ErrorKind, SeekFrom},
25};
26
27use filetime::{self, FileTime};
28
29use crate::{
30    Archive, Header, PaxExtensions, error::TarError, fs_canonicalize, header::bytes2path, other,
31    pax::pax_extensions, symlink_metadata,
32};
33
/// A read-only view into an entry of an archive.
///
/// This structure is a window into a portion of a borrowed archive which can
/// be inspected. It acts as a file handle by implementing the `Read` trait,
/// forwarding reads to its inner `EntryFields`. An entry cannot be rewritten
/// once inserted into an archive.
pub struct Entry<R: Read + Unpin> {
    // All of the entry's state; every `Entry` method delegates to this.
    fields: EntryFields<R>,
    // Zero-sized marker tying the entry to `Archive<R>` without storing one.
    _ignored: marker::PhantomData<Archive<R>>,
}
43
44impl<R: Read + Unpin> fmt::Debug for Entry<R> {
45    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
46        f.debug_struct("Entry")
47            .field("fields", &self.fields)
48            .finish()
49    }
50}
51
/// Private implementation detail of `Entry`, generic over the underlying
/// reader `R`. The fields are public so that the value can be constructed
/// from other modules.
pub struct EntryFields<R: Read + Unpin> {
    /// Overriding path bytes, if any; takes priority over pax and header
    /// paths in `path_bytes`. A single trailing NUL is stripped on read.
    pub long_pathname: Option<Vec<u8>>,
    /// Overriding link-name bytes, if any; a single trailing NUL is stripped
    /// on read.
    pub long_linkname: Option<Vec<u8>>,
    /// Raw pax extension records associated with this entry, if any. Filled
    /// lazily by `pax_extensions()` when the entry itself is a pax record.
    pub pax_extensions: Option<Vec<u8>>,
    /// The header of this entry.
    pub header: Header,
    /// Entry size in bytes; used to bound buffer preallocation in `read_all`.
    pub size: u64,
    /// Byte offset of this entry's header (see `Entry::raw_header_position`).
    pub header_pos: u64,
    /// Byte offset of this entry's file data (see `Entry::raw_file_position`).
    pub file_pos: u64,
    /// Ordered I/O segments making up the entry's contents; drained while
    /// unpacking a regular file.
    pub data: Vec<EntryIo<R>>,
    /// Whether `SCHILY.xattr.*` pax records are applied when unpacking.
    pub unpack_xattrs: bool,
    /// Whether mode bits beyond `0o777` are kept when setting permissions.
    pub preserve_permissions: bool,
    /// Whether the header mtime is applied to the unpacked file.
    pub preserve_mtime: bool,
    // NOTE(review): presumably the segment currently being read by the
    // `Read` impl — that impl is outside this view; confirm.
    pub(crate) read_state: Option<EntryIo<R>>,
}
68
69impl<R: Read + Unpin> fmt::Debug for EntryFields<R> {
70    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
71        f.debug_struct("EntryFields")
72            .field("long_pathname", &self.long_pathname)
73            .field("long_linkname", &self.long_linkname)
74            .field("pax_extensions", &self.pax_extensions)
75            .field("header", &self.header)
76            .field("size", &self.size)
77            .field("header_pos", &self.header_pos)
78            .field("file_pos", &self.file_pos)
79            .field("data", &self.data)
80            .field("unpack_xattrs", &self.unpack_xattrs)
81            .field("preserve_permissions", &self.preserve_permissions)
82            .field("preserve_mtime", &self.preserve_mtime)
83            .field("read_state", &self.read_state)
84            .finish()
85    }
86}
87
/// One contiguous segment of an entry's contents.
pub enum EntryIo<R: Read + Unpin> {
    /// A length-limited run of repeated filler bytes. When unpacking a file
    /// this segment is not copied: the output is seeked forward by its length
    /// and the file is extended to that position.
    Pad(io::Take<io::Repeat>),
    /// A length-limited window onto the underlying reader holding real data.
    Data(io::Take<R>),
}
92
93impl<R: Read + Unpin> fmt::Debug for EntryIo<R> {
94    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
95        match self {
96            EntryIo::Pad(_) => write!(f, "EntryIo::Pad"),
97            EntryIo::Data(_) => write!(f, "EntryIo::Data"),
98        }
99    }
100}
101
/// The result of unpacking a single entry.
///
/// When unpacking items the unpacked thing is returned to allow custom
/// additional handling by users. Today only the `File` is returned; in the
/// future the enum may be extended with kinds for links, directories etc.,
/// which is why it is marked `#[non_exhaustive]`.
#[derive(Debug)]
#[non_exhaustive]
pub enum Unpacked {
    /// A file was unpacked; carries the handle that was written to.
    File(fs::File),
    /// A directory, hardlink, symlink, or other node was unpacked.
    Other,
}
113
impl<R: Read + Unpin> Entry<R> {
    /// Returns the path name for this entry.
    ///
    /// This method may fail if the pathname is not valid Unicode and this is
    /// called on a Windows platform.
    ///
    /// Note that this function will convert any `\` characters to directory
    /// separators, and it will not always return the same value as
    /// `self.header().path()` as some archive formats have support for longer
    /// path names described in separate entries.
    ///
    /// It is recommended to use this method instead of inspecting the `header`
    /// directly to ensure that various archive formats are handled correctly.
    pub fn path(&self) -> io::Result<Cow<'_, Path>> {
        self.fields.path()
    }

    /// Returns the raw bytes listed for this entry.
    ///
    /// Note that this function will convert any `\` characters to directory
    /// separators, and it will not always return the same value as
    /// `self.header().path_bytes()` as some archive formats have support for
    /// longer path names described in separate entries.
    pub fn path_bytes(&self) -> Cow<'_, [u8]> {
        self.fields.path_bytes()
    }

    /// Returns the link name for this entry, if any is found.
    ///
    /// This method may fail if the pathname is not valid Unicode and this is
    /// called on a Windows platform. `Ok(None)` being returned, however,
    /// indicates that the link name was not present.
    ///
    /// Note that this function will convert any `\` characters to directory
    /// separators, and it will not always return the same value as
    /// `self.header().link_name()` as some archive formats have support for
    /// longer path names described in separate entries.
    ///
    /// It is recommended to use this method instead of inspecting the `header`
    /// directly to ensure that various archive formats are handled correctly.
    pub fn link_name(&self) -> io::Result<Option<Cow<'_, Path>>> {
        self.fields.link_name()
    }

    /// Returns the link name for this entry, in bytes, if listed.
    ///
    /// Note that this will not always return the same value as
    /// `self.header().link_name_bytes()` as some archive formats have support for
    /// longer path names described in separate entries.
    pub fn link_name_bytes(&self) -> Option<Cow<'_, [u8]>> {
        self.fields.link_name_bytes()
    }

    /// Returns an iterator over the pax extensions contained in this entry.
    ///
    /// Pax extensions are a form of archive where extra metadata is stored in
    /// key/value pairs in entries before the entry they're intended to
    /// describe. For example this can be used to describe long file name or
    /// other metadata like atime/ctime/mtime in more precision.
    ///
    /// The returned iterator will yield key/value pairs for each extension.
    ///
    /// `None` will be returned if this entry does not indicate that it itself
    /// contains extensions, or if there were no previous extensions describing
    /// it.
    ///
    /// Note that global pax extensions are intended to be applied to all
    /// archive entries.
    ///
    /// Also note that this function will read the entire entry if the entry
    /// itself is a list of extensions.
    pub async fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions<'_>>> {
        self.fields.pax_extensions().await
    }

    /// Returns access to the header of this entry in the archive.
    ///
    /// This provides access to the metadata for this entry in the archive.
    pub fn header(&self) -> &Header {
        &self.fields.header
    }

    /// Returns the starting position, in bytes, of the header of this entry in
    /// the archive.
    ///
    /// The header is always a contiguous section of 512 bytes, so if the
    /// underlying reader implements `Seek`, then the slice from `header_pos` to
    /// `header_pos + 512` contains the raw header bytes.
    pub fn raw_header_position(&self) -> u64 {
        self.fields.header_pos
    }

    /// Returns the starting position, in bytes, of the file of this entry in
    /// the archive.
    ///
    /// If the file of this entry is continuous (e.g. not a sparse file), and
    /// if the underlying reader implements `Seek`, then the slice from
    /// `file_pos` to `file_pos + entry_size` contains the raw file bytes.
    pub fn raw_file_position(&self) -> u64 {
        self.fields.file_pos
    }

    /// Writes this file to the specified location.
    ///
    /// This function will write the entire contents of this file into the
    /// location specified by `dst`. Metadata will also be propagated to the
    /// path `dst`.
    ///
    /// This function will create a file at the path `dst`, and it is required
    /// that the intermediate directories are created. Any existing file at the
    /// location `dst` will be overwritten.
    ///
    /// > **Note**: This function does not have as many sanity checks as
    /// > `Archive::unpack` or `Entry::unpack_in`. As a result if you're
    /// > thinking of unpacking untrusted tarballs you may want to review the
    /// > implementations of the previous two functions and perhaps implement
    /// > similar logic yourself.
    ///
    /// # Examples
    ///
    #[cfg_attr(feature = "runtime-async-std", doc = "```no_run")]
    #[cfg_attr(feature = "runtime-tokio", doc = "```ignore")]
    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
    /// #
    /// use async_std::fs::File;
    /// use async_std::prelude::*;
    /// use async_tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").await?);
    /// let mut entries = ar.entries()?;
    /// let mut i = 0;
    /// while let Some(file) = entries.next().await {
    ///     let mut file = file?;
    ///     file.unpack(format!("file-{}", i)).await?;
    ///     i += 1;
    /// }
    /// #
    /// # Ok(()) }) }
    /// ```
    pub async fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<Unpacked> {
        self.fields.unpack(None, dst.as_ref()).await
    }

    /// Extracts this file under the specified path, avoiding security issues.
    ///
    /// This function will write the entire contents of this file into the
    /// location obtained by appending the path of this file in the archive to
    /// `dst`, creating any intermediate directories if needed. Metadata will
    /// also be propagated to the path `dst`. Any existing file at the location
    /// `dst` will be overwritten.
    ///
    /// This function carefully avoids writing outside of `dst`. If the file has
    /// a '..' in its path, this function will skip it and return `Ok(false)`.
    ///
    /// `Ok(true)` is returned when the entry was unpacked, and also when the
    /// entry's path consists only of root and `.` components, in which case
    /// nothing is written.
    ///
    /// # Examples
    ///
    #[cfg_attr(feature = "runtime-async-std", doc = "```no_run")]
    #[cfg_attr(feature = "runtime-tokio", doc = "```ignore")]
    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
    /// #
    /// use async_std::fs::File;
    /// use async_tar::Archive;
    /// use async_std::prelude::*;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").await?);
    /// let mut entries = ar.entries()?;
    /// let mut i = 0;
    /// while let Some(file) = entries.next().await {
    ///     let mut file = file.unwrap();
    ///     file.unpack_in("target").await?;
    ///     i += 1;
    /// }
    /// #
    /// # Ok(()) }) }
    /// ```
    pub async fn unpack_in<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<bool> {
        self.fields.unpack_in(dst.as_ref()).await
    }

    /// Indicate whether extended file attributes (xattrs on Unix) are preserved
    /// when unpacking this entry.
    ///
    /// This flag is disabled by default and is currently only implemented on
    /// Unix using xattr support. This may eventually be implemented for
    /// Windows, however, if other archive implementations are found which do
    /// this as well.
    pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
        self.fields.unpack_xattrs = unpack_xattrs;
    }

    /// Indicate whether extended permissions (like suid on Unix) are preserved
    /// when unpacking this entry.
    ///
    /// When disabled, only the low `0o777` mode bits from the header are
    /// applied on Unix.
    ///
    /// This flag is disabled by default and is currently only implemented on
    /// Unix.
    pub fn set_preserve_permissions(&mut self, preserve: bool) {
        self.fields.preserve_permissions = preserve;
    }

    /// Indicate whether the modification time recorded in the header is
    /// applied when unpacking this entry.
    ///
    /// When enabled, both the access time and the modification time of the
    /// unpacked file are set to the header's mtime.
    ///
    /// This flag is enabled by default.
    pub fn set_preserve_mtime(&mut self, preserve: bool) {
        self.fields.preserve_mtime = preserve;
    }
}
321
#[cfg(feature = "runtime-async-std")]
impl<R: Read + Unpin> Read for Entry<R> {
    /// Forwards the read to the wrapped `EntryFields`.
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        into: &mut [u8],
    ) -> Poll<io::Result<usize>> {
        let inner = &mut self.get_mut().fields;
        Pin::new(inner).poll_read(cx, into)
    }
}
332
#[cfg(feature = "runtime-tokio")]
impl<R: Read + Unpin> Read for Entry<R> {
    /// Forwards the read to the wrapped `EntryFields`.
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        into: &mut tokio::io::ReadBuf,
    ) -> Poll<io::Result<()>> {
        let inner = &mut self.get_mut().fields;
        Pin::new(inner).poll_read(cx, into)
    }
}
343
344impl<R: Read + Unpin> EntryFields<R> {
345    pub fn from(entry: Entry<R>) -> Self {
346        entry.fields
347    }
348
349    pub fn into_entry(self) -> Entry<R> {
350        Entry {
351            fields: self,
352            _ignored: marker::PhantomData,
353        }
354    }
355
356    pub(crate) fn poll_read_all(
357        self: Pin<&mut Self>,
358        cx: &mut Context<'_>,
359    ) -> Poll<io::Result<Vec<u8>>> {
360        // Preallocate some data but don't let ourselves get too crazy now.
361        let cap = cmp::min(self.size, 128 * 1024);
362        let mut buf = Vec::with_capacity(cap as usize);
363
364        // Copied from futures::ReadToEnd
365        match ready!(poll_read_all_internal(self, cx, &mut buf)) {
366            Ok(_) => Poll::Ready(Ok(buf)),
367            Err(err) => Poll::Ready(Err(err)),
368        }
369    }
370
371    pub async fn read_all(&mut self) -> io::Result<Vec<u8>> {
372        // Preallocate some data but don't let ourselves get too crazy now.
373        let cap = cmp::min(self.size, 128 * 1024);
374        let mut v = Vec::with_capacity(cap as usize);
375        self.read_to_end(&mut v).await.map(|_| v)
376    }
377
378    fn path(&self) -> io::Result<Cow<'_, Path>> {
379        bytes2path(self.path_bytes())
380    }
381
382    fn path_bytes(&self) -> Cow<'_, [u8]> {
383        if let Some(ref bytes) = self.long_pathname {
384            if let Some(&0) = bytes.last() {
385                Cow::Borrowed(&bytes[..bytes.len() - 1])
386            } else {
387                Cow::Borrowed(bytes)
388            }
389        } else {
390            if let Some(ref pax) = self.pax_extensions {
391                let pax = pax_extensions(pax)
392                    .filter_map(Result::ok)
393                    .find(|f| f.key_bytes() == b"path")
394                    .map(|f| f.value_bytes());
395                if let Some(field) = pax {
396                    return Cow::Borrowed(field);
397                }
398            }
399            self.header.path_bytes()
400        }
401    }
402
403    /// Gets the path in a "lossy" way, used for error reporting ONLY.
404    fn path_lossy(&self) -> String {
405        String::from_utf8_lossy(&self.path_bytes()).to_string()
406    }
407
408    fn link_name(&self) -> io::Result<Option<Cow<'_, Path>>> {
409        match self.link_name_bytes() {
410            Some(bytes) => bytes2path(bytes).map(Some),
411            None => Ok(None),
412        }
413    }
414
415    fn link_name_bytes(&self) -> Option<Cow<'_, [u8]>> {
416        match self.long_linkname {
417            Some(ref bytes) => {
418                if let Some(&0) = bytes.last() {
419                    Some(Cow::Borrowed(&bytes[..bytes.len() - 1]))
420                } else {
421                    Some(Cow::Borrowed(bytes))
422                }
423            }
424            None => self.header.link_name_bytes(),
425        }
426    }
427
428    async fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions<'_>>> {
429        if self.pax_extensions.is_none() {
430            if !self.header.entry_type().is_pax_global_extensions()
431                && !self.header.entry_type().is_pax_local_extensions()
432            {
433                return Ok(None);
434            }
435            self.pax_extensions = Some(self.read_all().await?);
436        }
437        Ok(Some(pax_extensions(self.pax_extensions.as_ref().unwrap())))
438    }
439
    /// Unpacks this entry beneath `dst`, refusing paths that would escape it.
    ///
    /// The destination is built by appending the entry's normal path
    /// components to `dst`; prefix, root and `.` components are dropped.
    ///
    /// Returns `Ok(false)` when the entry is skipped (its path contains `..`,
    /// or the computed destination has no parent), and `Ok(true)` when it was
    /// unpacked or when its path was effectively empty. Errors from path
    /// decoding, directory creation, validation, or unpacking are propagated.
    async fn unpack_in(&mut self, dst: &Path) -> io::Result<bool> {
        // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3:
        // * Leading '/'s are trimmed. For example, `///test` is treated as
        //   `test`.
        // * If the filename contains '..', then the file is skipped when
        //   extracting the tarball.
        // * '//' within a filename is effectively skipped. An error is
        //   logged, but otherwise the effect is as if any two or more
        //   adjacent '/'s within the filename were consolidated into one
        //   '/'.
        //
        // Most of this is handled by the `path` module of the standard
        // library, but we specially handle a few cases here as well.

        let mut file_dst = dst.to_path_buf();
        {
            let path = self.path().map_err(|e| {
                TarError::new(
                    &format!("invalid path in entry header: {}", self.path_lossy()),
                    e,
                )
            })?;
            for part in path.components() {
                match part {
                    // Leading '/' characters, root paths, and '.'
                    // components are just ignored and treated as "empty
                    // components"
                    Component::Prefix(..) | Component::RootDir | Component::CurDir => continue,

                    // If any part of the filename is '..', then skip over
                    // unpacking the file to prevent directory traversal
                    // security issues.  See, e.g.: CVE-2001-1267,
                    // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131
                    Component::ParentDir => return Ok(false),

                    Component::Normal(part) => file_dst.push(part),
                }
            }
        }

        // Skip cases where only slashes or '.' parts were seen, because
        // this is effectively an empty filename.
        if *dst == *file_dst {
            return Ok(true);
        }

        // Skip entries without a parent (i.e. outside of FS root)
        let parent = match file_dst.parent() {
            Some(p) => p,
            None => return Ok(false),
        };

        // Create the parent directory first, then validate it against `dst`.
        self.ensure_dir_created(dst, parent)
            .await
            .map_err(|e| TarError::new(&format!("failed to create `{}`", parent.display()), e))?;

        // The value returned by validation becomes the base that hard-link
        // targets are resolved against in `unpack`.
        let canon_target = self.validate_inside_dst(dst, parent).await?;

        self.unpack(Some(&canon_target), &file_dst)
            .await
            .map_err(|e| TarError::new(&format!("failed to unpack `{}`", file_dst.display()), e))?;

        Ok(true)
    }
504
505    /// Unpack as destination directory `dst`.
506    async fn unpack_dir(&mut self, dst: &Path) -> io::Result<()> {
507        // If the directory already exists just let it slide
508        match fs::create_dir(dst).await {
509            Ok(()) => Ok(()),
510            Err(err) => {
511                if err.kind() == ErrorKind::AlreadyExists {
512                    let prev = fs::metadata(dst).await;
513                    if prev.map(|m| m.is_dir()).unwrap_or(false) {
514                        return Ok(());
515                    }
516                }
517                Err(Error::new(
518                    err.kind(),
519                    format!("{} when creating dir {}", err, dst.display()),
520                ))
521            }
522        }
523    }
524
    /// Unpacks this entry to the exact path `dst`, dispatching on entry type.
    ///
    /// * Directories are created (an existing directory is tolerated) and the
    ///   header mode, when it parses, is applied.
    /// * Hard links and symlinks are created from the entry's link name. For
    ///   hard links, when `target_base` is `Some`, the link source is resolved
    ///   against it and validated to lie inside it.
    /// * pax extension records and GNU long-name/long-link records write
    ///   nothing.
    /// * Non-ustar entries whose path ends in `/` are treated as directories.
    /// * Everything else is written as a regular file, replacing any existing
    ///   file at `dst`; mtime, permissions and xattrs are then applied
    ///   according to the entry's flags.
    ///
    /// Returns `Unpacked::File` with the written handle for regular files and
    /// `Unpacked::Other` in every other case.
    async fn unpack(&mut self, target_base: Option<&Path>, dst: &Path) -> io::Result<Unpacked> {
        let kind = self.header.entry_type();

        if kind.is_dir() {
            self.unpack_dir(dst).await?;
            // A header whose mode fails to parse is ignored, not an error.
            if let Ok(mode) = self.header.mode() {
                set_perms(dst, None, mode, self.preserve_permissions).await?;
            }
            return Ok(Unpacked::Other);
        } else if kind.is_hard_link() || kind.is_symlink() {
            // Note: this message says "hard link" although the branch also
            // covers symlinks, and it prints the raw header bytes lossily.
            let src = match self.link_name()? {
                Some(name) => name,
                None => {
                    return Err(other(&format!(
                        "hard link listed for {} but no link name found",
                        String::from_utf8_lossy(self.header.as_bytes())
                    )));
                }
            };

            // Reject link names with no path components (applies to both
            // hard links and symlinks despite the wording of the message).
            if src.iter().count() == 0 {
                return Err(other(&format!(
                    "symlink destination for {} is empty",
                    String::from_utf8_lossy(self.header.as_bytes())
                )));
            }

            if kind.is_hard_link() {
                let link_src = match target_base {
                    // If we're unpacking within a directory then ensure that
                    // the destination of this hard link is both present and
                    // inside our own directory. This is needed because we want
                    // to make sure to not overwrite anything outside the root.
                    //
                    // Note that this logic is only needed for hard links
                    // currently. With symlinks the `validate_inside_dst` which
                    // happens before this method as part of `unpack_in` will
                    // use canonicalization to ensure this guarantee. For hard
                    // links though they're canonicalized to their existing path
                    // so we need to validate at this time.
                    Some(p) => {
                        let link_src = p.join(src);
                        self.validate_inside_dst(p, &link_src).await?;
                        link_src
                    }
                    None => src.into_owned(),
                };
                fs::hard_link(&link_src, dst).await.map_err(|err| {
                    Error::new(
                        err.kind(),
                        format!(
                            "{} when hard linking {} to {}",
                            err,
                            link_src.display(),
                            dst.display()
                        ),
                    )
                })?;
            } else {
                symlink(&src, dst).await.map_err(|err| {
                    Error::new(
                        err.kind(),
                        format!(
                            "{} when symlinking {} to {}",
                            err,
                            src.display(),
                            dst.display()
                        ),
                    )
                })?;
            };
            return Ok(Unpacked::Other);

            // Platform-specific `symlink` helpers; exactly one is expected to
            // be compiled in for a given target.

            // wasm32: symlinks are unsupported and always fail.
            #[cfg(target_arch = "wasm32")]
            #[allow(unused_variables)]
            async fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
                Err(io::Error::new(io::ErrorKind::Other, "Not implemented"))
            }

            // Windows: always creates a *file* symlink, whatever `src` is.
            #[cfg(windows)]
            async fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
                #[cfg(feature = "runtime-async-std")]
                {
                    async_std::os::windows::fs::symlink_file(src, dst).await
                }
                #[cfg(feature = "runtime-tokio")]
                {
                    tokio::fs::symlink_file(src, dst).await
                }
            }

            #[cfg(any(unix, target_os = "redox"))]
            async fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
                #[cfg(feature = "runtime-async-std")]
                async_std::os::unix::fs::symlink(src, dst).await?;
                #[cfg(feature = "runtime-tokio")]
                tokio::fs::symlink(src, dst).await?;

                Ok(())
            }
        } else if kind.is_pax_global_extensions()
            || kind.is_pax_local_extensions()
            || kind.is_gnu_longname()
            || kind.is_gnu_longlink()
        {
            // Metadata-only records: nothing to materialize on disk.
            return Ok(Unpacked::Other);
        };

        // Old BSD-tar compatibility.
        // Names that have a trailing slash should be treated as a directory.
        // Only applies to old headers.
        if self.header.as_ustar().is_none() && self.path_bytes().ends_with(b"/") {
            self.unpack_dir(dst).await?;
            if let Ok(mode) = self.header.mode() {
                set_perms(dst, None, mode, self.preserve_permissions).await?;
            }
            return Ok(Unpacked::Other);
        }

        // Note the lack of `else` clause above. According to the FreeBSD
        // documentation:
        //
        // > A POSIX-compliant implementation must treat any unrecognized
        // > typeflag value as a regular file.
        //
        // As a result if we don't recognize the kind we just write out the file
        // as we would normally.

        // Ensure we write a new file rather than overwriting in-place which
        // is attackable; if an existing file is found unlink it.
        async fn open(dst: &Path) -> io::Result<fs::File> {
            OpenOptions::new()
                .write(true)
                .create_new(true)
                .open(dst)
                .await
        }
        let mut f = async {
            let mut f = match open(dst).await {
                Ok(f) => Ok(f),
                Err(err) => {
                    if err.kind() == ErrorKind::AlreadyExists {
                        // Remove the existing file and retry; a concurrent
                        // removal (`NotFound`) is fine and also retried.
                        match fs::remove_file(dst).await {
                            Ok(()) => open(dst).await,
                            Err(ref e) if e.kind() == io::ErrorKind::NotFound => open(dst).await,
                            Err(e) => Err(e),
                        }
                    } else {
                        Err(err)
                    }
                }
            }?;
            // Consume the entry's segments in order; after this loop
            // `self.data` is empty.
            for io in self.data.drain(..) {
                match io {
                    EntryIo::Data(mut d) => {
                        // A short copy means the archive ended early.
                        let expected = d.limit();
                        if io::copy(&mut d, &mut f).await? != expected {
                            return Err(other("failed to write entire file"));
                        }
                    }
                    EntryIo::Pad(d) => {
                        // Skip over the padding instead of writing it, then
                        // extend the file to the new position.
                        // TODO: checked cast to i64
                        let to = SeekFrom::Current(d.limit() as i64);
                        let size = f.seek(to).await?;
                        f.set_len(size).await?;
                    }
                }
            }
            Ok::<fs::File, io::Error>(f)
        }
        .await
        .map_err(|e| {
            // The message uses the header's own path bytes, not the
            // long-name/pax override.
            let header = self.header.path_bytes();
            TarError::new(
                &format!(
                    "failed to unpack `{}` into `{}`",
                    String::from_utf8_lossy(&header),
                    dst.display()
                ),
                e,
            )
        })?;

        if self.preserve_mtime {
            if let Ok(mtime) = self.header.mtime() {
                // Both atime and mtime are set to the header's mtime.
                // NOTE(review): `set_file_times` is called without `.await`,
                // so it appears to be a synchronous call inside this async
                // fn — confirm that is acceptable.
                let mtime = FileTime::from_unix_time(mtime as i64, 0);
                filetime::set_file_times(dst, mtime, mtime).map_err(|e| {
                    TarError::new(&format!("failed to set mtime for `{}`", dst.display()), e)
                })?;
            }
        }
        if let Ok(mode) = self.header.mode() {
            set_perms(dst, Some(&mut f), mode, self.preserve_permissions).await?;
        }
        if self.unpack_xattrs {
            set_xattrs(self, dst).await?;
        }
        return Ok(Unpacked::File(f));

        // Applies `mode` via the platform `_set_perms`, wrapping any failure
        // in a `TarError` that names the mode and the path.
        async fn set_perms(
            dst: &Path,
            f: Option<&mut fs::File>,
            mode: u32,
            preserve: bool,
        ) -> Result<(), TarError> {
            _set_perms(dst, f, mode, preserve).await.map_err(|e| {
                TarError::new(
                    &format!(
                        "failed to set permissions to {:o} \
                         for `{}`",
                        mode,
                        dst.display()
                    ),
                    e,
                )
            })
        }

        // Unix/redox: sets the mode on the open handle when one is given,
        // otherwise on the path. Without `preserve`, only the `0o777` bits
        // are kept.
        #[cfg(any(unix, target_os = "redox"))]
        async fn _set_perms(
            dst: &Path,
            f: Option<&mut fs::File>,
            mode: u32,
            preserve: bool,
        ) -> io::Result<()> {
            use std::os::unix::prelude::*;

            let mode = if preserve { mode } else { mode & 0o777 };
            let perm = Permissions::from_mode(mode as _);
            match f {
                Some(f) => f.set_permissions(perm).await,
                None => fs::set_permissions(dst, perm).await,
            }
        }

        // Windows: the only thing mapped is the owner-write bit; when it is
        // clear the target is marked read-only, otherwise nothing is changed.
        #[cfg(windows)]
        async fn _set_perms(
            dst: &Path,
            f: Option<&mut fs::File>,
            mode: u32,
            _preserve: bool,
        ) -> io::Result<()> {
            if mode & 0o200 == 0o200 {
                return Ok(());
            }
            match f {
                Some(f) => {
                    let mut perm = f.metadata().await?.permissions();
                    perm.set_readonly(true);
                    f.set_permissions(perm).await
                }
                None => {
                    let mut perm = fs::metadata(dst).await?.permissions();
                    perm.set_readonly(true);
                    fs::set_permissions(dst, perm).await
                }
            }
        }

        // wasm32: always fails, so any entry with a parseable mode errors out
        // of `unpack` on this target.
        #[cfg(target_arch = "wasm32")]
        #[allow(unused_variables)]
        async fn _set_perms(
            dst: &Path,
            f: Option<&mut fs::File>,
            mode: u32,
            _preserve: bool,
        ) -> io::Result<()> {
            Err(io::Error::new(io::ErrorKind::Other, "Not implemented"))
        }

        // Applies every `SCHILY.xattr.<name>` pax record to `dst` as the
        // extended attribute `<name>`. A failure to obtain pax records is
        // treated as "no xattrs" rather than as an error.
        #[cfg(all(unix, feature = "xattr"))]
        async fn set_xattrs<R: Read + Unpin>(
            me: &mut EntryFields<R>,
            dst: &Path,
        ) -> io::Result<()> {
            use std::{ffi::OsStr, os::unix::prelude::*};

            let exts = match me.pax_extensions().await {
                Ok(Some(e)) => e,
                _ => return Ok(()),
            };
            let exts = exts
                .filter_map(Result::ok)
                .filter_map(|e| {
                    let key = e.key_bytes();
                    let prefix = b"SCHILY.xattr.";
                    if key.starts_with(prefix) {
                        Some((&key[prefix.len()..], e))
                    } else {
                        None
                    }
                })
                .map(|(key, e)| (OsStr::from_bytes(key), e.value_bytes()));

            for (key, value) in exts {
                xattr::set(dst, key, value).map_err(|e| {
                    TarError::new(
                        &format!(
                            "failed to set extended \
                             attributes to {}. \
                             Xattrs: key={:?}, value={:?}.",
                            dst.display(),
                            key,
                            String::from_utf8_lossy(value)
                        ),
                        e,
                    )
                })?;
            }

            Ok(())
        }
        // Windows does not completely support posix xattrs
        // https://en.wikipedia.org/wiki/Extended_file_attributes#Windows_NT
        #[cfg(any(
            windows,
            target_os = "redox",
            not(feature = "xattr"),
            target_arch = "wasm32"
        ))]
        async fn set_xattrs<R: Read + Unpin>(_: &mut EntryFields<R>, _: &Path) -> io::Result<()> {
            Ok(())
        }
    }
850
851    async fn ensure_dir_created(&self, dst: &Path, dir: &Path) -> io::Result<()> {
852        let mut ancestor = dir;
853        let mut dirs_to_create = Vec::new();
854
855        while symlink_metadata(ancestor).await.is_err() {
856            dirs_to_create.push(ancestor);
857            if let Some(parent) = ancestor.parent() {
858                ancestor = parent;
859            } else {
860                break;
861            }
862        }
863        for ancestor in dirs_to_create.into_iter().rev() {
864            if let Some(parent) = ancestor.parent() {
865                self.validate_inside_dst(dst, parent).await?;
866            }
867            fs::create_dir(ancestor).await?;
868        }
869        Ok(())
870    }
871
872    async fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result<PathBuf> {
873        // Abort if target (canonical) parent is outside of `dst`
874        let canon_parent = fs_canonicalize(file_dst).await.map_err(|err| {
875            Error::new(
876                err.kind(),
877                format!("{} while canonicalizing {}", err, file_dst.display()),
878            )
879        })?;
880        let canon_target = fs_canonicalize(dst).await.map_err(|err| {
881            Error::new(
882                err.kind(),
883                format!("{} while canonicalizing {}", err, dst.display()),
884            )
885        })?;
886        if !canon_parent.starts_with(&canon_target) {
887            let err = TarError::new(
888                &format!(
889                    "trying to unpack outside of destination path: {}",
890                    canon_target.display()
891                ),
892                // TODO: use ErrorKind::InvalidInput here? (minor breaking change)
893                Error::other("Invalid argument"),
894            );
895            return Err(err.into());
896        }
897        Ok(canon_target)
898    }
899}
900
#[cfg(feature = "runtime-async-std")]
impl<R: Read + Unpin> Read for EntryFields<R> {
    /// Reads from the entry's queued segments (`data`) in order.
    ///
    /// The segment currently being read is kept in `read_state`. When it
    /// reports end-of-stream (a read of zero bytes) it is dropped and the
    /// next segment is taken from the front of `data`. `Ok(0)` is returned
    /// once no segment is left.
    ///
    /// A call with an empty `into` returns `Ok(0)` immediately and leaves
    /// all state untouched: a zero-byte read into an empty buffer says
    /// nothing about end-of-stream, so it must not cause segments to be
    /// discarded.
    fn poll_read(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        into: &mut [u8],
    ) -> Poll<io::Result<usize>> {
        if into.is_empty() {
            return Poll::Ready(Ok(0));
        }

        loop {
            if self.read_state.is_none() {
                if self.data.is_empty() {
                    // Unable to pull another value from `data`, so we are done.
                    return Poll::Ready(Ok(0));
                }
                self.read_state = Some(self.data.remove(0));
            }

            if let Some(io) = &mut self.read_state {
                match ready!(Pin::new(io).poll_read(cx, into)) {
                    // Current segment is exhausted; move on to the next one.
                    Ok(0) => self.read_state = None,
                    done => return Poll::Ready(done),
                }
            }
        }
    }
}
943
#[cfg(feature = "runtime-tokio")]
impl<R: Read + Unpin> Read for EntryFields<R> {
    /// Reads from the entry's queued segments (`data`) in order.
    ///
    /// The segment currently being read is kept in `read_state`. When a
    /// read from it completes without filling any bytes it is treated as
    /// exhausted, dropped, and the next segment is taken from the front of
    /// `data`. Once no segment is left the call completes without filling
    /// anything, which signals end-of-stream to the caller.
    ///
    /// A call with no remaining capacity in `into` returns immediately and
    /// leaves all state untouched: filling zero bytes into a full buffer
    /// says nothing about end-of-stream, so it must not cause segments to be
    /// discarded.
    fn poll_read(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        into: &mut tokio::io::ReadBuf,
    ) -> Poll<io::Result<()>> {
        if into.remaining() == 0 {
            return Poll::Ready(Ok(()));
        }

        loop {
            if self.read_state.is_none() {
                if self.data.is_empty() {
                    // Unable to pull another value from `data`, so we are done.
                    return Poll::Ready(Ok(()));
                }
                self.read_state = Some(self.data.remove(0));
            }

            if let Some(io) = &mut self.read_state {
                let start = into.filled().len();
                match ready!(Pin::new(io).poll_read(cx, into)) {
                    // Nothing was filled: the current segment is exhausted,
                    // so move on to the next one.
                    Ok(()) if into.filled().len() == start => self.read_state = None,
                    done => return Poll::Ready(done),
                }
            }
        }
    }
}
989
#[cfg(feature = "runtime-async-std")]
impl<R: Read + Unpin> Read for EntryIo<R> {
    /// Forwards the read to whichever reader this segment wraps.
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        into: &mut [u8],
    ) -> Poll<io::Result<usize>> {
        match self.get_mut() {
            EntryIo::Pad(pad) => Pin::new(pad).poll_read(cx, into),
            EntryIo::Data(data) => Pin::new(data).poll_read(cx, into),
        }
    }
}
1003
#[cfg(feature = "runtime-tokio")]
impl<R: Read + Unpin> Read for EntryIo<R> {
    /// Forwards the read to whichever reader this segment wraps.
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        into: &mut tokio::io::ReadBuf,
    ) -> Poll<io::Result<()>> {
        match self.get_mut() {
            EntryIo::Pad(pad) => Pin::new(pad).poll_read(cx, into),
            EntryIo::Data(data) => Pin::new(data).poll_read(cx, into),
        }
    }
}
1017
/// Cuts a vector back to a recorded length when dropped.
///
/// The holder may temporarily grow `buf` past `len` (for example to expose
/// zeroed scratch space to a reader) and advance `len` as bytes become valid.
/// Whatever lies beyond `len` is removed on drop, on every exit path.
struct Guard<'a> {
    /// The vector being filled.
    buf: &'a mut Vec<u8>,
    /// Number of leading bytes of `buf` to keep.
    len: usize,
}

impl Drop for Guard<'_> {
    fn drop(&mut self) {
        // `truncate` is the safe equivalent of `set_len` for shrinking; it is
        // a no-op if `len` is not smaller than the current length.
        self.buf.truncate(self.len);
    }
}
1030
/// Polls `rd` until end-of-stream, appending everything read to `buf`.
///
/// Whenever the valid region reaches the end of `buf`, the vector is grown
/// (at least 32 more bytes) and the new space is zero-filled so the reader
/// is handed an initialized slice. A `Guard` trims `buf` back to the valid
/// length on every exit path, including `Poll::Pending`, so bytes read by
/// earlier iterations stay in `buf` and a later poll continues after them.
///
/// Returns the length of `buf` at end-of-stream (this includes any bytes
/// that were already in `buf` when the call was made), or the first read
/// error.
#[cfg(feature = "runtime-async-std")]
fn poll_read_all_internal<R: Read + ?Sized>(
    mut rd: Pin<&mut R>,
    cx: &mut Context<'_>,
    buf: &mut Vec<u8>,
) -> Poll<io::Result<usize>> {
    let mut g = Guard {
        len: buf.len(),
        buf,
    };
    loop {
        if g.len == g.buf.len() {
            // Expose the whole (grown) capacity as zeroed scratch space.
            g.buf.reserve(32);
            let capacity = g.buf.capacity();
            g.buf.resize(capacity, 0);
        }

        match ready!(rd.as_mut().poll_read(cx, &mut g.buf[g.len..])) {
            Ok(0) => return Poll::Ready(Ok(g.len)),
            Ok(n) => g.len += n,
            Err(e) => return Poll::Ready(Err(e)),
        }
    }
}
1069
/// Polls `rd` until end-of-stream, appending everything read to `buf`.
///
/// Whenever the valid region reaches the end of `buf`, the vector is grown
/// (at least 32 more bytes) and the new space is zero-filled so the reader
/// is handed an initialized slice. A `Guard` trims `buf` back to the valid
/// length on every exit path, including `Poll::Pending`, so bytes read by
/// earlier iterations stay in `buf` and a later poll continues after them.
///
/// Returns the length of `buf` at end-of-stream (this includes any bytes
/// that were already in `buf` when the call was made), or the first read
/// error.
#[cfg(feature = "runtime-tokio")]
fn poll_read_all_internal<R: Read + ?Sized>(
    mut rd: Pin<&mut R>,
    cx: &mut Context<'_>,
    buf: &mut Vec<u8>,
) -> Poll<io::Result<usize>> {
    let mut g = Guard {
        len: buf.len(),
        buf,
    };
    loop {
        if g.len == g.buf.len() {
            // Expose the whole (grown) capacity as zeroed scratch space.
            g.buf.reserve(32);
            let capacity = g.buf.capacity();
            g.buf.resize(capacity, 0);
        }

        // A fresh `ReadBuf` starts with nothing filled, so its filled length
        // after the call is exactly the number of bytes just read.
        let mut read_buf = io::ReadBuf::new(&mut g.buf[g.len..]);
        match ready!(rd.as_mut().poll_read(cx, &mut read_buf)) {
            Ok(()) => {
                let read = read_buf.filled().len();
                if read == 0 {
                    return Poll::Ready(Ok(g.len));
                }
                g.len += read;
            }
            Err(e) => return Poll::Ready(Err(e)),
        }
    }
}