tar_rsl/
archive.rs

1use std::cell::{Cell, RefCell};
2use std::cmp;
3use std::convert::TryFrom;
4use std::fs;
5use std::io::prelude::*;
6use std::io::{self, SeekFrom};
7use std::marker;
8use std::path::Path;
9
10use crate::entry::{EntryFields, EntryIo};
11use crate::error::TarError;
12use crate::other;
13use crate::pax::*;
14use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};
15
16/// A top-level representation of an archive file.
17///
18/// This archive can have an entry added to it and it can be iterated over.
19pub struct Archive<R: ?Sized + Read> {
20    inner: ArchiveInner<R>,
21}
22
/// Shared state behind an `Archive`: the wrapped reader, the number of bytes
/// consumed from it so far, and the options applied when unpacking entries.
pub struct ArchiveInner<R>
where
    R: ?Sized,
{
    /// Current position in the archive, updated on every read and seek.
    pos: Cell<u64>,
    /// Permission mask handed to each entry (0 by default).
    mask: u32,
    /// Whether extended attributes are unpacked.
    unpack_xattrs: bool,
    /// Whether extended permission bits are preserved.
    preserve_permissions: bool,
    /// Whether numeric owner ids are preserved.
    preserve_ownerships: bool,
    /// Whether modification times are preserved.
    preserve_mtime: bool,
    /// Whether existing files and symlinks are overwritten.
    overwrite: bool,
    /// Whether all-zero header blocks are skipped instead of ending iteration.
    ignore_zeros: bool,
    /// The wrapped reader. This must remain the last field so the struct can
    /// be coerced to an unsized `ArchiveInner<dyn Read>`.
    obj: RefCell<R>,
}
34
35/// An iterator over the entries of an archive.
36pub struct Entries<'a, R: 'a + Read> {
37    fields: EntriesFields<'a>,
38    _ignored: marker::PhantomData<&'a Archive<R>>,
39}
40
/// Combination of `Read` and `Seek`, so that a seekable reader can be named
/// as a single trait object (`dyn SeekRead`).
trait SeekRead: Read + Seek {}

/// Every sized type that can both read and seek is a `SeekRead`.
impl<T> SeekRead for T where T: Read + Seek {}
43
44struct EntriesFields<'a> {
45    archive: &'a Archive<dyn Read + 'a>,
46    seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
47    next: u64,
48    done: bool,
49    raw: bool,
50}
51
52impl<R: Read> Archive<R> {
53    /// Create a new archive with the underlying object as the reader.
54    pub fn new(obj: R) -> Archive<R> {
55        Archive {
56            inner: ArchiveInner {
57                mask: u32::MIN,
58                unpack_xattrs: false,
59                preserve_permissions: false,
60                preserve_ownerships: false,
61                preserve_mtime: true,
62                overwrite: true,
63                ignore_zeros: false,
64                obj: RefCell::new(obj),
65                pos: Cell::new(0),
66            },
67        }
68    }
69
70    /// Unwrap this archive, returning the underlying object.
71    pub fn into_inner(self) -> R {
72        self.inner.obj.into_inner()
73    }
74
75    /// Construct an iterator over the entries in this archive.
76    ///
77    /// Note that care must be taken to consider each entry within an archive in
78    /// sequence. If entries are processed out of sequence (from what the
79    /// iterator returns), then the contents read for each entry may be
80    /// corrupted.
81    pub fn entries(&mut self) -> io::Result<Entries<R>> {
82        let me: &mut Archive<dyn Read> = self;
83        me._entries(None).map(|fields| Entries {
84            fields: fields,
85            _ignored: marker::PhantomData,
86        })
87    }
88
89    /// Unpacks the contents tarball into the specified `dst`.
90    ///
91    /// This function will iterate over the entire contents of this tarball,
92    /// extracting each file in turn to the location specified by the entry's
93    /// path name.
94    ///
95    /// This operation is relatively sensitive in that it will not write files
96    /// outside of the path specified by `dst`. Files in the archive which have
97    /// a '..' in their path are skipped during the unpacking process.
98    ///
99    /// # Examples
100    ///
101    /// ```no_run
102    /// use std::fs::File;
103    /// use tar::Archive;
104    ///
105    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
106    /// ar.unpack("foo").unwrap();
107    /// ```
108    pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
109        let me: &mut Archive<dyn Read> = self;
110        me._unpack(dst.as_ref())
111    }
112
113    /// Set the mask of the permission bits when unpacking this entry.
114    ///
115    /// The mask will be inverted when applying against a mode, similar to how
116    /// `umask` works on Unix. In logical notation it looks like:
117    ///
118    /// ```text
119    /// new_mode = old_mode & (~mask)
120    /// ```
121    ///
122    /// The mask is 0 by default and is currently only implemented on Unix.
123    pub fn set_mask(&mut self, mask: u32) {
124        self.inner.mask = mask;
125    }
126
127    /// Indicate whether extended file attributes (xattrs on Unix) are preserved
128    /// when unpacking this archive.
129    ///
130    /// This flag is disabled by default and is currently only implemented on
131    /// Unix using xattr support. This may eventually be implemented for
132    /// Windows, however, if other archive implementations are found which do
133    /// this as well.
134    pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
135        self.inner.unpack_xattrs = unpack_xattrs;
136    }
137
138    /// Indicate whether extended permissions (like suid on Unix) are preserved
139    /// when unpacking this entry.
140    ///
141    /// This flag is disabled by default and is currently only implemented on
142    /// Unix.
143    pub fn set_preserve_permissions(&mut self, preserve: bool) {
144        self.inner.preserve_permissions = preserve;
145    }
146
147    /// Indicate whether numeric ownership ids (like uid and gid on Unix)
148    /// are preserved when unpacking this entry.
149    ///
150    /// This flag is disabled by default and is currently only implemented on
151    /// Unix.
152    pub fn set_preserve_ownerships(&mut self, preserve: bool) {
153        self.inner.preserve_ownerships = preserve;
154    }
155
156    /// Indicate whether files and symlinks should be overwritten on extraction.
157    pub fn set_overwrite(&mut self, overwrite: bool) {
158        self.inner.overwrite = overwrite;
159    }
160
161    /// Indicate whether access time information is preserved when unpacking
162    /// this entry.
163    ///
164    /// This flag is enabled by default.
165    pub fn set_preserve_mtime(&mut self, preserve: bool) {
166        self.inner.preserve_mtime = preserve;
167    }
168
169    /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more
170    /// entries.
171    ///
172    /// This can be used in case multiple tar archives have been concatenated together.
173    pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) {
174        self.inner.ignore_zeros = ignore_zeros;
175    }
176}
177
178impl<R: Seek + Read> Archive<R> {
179    /// Construct an iterator over the entries in this archive for a seekable
180    /// reader. Seek will be used to efficiently skip over file contents.
181    ///
182    /// Note that care must be taken to consider each entry within an archive in
183    /// sequence. If entries are processed out of sequence (from what the
184    /// iterator returns), then the contents read for each entry may be
185    /// corrupted.
186    pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> {
187        let me: &Archive<dyn Read> = self;
188        let me_seekable: &Archive<dyn SeekRead> = self;
189        me._entries(Some(me_seekable)).map(|fields| Entries {
190            fields: fields,
191            _ignored: marker::PhantomData,
192        })
193    }
194}
195
196impl Archive<dyn Read + '_> {
197    fn _entries<'a>(
198        &'a self,
199        seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
200    ) -> io::Result<EntriesFields<'a>> {
201        if self.inner.pos.get() != 0 {
202            return Err(other(
203                "cannot call entries unless archive is at \
204                 position 0",
205            ));
206        }
207        Ok(EntriesFields {
208            archive: self,
209            seekable_archive,
210            done: false,
211            next: 0,
212            raw: false,
213        })
214    }
215
216    fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
217        if dst.symlink_metadata().is_err() {
218            fs::create_dir_all(&dst)
219                .map_err(|e| TarError::new(format!("failed to create `{}`", dst.display()), e))?;
220        }
221
222        // Canonicalizing the dst directory will prepend the path with '\\?\'
223        // on windows which will allow windows APIs to treat the path as an
224        // extended-length path with a 32,767 character limit. Otherwise all
225        // unpacked paths over 260 characters will fail on creation with a
226        // NotFound exception.
227        let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf());
228
229        // Delay any directory entries until the end (they will be created if needed by
230        // descendants), to ensure that directory permissions do not interfer with descendant
231        // extraction.
232        let mut directories = Vec::new();
233        for entry in self._entries(None)? {
234            let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
235            if file.header().entry_type() == crate::EntryType::Directory {
236                directories.push(file);
237            } else {
238                file.unpack_in(dst)?;
239            }
240        }
241        for mut dir in directories {
242            dir.unpack_in(dst)?;
243        }
244
245        Ok(())
246    }
247}
248
249impl<'a, R: Read> Entries<'a, R> {
250    /// Indicates whether this iterator will return raw entries or not.
251    ///
252    /// If the raw list of entries are returned, then no preprocessing happens
253    /// on account of this library, for example taking into account GNU long name
254    /// or long link archive members. Raw iteration is disabled by default.
255    pub fn raw(self, raw: bool) -> Entries<'a, R> {
256        Entries {
257            fields: EntriesFields {
258                raw: raw,
259                ..self.fields
260            },
261            _ignored: marker::PhantomData,
262        }
263    }
264}
265impl<'a, R: Read> Iterator for Entries<'a, R> {
266    type Item = io::Result<Entry<'a, R>>;
267
268    fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
269        self.fields
270            .next()
271            .map(|result| result.map(|e| EntryFields::from(e).into_entry()))
272    }
273}
274
275impl<'a> EntriesFields<'a> {
276    fn next_entry_raw(
277        &mut self,
278        pax_extensions: Option<&[u8]>,
279    ) -> io::Result<Option<Entry<'a, io::Empty>>> {
280        let mut header = Header::new_old();
281        let mut header_pos = self.next;
282        loop {
283            // Seek to the start of the next header in the archive
284            let delta = self.next - self.archive.inner.pos.get();
285            self.skip(delta)?;
286
287            // EOF is an indicator that we are at the end of the archive.
288            if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
289                return Ok(None);
290            }
291
292            // If a header is not all zeros, we have another valid header.
293            // Otherwise, check if we are ignoring zeros and continue, or break as if this is the
294            // end of the archive.
295            if !header.as_bytes().iter().all(|i| *i == 0) {
296                self.next += 512;
297                break;
298            }
299
300            if !self.archive.inner.ignore_zeros {
301                return Ok(None);
302            }
303            self.next += 512;
304            header_pos = self.next;
305        }
306
307        // Make sure the checksum is ok
308        let sum = header.as_bytes()[..148]
309            .iter()
310            .chain(&header.as_bytes()[156..])
311            .fold(0, |a, b| a + (*b as u32))
312            + 8 * 32;
313        let cksum = header.cksum()?;
314        if sum != cksum {
315            return Err(other("archive header checksum mismatch"));
316        }
317
318        let mut pax_size: Option<u64> = None;
319        if let Some(pax_extensions_ref) = &pax_extensions {
320            pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE);
321
322            if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) {
323                header.set_uid(pax_uid);
324            }
325
326            if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) {
327                header.set_gid(pax_gid);
328            }
329        }
330
331        let file_pos = self.next;
332        let mut size = header.entry_size()?;
333        if size == 0 {
334            if let Some(pax_size) = pax_size {
335                size = pax_size;
336            }
337        }
338        let ret = EntryFields {
339            size: size,
340            header_pos: header_pos,
341            file_pos: file_pos,
342            data: vec![EntryIo::Data((&self.archive.inner).take(size))],
343            header: header,
344            long_pathname: None,
345            long_linkname: None,
346            pax_extensions: None,
347            mask: self.archive.inner.mask,
348            unpack_xattrs: self.archive.inner.unpack_xattrs,
349            preserve_permissions: self.archive.inner.preserve_permissions,
350            preserve_mtime: self.archive.inner.preserve_mtime,
351            overwrite: self.archive.inner.overwrite,
352            preserve_ownerships: self.archive.inner.preserve_ownerships,
353        };
354
355        // Store where the next entry is, rounding up by 512 bytes (the size of
356        // a header);
357        let size = size
358            .checked_add(511)
359            .ok_or_else(|| other("size overflow"))?;
360        self.next = self
361            .next
362            .checked_add(size & !(512 - 1))
363            .ok_or_else(|| other("size overflow"))?;
364
365        Ok(Some(ret.into_entry()))
366    }
367
368    fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
369        if self.raw {
370            return self.next_entry_raw(None);
371        }
372
373        let mut gnu_longname = None;
374        let mut gnu_longlink = None;
375        let mut pax_extensions = None;
376        let mut processed = 0;
377        loop {
378            processed += 1;
379            let entry = match self.next_entry_raw(pax_extensions.as_deref())? {
380                Some(entry) => entry,
381                None if processed > 1 => {
382                    return Err(other(
383                        "members found describing a future member \
384                         but no future member found",
385                    ));
386                }
387                None => return Ok(None),
388            };
389
390            let is_recognized_header =
391                entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some();
392
393            if is_recognized_header && entry.header().entry_type().is_gnu_longname() {
394                if gnu_longname.is_some() {
395                    return Err(other(
396                        "two long name entries describing \
397                         the same member",
398                    ));
399                }
400                gnu_longname = Some(EntryFields::from(entry).read_all()?);
401                continue;
402            }
403
404            if is_recognized_header && entry.header().entry_type().is_gnu_longlink() {
405                if gnu_longlink.is_some() {
406                    return Err(other(
407                        "two long name entries describing \
408                         the same member",
409                    ));
410                }
411                gnu_longlink = Some(EntryFields::from(entry).read_all()?);
412                continue;
413            }
414
415            if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() {
416                if pax_extensions.is_some() {
417                    return Err(other(
418                        "two pax extensions entries describing \
419                         the same member",
420                    ));
421                }
422                pax_extensions = Some(EntryFields::from(entry).read_all()?);
423                continue;
424            }
425
426            let mut fields = EntryFields::from(entry);
427            fields.long_pathname = gnu_longname;
428            fields.long_linkname = gnu_longlink;
429            fields.pax_extensions = pax_extensions;
430            self.parse_sparse_header(&mut fields)?;
431            return Ok(Some(fields.into_entry()));
432        }
433    }
434
435    fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
436        if !entry.header.entry_type().is_gnu_sparse() {
437            return Ok(());
438        }
439        let gnu = match entry.header.as_gnu() {
440            Some(gnu) => gnu,
441            None => return Err(other("sparse entry type listed but not GNU header")),
442        };
443
444        // Sparse files are represented internally as a list of blocks that are
445        // read. Blocks are either a bunch of 0's or they're data from the
446        // underlying archive.
447        //
448        // Blocks of a sparse file are described by the `GnuSparseHeader`
449        // structure, some of which are contained in `GnuHeader` but some of
450        // which may also be contained after the first header in further
451        // headers.
452        //
453        // We read off all the blocks here and use the `add_block` function to
454        // incrementally add them to the list of I/O block (in `entry.data`).
455        // The `add_block` function also validates that each chunk comes after
456        // the previous, we don't overrun the end of the file, and each block is
457        // aligned to a 512-byte boundary in the archive itself.
458        //
459        // At the end we verify that the sparse file size (`Header::size`) is
460        // the same as the current offset (described by the list of blocks) as
461        // well as the amount of data read equals the size of the entry
462        // (`Header::entry_size`).
463        entry.data.truncate(0);
464
465        let mut cur = 0;
466        let mut remaining = entry.size;
467        {
468            let data = &mut entry.data;
469            let reader = &self.archive.inner;
470            let size = entry.size;
471            let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
472                if block.is_empty() {
473                    return Ok(());
474                }
475                let off = block.offset()?;
476                let len = block.length()?;
477                if len != 0 && (size - remaining) % 512 != 0 {
478                    return Err(other(
479                        "previous block in sparse file was not \
480                         aligned to 512-byte boundary",
481                    ));
482                } else if off < cur {
483                    return Err(other(
484                        "out of order or overlapping sparse \
485                         blocks",
486                    ));
487                } else if cur < off {
488                    let block = io::repeat(0).take(off - cur);
489                    data.push(EntryIo::Pad(block));
490                }
491                cur = off
492                    .checked_add(len)
493                    .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
494                remaining = remaining.checked_sub(len).ok_or_else(|| {
495                    other(
496                        "sparse file consumed more data than the header \
497                         listed",
498                    )
499                })?;
500                data.push(EntryIo::Data(reader.take(len)));
501                Ok(())
502            };
503            for block in gnu.sparse.iter() {
504                add_block(block)?
505            }
506            if gnu.is_extended() {
507                let mut ext = GnuExtSparseHeader::new();
508                ext.isextended[0] = 1;
509                while ext.is_extended() {
510                    if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
511                        return Err(other("failed to read extension"));
512                    }
513
514                    self.next += 512;
515                    for block in ext.sparse.iter() {
516                        add_block(block)?;
517                    }
518                }
519            }
520        }
521        if cur != gnu.real_size()? {
522            return Err(other(
523                "mismatch in sparse file chunks and \
524                 size in header",
525            ));
526        }
527        entry.size = cur;
528        if remaining > 0 {
529            return Err(other(
530                "mismatch in sparse file chunks and \
531                 entry size in header",
532            ));
533        }
534        Ok(())
535    }
536
537    fn skip(&mut self, mut amt: u64) -> io::Result<()> {
538        if let Some(seekable_archive) = self.seekable_archive {
539            let pos = io::SeekFrom::Current(
540                i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?,
541            );
542            (&seekable_archive.inner).seek(pos)?;
543        } else {
544            let mut buf = [0u8; 4096 * 8];
545            while amt > 0 {
546                let n = cmp::min(amt, buf.len() as u64);
547                let n = (&self.archive.inner).read(&mut buf[..n as usize])?;
548                if n == 0 {
549                    return Err(other("unexpected EOF during skip"));
550                }
551                amt -= n as u64;
552            }
553        }
554        Ok(())
555    }
556}
557
558impl<'a> Iterator for EntriesFields<'a> {
559    type Item = io::Result<Entry<'a, io::Empty>>;
560
561    fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> {
562        if self.done {
563            None
564        } else {
565            match self.next_entry() {
566                Ok(Some(e)) => Some(Ok(e)),
567                Ok(None) => {
568                    self.done = true;
569                    None
570                }
571                Err(e) => {
572                    self.done = true;
573                    Some(Err(e))
574                }
575            }
576        }
577    }
578}
579
580impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
581    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
582        let i = self.obj.borrow_mut().read(into)?;
583        self.pos.set(self.pos.get() + i as u64);
584        Ok(i)
585    }
586}
587
588impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> {
589    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
590        let pos = self.obj.borrow_mut().seek(pos)?;
591        self.pos.set(pos);
592        Ok(pos)
593    }
594}
595
596/// Try to fill the buffer from the reader.
597///
598/// If the reader reaches its end before filling the buffer at all, returns `false`.
599/// Otherwise returns `true`.
600fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
601    let mut read = 0;
602    while read < buf.len() {
603        match r.read(&mut buf[read..])? {
604            0 => {
605                if read == 0 {
606                    return Ok(false);
607                }
608
609                return Err(other("failed to read entire block"));
610            }
611            n => read += n,
612        }
613    }
614    Ok(true)
615}