binstall_tar/entry.rs
1use std::borrow::Cow;
2use std::cmp;
3use std::fs;
4use std::fs::OpenOptions;
5use std::io::prelude::*;
6use std::io::{self, Error, ErrorKind, SeekFrom};
7use std::marker;
8use std::path::{Component, Path, PathBuf};
9
10use filetime::{self, FileTime};
11
12use crate::archive::ArchiveInner;
13use crate::error::TarError;
14use crate::header::bytes2path;
15use crate::other;
16use crate::{Archive, Header, PaxExtensions};
17
/// A read-only view into an entry of an archive.
///
/// This structure is a window into a portion of a borrowed archive which can
/// be inspected. It acts as a file handle by implementing the `Read` trait. An
/// entry cannot be rewritten once inserted into an archive.
pub struct Entry<'a, R: 'a + Read> {
    // All entry state lives in the non-generic `EntryFields`; the public
    // methods on `Entry` simply forward to it.
    fields: EntryFields<'a>,
    // Zero-sized marker that ties this entry to the lifetime and reader type
    // of the borrowed `Archive<R>` without storing a reference to it.
    _ignored: marker::PhantomData<&'a Archive<R>>,
}
27
// private implementation detail of `Entry`, but concrete (no type parameters)
// and also all-public to be constructed from other modules.
pub struct EntryFields<'a> {
    // Path override bytes. When present they take precedence over both the
    // pax `path` record and the header path, and a single trailing NUL byte
    // is stripped before use.
    pub long_pathname: Option<Vec<u8>>,
    // Link-name override bytes, handled like `long_pathname` but for the link
    // target (the corresponding pax key is `linkpath`).
    pub long_linkname: Option<Vec<u8>>,
    // Raw pax extension records for this entry, parsed on demand with
    // `PaxExtensions::new`.
    pub pax_extensions: Option<Vec<u8>>,
    // Permission bits cleared from the header mode when unpacking on Unix
    // (`mode & !mask`).
    pub mask: u32,
    // The header describing this entry.
    pub header: Header,
    // Entry size as reported by `Entry::size`.
    pub size: u64,
    // Value reported by `Entry::raw_header_position`.
    pub header_pos: u64,
    // Value reported by `Entry::raw_file_position`.
    pub file_pos: u64,
    // Ordered segments making up the entry contents; they are consumed front
    // to back when reading or unpacking.
    pub data: Vec<EntryIo<'a>>,
    // When set, `SCHILY.xattr.*` pax records are applied to the unpacked file
    // (only on Unix builds with the `xattr` feature).
    pub unpack_xattrs: bool,
    // When unset, only the low `0o777` mode bits are applied on Unix.
    pub preserve_permissions: bool,
    // When set, the header uid/gid are applied to the unpacked node.
    pub preserve_ownerships: bool,
    // When set, the header mtime is applied to unpacked files and symlinks.
    pub preserve_mtime: bool,
    // When set, an existing file or symlink at the destination is removed and
    // recreated instead of failing with `AlreadyExists`.
    pub overwrite: bool,
}
46
// One contiguous segment of an entry's contents.
pub enum EntryIo<'a> {
    // A bounded run of a repeated filler byte. When unpacking to a file this
    // segment is not written; the file cursor is moved forward by its length
    // instead.
    Pad(io::Take<io::Repeat>),
    // A bounded window of bytes read from the underlying archive reader.
    Data(io::Take<&'a ArchiveInner<dyn Read + 'a>>),
}
51
/// The result of unpacking an entry, returned so that callers can perform
/// custom additional handling on what was unpacked.
///
/// Today only regular files carry a payload (the opened `File`); in the
/// future the enum may be extended with kinds for links, directories, etc.
#[derive(Debug)]
pub enum Unpacked {
    /// A file was unpacked.
    File(std::fs::File),
    /// A directory, hardlink, symlink, or other node was unpacked.
    #[doc(hidden)]
    __Nonexhaustive,
}
63
impl<'a, R: Read> Entry<'a, R> {
    /// Returns the path name for this entry.
    ///
    /// This method may fail if the pathname is not valid Unicode and this is
    /// called on a Windows platform.
    ///
    /// Note that this function will convert any `\` characters to directory
    /// separators, and it will not always return the same value as
    /// `self.header().path()` as some archive formats have support for longer
    /// path names described in separate entries. A long-name override or a pax
    /// `path` record, when present, takes precedence over the header field.
    ///
    /// It is recommended to use this method instead of inspecting the `header`
    /// directly to ensure that various archive formats are handled correctly.
    pub fn path(&self) -> io::Result<Cow<Path>> {
        self.fields.path()
    }

    /// Returns the raw bytes listed for this entry.
    ///
    /// Note that this function will convert any `\` characters to directory
    /// separators, and it will not always return the same value as
    /// `self.header().path_bytes()` as some archive formats have support for
    /// longer path names described in separate entries.
    pub fn path_bytes(&self) -> Cow<[u8]> {
        self.fields.path_bytes()
    }

    /// Returns the link name for this entry, if any is found.
    ///
    /// This method may fail if the pathname is not valid Unicode and this is
    /// called on a Windows platform. `Ok(None)` being returned, however,
    /// indicates that the link name was not present.
    ///
    /// Note that this function will convert any `\` characters to directory
    /// separators, and it will not always return the same value as
    /// `self.header().link_name()` as some archive formats have support for
    /// longer path names described in separate entries. A long-link override
    /// or a pax `linkpath` record, when present, takes precedence over the
    /// header field.
    ///
    /// It is recommended to use this method instead of inspecting the `header`
    /// directly to ensure that various archive formats are handled correctly.
    pub fn link_name(&self) -> io::Result<Option<Cow<Path>>> {
        self.fields.link_name()
    }

    /// Returns the link name for this entry, in bytes, if listed.
    ///
    /// Note that this will not always return the same value as
    /// `self.header().link_name_bytes()` as some archive formats have support for
    /// longer path names described in separate entries.
    pub fn link_name_bytes(&self) -> Option<Cow<[u8]>> {
        self.fields.link_name_bytes()
    }

    /// Returns an iterator over the pax extensions contained in this entry.
    ///
    /// Pax extensions are a form of archive where extra metadata is stored in
    /// key/value pairs in entries before the entry they're intended to
    /// describe. For example this can be used to describe long file name or
    /// other metadata like atime/ctime/mtime in more precision.
    ///
    /// The returned iterator will yield key/value pairs for each extension.
    ///
    /// `None` will be returned if this entry does not indicate that it itself
    /// contains extensions, or if there were no previous extensions describing
    /// it.
    ///
    /// Note that global pax extensions are intended to be applied to all
    /// archive entries.
    ///
    /// Also note that this function will read the entire entry if the entry
    /// itself is a list of extensions.
    pub fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions>> {
        self.fields.pax_extensions()
    }

    /// Returns access to the header of this entry in the archive.
    ///
    /// This provides access to the metadata for this entry in the archive.
    pub fn header(&self) -> &Header {
        &self.fields.header
    }

    /// Returns access to the size of this entry in the archive.
    ///
    /// In the event the size is stored in a pax extension, that size value
    /// will be referenced. Otherwise, the entry size will be stored in the header.
    pub fn size(&self) -> u64 {
        self.fields.size
    }

    /// Returns the starting position, in bytes, of the header of this entry in
    /// the archive.
    ///
    /// The header is always a contiguous section of 512 bytes, so if the
    /// underlying reader implements `Seek`, then the slice from `header_pos` to
    /// `header_pos + 512` contains the raw header bytes.
    pub fn raw_header_position(&self) -> u64 {
        self.fields.header_pos
    }

    /// Returns the starting position, in bytes, of the file of this entry in
    /// the archive.
    ///
    /// If the file of this entry is continuous (e.g. not a sparse file), and
    /// if the underlying reader implements `Seek`, then the slice from
    /// `file_pos` to `file_pos + entry_size` contains the raw file bytes.
    pub fn raw_file_position(&self) -> u64 {
        self.fields.file_pos
    }

    /// Writes this file to the specified location.
    ///
    /// This function will write the entire contents of this file into the
    /// location specified by `dst`. Metadata will also be propagated to the
    /// path `dst`.
    ///
    /// This function will create a file at the path `dst`, and it is required
    /// that the intermediate directories are created. An existing file at
    /// `dst` is removed and replaced only when overwriting is enabled for this
    /// entry; otherwise unpacking a regular file onto an existing path fails.
    ///
    /// > **Note**: This function does not have as many sanity checks as
    /// > `Archive::unpack` or `Entry::unpack_in`. As a result if you're
    /// > thinking of unpacking untrusted tarballs you may want to review the
    /// > implementations of the previous two functions and perhaps implement
    /// > similar logic yourself.
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use binstall_tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    ///
    /// for (i, file) in ar.entries().unwrap().enumerate() {
    ///     let mut file = file.unwrap();
    ///     file.unpack(format!("file-{}", i)).unwrap();
    /// }
    /// ```
    pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<Unpacked> {
        self.fields.unpack(None, dst.as_ref())
    }

    /// Extracts this file under the specified path, avoiding security issues.
    ///
    /// This function will write the entire contents of this file into the
    /// location obtained by appending the path of this file in the archive to
    /// `dst`, creating any intermediate directories if needed. Metadata will
    /// also be propagated to the unpacked path. An existing file at that
    /// location is replaced only when overwriting is enabled for this entry.
    ///
    /// This function carefully avoids writing outside of `dst`. If the file has
    /// a '..' in its path, this function will skip it and return false. An
    /// entry whose path consists only of root, prefix, or '.' components is
    /// treated as an empty name: nothing is written and `true` is returned.
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use binstall_tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    ///
    /// for (i, file) in ar.entries().unwrap().enumerate() {
    ///     let mut file = file.unwrap();
    ///     file.unpack_in("target").unwrap();
    /// }
    /// ```
    pub fn unpack_in<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<bool> {
        self.fields.unpack_in(dst.as_ref())
    }

    /// Set the mask of the permission bits when unpacking this entry.
    ///
    /// The mask will be inverted when applying against a mode, similar to how
    /// `umask` works on Unix. In logical notation it looks like:
    ///
    /// ```text
    /// new_mode = old_mode & (~mask)
    /// ```
    ///
    /// The mask is 0 by default and is currently only implemented on Unix.
    pub fn set_mask(&mut self, mask: u32) {
        self.fields.mask = mask;
    }

    /// Indicate whether extended file attributes (xattrs on Unix) are preserved
    /// when unpacking this entry.
    ///
    /// This flag is disabled by default and is currently only implemented on
    /// Unix using xattr support. This may eventually be implemented for
    /// Windows, however, if other archive implementations are found which do
    /// this as well.
    pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
        self.fields.unpack_xattrs = unpack_xattrs;
    }

    /// Indicate whether extended permissions (like suid on Unix) are preserved
    /// when unpacking this entry.
    ///
    /// This flag is disabled by default and is currently only implemented on
    /// Unix.
    pub fn set_preserve_permissions(&mut self, preserve: bool) {
        self.fields.preserve_permissions = preserve;
    }

    /// Indicate whether the modification time recorded in the header is
    /// applied to the unpacked file or symlink.
    ///
    /// This flag is enabled by default.
    pub fn set_preserve_mtime(&mut self, preserve: bool) {
        self.fields.preserve_mtime = preserve;
    }
}
277
278impl<'a, R: Read> Read for Entry<'a, R> {
279 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
280 self.fields.read(into)
281 }
282}
283
284impl<'a> EntryFields<'a> {
285 pub fn from<R: Read>(entry: Entry<R>) -> EntryFields {
286 entry.fields
287 }
288
289 pub fn into_entry<R: Read>(self) -> Entry<'a, R> {
290 Entry {
291 fields: self,
292 _ignored: marker::PhantomData,
293 }
294 }
295
296 pub fn read_all(&mut self) -> io::Result<Vec<u8>> {
297 // Preallocate some data but don't let ourselves get too crazy now.
298 let cap = cmp::min(self.size, 128 * 1024);
299 let mut v = Vec::with_capacity(cap as usize);
300 self.read_to_end(&mut v).map(|_| v)
301 }
302
303 pub fn is_pax_sparse(&mut self) -> bool {
304 if let Some(ref pax) = self.pax_extensions {
305 let mut extensions = PaxExtensions::new(pax).filter_map(|f| f.ok());
306 return extensions
307 .find(|f| f.key_bytes() == b"GNU.sparse.major" && f.value_bytes() == b"1")
308 .is_some()
309 && extensions
310 .find(|f| f.key_bytes() == b"GNU.sparse.minor" && f.value_bytes() == b"0")
311 .is_some();
312 }
313 false
314 }
315
316 pub fn pax_sparse_name(&mut self) -> Option<Vec<u8>> {
317 if let Some(ref pax) = self.pax_extensions {
318 return PaxExtensions::new(pax)
319 .filter_map(|f| f.ok())
320 .find(|f| f.key_bytes() == b"GNU.sparse.name")
321 .map(|f| f.value_bytes().to_vec());
322 }
323 None
324 }
325
326 pub fn pax_sparse_realsize(&mut self) -> io::Result<u64> {
327 if let Some(ref pax) = self.pax_extensions {
328 let pax = PaxExtensions::new(pax)
329 .filter_map(|f| f.ok())
330 .find(|f| f.key_bytes() == b"GNU.sparse.realsize")
331 .map(|f| f.value_bytes());
332 if let Some(field) = pax {
333 let str =
334 std::str::from_utf8(&field).map_err(|_| other("failed to read string"))?;
335 return str
336 .parse::<u64>()
337 .map_err(|_| other("failed to parse the real size"));
338 }
339 }
340 Err(other("PAX extension GNU.sparse.realsize not found"))
341 }
342
343 fn path(&self) -> io::Result<Cow<Path>> {
344 bytes2path(self.path_bytes())
345 }
346
347 fn path_bytes(&self) -> Cow<[u8]> {
348 match self.long_pathname {
349 Some(ref bytes) => {
350 if let Some(&0) = bytes.last() {
351 Cow::Borrowed(&bytes[..bytes.len() - 1])
352 } else {
353 Cow::Borrowed(bytes)
354 }
355 }
356 None => {
357 if let Some(ref pax) = self.pax_extensions {
358 let pax = PaxExtensions::new(pax)
359 .filter_map(|f| f.ok())
360 .find(|f| f.key_bytes() == b"path")
361 .map(|f| f.value_bytes());
362 if let Some(field) = pax {
363 return Cow::Borrowed(field);
364 }
365 }
366 self.header.path_bytes()
367 }
368 }
369 }
370
371 /// Gets the path in a "lossy" way, used for error reporting ONLY.
372 fn path_lossy(&self) -> String {
373 String::from_utf8_lossy(&self.path_bytes()).to_string()
374 }
375
376 fn link_name(&self) -> io::Result<Option<Cow<Path>>> {
377 match self.link_name_bytes() {
378 Some(bytes) => bytes2path(bytes).map(Some),
379 None => Ok(None),
380 }
381 }
382
383 fn link_name_bytes(&self) -> Option<Cow<[u8]>> {
384 match self.long_linkname {
385 Some(ref bytes) => {
386 if let Some(&0) = bytes.last() {
387 Some(Cow::Borrowed(&bytes[..bytes.len() - 1]))
388 } else {
389 Some(Cow::Borrowed(bytes))
390 }
391 }
392 None => {
393 if let Some(ref pax) = self.pax_extensions {
394 let pax = PaxExtensions::new(pax)
395 .filter_map(|f| f.ok())
396 .find(|f| f.key_bytes() == b"linkpath")
397 .map(|f| f.value_bytes());
398 if let Some(field) = pax {
399 return Some(Cow::Borrowed(field));
400 }
401 }
402 self.header.link_name_bytes()
403 }
404 }
405 }
406
407 fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions>> {
408 if self.pax_extensions.is_none() {
409 if !self.header.entry_type().is_pax_global_extensions()
410 && !self.header.entry_type().is_pax_local_extensions()
411 {
412 return Ok(None);
413 }
414 self.pax_extensions = Some(self.read_all()?);
415 }
416 Ok(Some(PaxExtensions::new(
417 self.pax_extensions.as_ref().unwrap(),
418 )))
419 }
420
421 fn unpack_in(&mut self, dst: &Path) -> io::Result<bool> {
422 // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3:
423 // * Leading '/'s are trimmed. For example, `///test` is treated as
424 // `test`.
425 // * If the filename contains '..', then the file is skipped when
426 // extracting the tarball.
427 // * '//' within a filename is effectively skipped. An error is
428 // logged, but otherwise the effect is as if any two or more
429 // adjacent '/'s within the filename were consolidated into one
430 // '/'.
431 //
432 // Most of this is handled by the `path` module of the standard
433 // library, but we specially handle a few cases here as well.
434
435 let mut file_dst = dst.to_path_buf();
436 {
437 let path = self.path().map_err(|e| {
438 TarError::new(
439 format!("invalid path in entry header: {}", self.path_lossy()),
440 e,
441 )
442 })?;
443 for part in path.components() {
444 match part {
445 // Leading '/' characters, root paths, and '.'
446 // components are just ignored and treated as "empty
447 // components"
448 Component::Prefix(..) | Component::RootDir | Component::CurDir => continue,
449
450 // If any part of the filename is '..', then skip over
451 // unpacking the file to prevent directory traversal
452 // security issues. See, e.g.: CVE-2001-1267,
453 // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131
454 Component::ParentDir => return Ok(false),
455
456 Component::Normal(part) => file_dst.push(part),
457 }
458 }
459 }
460
461 // Skip cases where only slashes or '.' parts were seen, because
462 // this is effectively an empty filename.
463 if *dst == *file_dst {
464 return Ok(true);
465 }
466
467 // Skip entries without a parent (i.e. outside of FS root)
468 let parent = match file_dst.parent() {
469 Some(p) => p,
470 None => return Ok(false),
471 };
472
473 self.ensure_dir_created(&dst, parent)
474 .map_err(|e| TarError::new(format!("failed to create `{}`", parent.display()), e))?;
475
476 let canon_target = self.validate_inside_dst(&dst, parent)?;
477
478 self.unpack(Some(&canon_target), &file_dst)
479 .map_err(|e| TarError::new(format!("failed to unpack `{}`", file_dst.display()), e))?;
480
481 Ok(true)
482 }
483
484 /// Unpack as destination directory `dst`.
485 fn unpack_dir(&mut self, dst: &Path) -> io::Result<()> {
486 // If the directory already exists just let it slide
487 fs::create_dir(dst).or_else(|err| {
488 if err.kind() == ErrorKind::AlreadyExists {
489 let prev = fs::metadata(dst);
490 if prev.map(|m| m.is_dir()).unwrap_or(false) {
491 return Ok(());
492 }
493 }
494 Err(Error::new(
495 err.kind(),
496 format!("{} when creating dir {}", err, dst.display()),
497 ))
498 })
499 }
500
501 /// Returns access to the header of this entry in the archive.
502 fn unpack(&mut self, target_base: Option<&Path>, dst: &Path) -> io::Result<Unpacked> {
503 fn set_perms_ownerships(
504 dst: &Path,
505 f: Option<&mut std::fs::File>,
506 header: &Header,
507 mask: u32,
508 perms: bool,
509 ownerships: bool,
510 ) -> io::Result<()> {
511 // ownerships need to be set first to avoid stripping SUID bits in the permissions ...
512 if ownerships {
513 set_ownerships(dst, &f, header.uid()?, header.gid()?)?;
514 }
515 // ... then set permissions, SUID bits set here is kept
516 if let Ok(mode) = header.mode() {
517 set_perms(dst, f, mode, mask, perms)?;
518 }
519
520 Ok(())
521 }
522
523 fn get_mtime(header: &Header) -> Option<FileTime> {
524 header.mtime().ok().map(|mtime| {
525 // For some more information on this see the comments in
526 // `Header::fill_platform_from`, but the general idea is that
527 // we're trying to avoid 0-mtime files coming out of archives
528 // since some tools don't ingest them well. Perhaps one day
529 // when Cargo stops working with 0-mtime archives we can remove
530 // this.
531 let mtime = if mtime == 0 { 1 } else { mtime };
532 FileTime::from_unix_time(mtime as i64, 0)
533 })
534 }
535
536 let kind = self.header.entry_type();
537
538 if kind.is_dir() {
539 self.unpack_dir(dst)?;
540 set_perms_ownerships(
541 dst,
542 None,
543 &self.header,
544 self.mask,
545 self.preserve_permissions,
546 self.preserve_ownerships,
547 )?;
548 return Ok(Unpacked::__Nonexhaustive);
549 } else if kind.is_hard_link() || kind.is_symlink() {
550 let src = match self.link_name()? {
551 Some(name) => name,
552 None => {
553 return Err(other(&format!(
554 "hard link listed for {} but no link name found",
555 String::from_utf8_lossy(self.header.as_bytes())
556 )));
557 }
558 };
559
560 if src.iter().count() == 0 {
561 return Err(other(&format!(
562 "symlink destination for {} is empty",
563 String::from_utf8_lossy(self.header.as_bytes())
564 )));
565 }
566
567 if kind.is_hard_link() {
568 let link_src = match target_base {
569 // If we're unpacking within a directory then ensure that
570 // the destination of this hard link is both present and
571 // inside our own directory. This is needed because we want
572 // to make sure to not overwrite anything outside the root.
573 //
574 // Note that this logic is only needed for hard links
575 // currently. With symlinks the `validate_inside_dst` which
576 // happens before this method as part of `unpack_in` will
577 // use canonicalization to ensure this guarantee. For hard
578 // links though they're canonicalized to their existing path
579 // so we need to validate at this time.
580 Some(ref p) => {
581 let link_src = p.join(src);
582 self.validate_inside_dst(p, &link_src)?;
583 link_src
584 }
585 None => src.into_owned(),
586 };
587 fs::hard_link(&link_src, dst).map_err(|err| {
588 Error::new(
589 err.kind(),
590 format!(
591 "{} when hard linking {} to {}",
592 err,
593 link_src.display(),
594 dst.display()
595 ),
596 )
597 })?;
598 } else {
599 symlink(&src, dst)
600 .or_else(|err_io| {
601 if err_io.kind() == io::ErrorKind::AlreadyExists && self.overwrite {
602 // remove dest and try once more
603 std::fs::remove_file(dst).and_then(|()| symlink(&src, dst))
604 } else {
605 Err(err_io)
606 }
607 })
608 .map_err(|err| {
609 Error::new(
610 err.kind(),
611 format!(
612 "{} when symlinking {} to {}",
613 err,
614 src.display(),
615 dst.display()
616 ),
617 )
618 })?;
619 if self.preserve_mtime {
620 if let Some(mtime) = get_mtime(&self.header) {
621 filetime::set_symlink_file_times(dst, mtime, mtime).map_err(|e| {
622 TarError::new(format!("failed to set mtime for `{}`", dst.display()), e)
623 })?;
624 }
625 }
626 }
627 return Ok(Unpacked::__Nonexhaustive);
628
629 #[cfg(target_arch = "wasm32")]
630 #[allow(unused_variables)]
631 fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
632 Err(io::Error::new(io::ErrorKind::Other, "Not implemented"))
633 }
634
635 #[cfg(windows)]
636 fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
637 ::std::os::windows::fs::symlink_file(src, dst)
638 }
639
640 #[cfg(unix)]
641 fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
642 ::std::os::unix::fs::symlink(src, dst)
643 }
644 } else if kind.is_pax_global_extensions()
645 || kind.is_pax_local_extensions()
646 || kind.is_gnu_longname()
647 || kind.is_gnu_longlink()
648 {
649 return Ok(Unpacked::__Nonexhaustive);
650 };
651
652 // Old BSD-tar compatibility.
653 // Names that have a trailing slash should be treated as a directory.
654 // Only applies to old headers.
655 if self.header.as_ustar().is_none() && self.path_bytes().ends_with(b"/") {
656 self.unpack_dir(dst)?;
657 set_perms_ownerships(
658 dst,
659 None,
660 &self.header,
661 self.mask,
662 self.preserve_permissions,
663 self.preserve_ownerships,
664 )?;
665 return Ok(Unpacked::__Nonexhaustive);
666 }
667
668 // Note the lack of `else` clause above. According to the FreeBSD
669 // documentation:
670 //
671 // > A POSIX-compliant implementation must treat any unrecognized
672 // > typeflag value as a regular file.
673 //
674 // As a result if we don't recognize the kind we just write out the file
675 // as we would normally.
676
677 // Ensure we write a new file rather than overwriting in-place which
678 // is attackable; if an existing file is found unlink it.
679 fn open(dst: &Path) -> io::Result<std::fs::File> {
680 OpenOptions::new().write(true).create_new(true).open(dst)
681 }
682 let mut f = (|| -> io::Result<std::fs::File> {
683 let mut f = open(dst).or_else(|err| {
684 if err.kind() != ErrorKind::AlreadyExists {
685 Err(err)
686 } else if self.overwrite {
687 match fs::remove_file(dst) {
688 Ok(()) => open(dst),
689 Err(ref e) if e.kind() == io::ErrorKind::NotFound => open(dst),
690 Err(e) => Err(e),
691 }
692 } else {
693 Err(err)
694 }
695 })?;
696 for io in self.data.drain(..) {
697 match io {
698 EntryIo::Data(mut d) => {
699 let expected = d.limit();
700 if io::copy(&mut d, &mut f)? != expected {
701 return Err(other("failed to write entire file"));
702 }
703 }
704 EntryIo::Pad(d) => {
705 // TODO: checked cast to i64
706 let to = SeekFrom::Current(d.limit() as i64);
707 let size = f.seek(to)?;
708 f.set_len(size)?;
709 }
710 }
711 }
712 Ok(f)
713 })()
714 .map_err(|e| {
715 let header = self.header.path_bytes();
716 TarError::new(
717 format!(
718 "failed to unpack `{}` into `{}`",
719 String::from_utf8_lossy(&header),
720 dst.display()
721 ),
722 e,
723 )
724 })?;
725
726 if self.preserve_mtime {
727 if let Some(mtime) = get_mtime(&self.header) {
728 filetime::set_file_handle_times(&f, Some(mtime), Some(mtime)).map_err(|e| {
729 TarError::new(format!("failed to set mtime for `{}`", dst.display()), e)
730 })?;
731 }
732 }
733 set_perms_ownerships(
734 dst,
735 Some(&mut f),
736 &self.header,
737 self.mask,
738 self.preserve_permissions,
739 self.preserve_ownerships,
740 )?;
741 if self.unpack_xattrs {
742 set_xattrs(self, dst)?;
743 }
744 return Ok(Unpacked::File(f));
745
746 fn set_ownerships(
747 dst: &Path,
748 f: &Option<&mut std::fs::File>,
749 uid: u64,
750 gid: u64,
751 ) -> Result<(), TarError> {
752 _set_ownerships(dst, f, uid, gid).map_err(|e| {
753 TarError::new(
754 format!(
755 "failed to set ownerships to uid={:?}, gid={:?} \
756 for `{}`",
757 uid,
758 gid,
759 dst.display()
760 ),
761 e,
762 )
763 })
764 }
765
766 #[cfg(unix)]
767 fn _set_ownerships(
768 dst: &Path,
769 f: &Option<&mut std::fs::File>,
770 uid: u64,
771 gid: u64,
772 ) -> io::Result<()> {
773 use std::os::unix::prelude::*;
774
775 let uid: libc::uid_t = uid.try_into().map_err(|_| {
776 io::Error::new(io::ErrorKind::Other, format!("UID {} is too large!", uid))
777 })?;
778 let gid: libc::gid_t = gid.try_into().map_err(|_| {
779 io::Error::new(io::ErrorKind::Other, format!("GID {} is too large!", gid))
780 })?;
781 match f {
782 Some(f) => unsafe {
783 let fd = f.as_raw_fd();
784 if libc::fchown(fd, uid, gid) != 0 {
785 Err(io::Error::last_os_error())
786 } else {
787 Ok(())
788 }
789 },
790 None => unsafe {
791 let path = std::ffi::CString::new(dst.as_os_str().as_bytes()).map_err(|e| {
792 io::Error::new(
793 io::ErrorKind::Other,
794 format!("path contains null character: {:?}", e),
795 )
796 })?;
797 if libc::lchown(path.as_ptr(), uid, gid) != 0 {
798 Err(io::Error::last_os_error())
799 } else {
800 Ok(())
801 }
802 },
803 }
804 }
805
806 // Windows does not support posix numeric ownership IDs
807 #[cfg(any(windows, target_arch = "wasm32"))]
808 fn _set_ownerships(
809 _: &Path,
810 _: &Option<&mut std::fs::File>,
811 _: u64,
812 _: u64,
813 ) -> io::Result<()> {
814 Ok(())
815 }
816
817 fn set_perms(
818 dst: &Path,
819 f: Option<&mut std::fs::File>,
820 mode: u32,
821 mask: u32,
822 preserve: bool,
823 ) -> Result<(), TarError> {
824 _set_perms(dst, f, mode, mask, preserve).map_err(|e| {
825 TarError::new(
826 format!(
827 "failed to set permissions to {:o} \
828 for `{}`",
829 mode,
830 dst.display()
831 ),
832 e,
833 )
834 })
835 }
836
837 #[cfg(unix)]
838 fn _set_perms(
839 dst: &Path,
840 f: Option<&mut std::fs::File>,
841 mode: u32,
842 mask: u32,
843 preserve: bool,
844 ) -> io::Result<()> {
845 use std::os::unix::prelude::*;
846
847 let mode = if preserve { mode } else { mode & 0o777 };
848 let mode = mode & !mask;
849 let perm = fs::Permissions::from_mode(mode as _);
850 match f {
851 Some(f) => f.set_permissions(perm),
852 None => fs::set_permissions(dst, perm),
853 }
854 }
855
856 #[cfg(windows)]
857 fn _set_perms(
858 dst: &Path,
859 f: Option<&mut std::fs::File>,
860 mode: u32,
861 _mask: u32,
862 _preserve: bool,
863 ) -> io::Result<()> {
864 if mode & 0o200 == 0o200 {
865 return Ok(());
866 }
867 match f {
868 Some(f) => {
869 let mut perm = f.metadata()?.permissions();
870 perm.set_readonly(true);
871 f.set_permissions(perm)
872 }
873 None => {
874 let mut perm = fs::metadata(dst)?.permissions();
875 perm.set_readonly(true);
876 fs::set_permissions(dst, perm)
877 }
878 }
879 }
880
881 #[cfg(target_arch = "wasm32")]
882 #[allow(unused_variables)]
883 fn _set_perms(
884 dst: &Path,
885 f: Option<&mut std::fs::File>,
886 mode: u32,
887 mask: u32,
888 _preserve: bool,
889 ) -> io::Result<()> {
890 Err(io::Error::new(io::ErrorKind::Other, "Not implemented"))
891 }
892
893 #[cfg(all(unix, feature = "xattr"))]
894 fn set_xattrs(me: &mut EntryFields, dst: &Path) -> io::Result<()> {
895 use std::ffi::OsStr;
896 use std::os::unix::prelude::*;
897
898 let exts = match me.pax_extensions() {
899 Ok(Some(e)) => e,
900 _ => return Ok(()),
901 };
902 let exts = exts
903 .filter_map(|e| e.ok())
904 .filter_map(|e| {
905 let key = e.key_bytes();
906 let prefix = b"SCHILY.xattr.";
907 key.strip_prefix(prefix).map(|rest| (rest, e))
908 })
909 .map(|(key, e)| (OsStr::from_bytes(key), e.value_bytes()));
910
911 for (key, value) in exts {
912 xattr::set(dst, key, value).map_err(|e| {
913 TarError::new(
914 format!(
915 "failed to set extended \
916 attributes to {}. \
917 Xattrs: key={:?}, value={:?}.",
918 dst.display(),
919 key,
920 String::from_utf8_lossy(value)
921 ),
922 e,
923 )
924 })?;
925 }
926
927 Ok(())
928 }
929 // Windows does not completely support posix xattrs
930 // https://en.wikipedia.org/wiki/Extended_file_attributes#Windows_NT
931 #[cfg(any(windows, not(feature = "xattr"), target_arch = "wasm32"))]
932 fn set_xattrs(_: &mut EntryFields, _: &Path) -> io::Result<()> {
933 Ok(())
934 }
935 }
936
937 fn ensure_dir_created(&self, dst: &Path, dir: &Path) -> io::Result<()> {
938 let mut ancestor = dir;
939 let mut dirs_to_create = Vec::new();
940 while ancestor.symlink_metadata().is_err() {
941 dirs_to_create.push(ancestor);
942 if let Some(parent) = ancestor.parent() {
943 ancestor = parent;
944 } else {
945 break;
946 }
947 }
948 for ancestor in dirs_to_create.into_iter().rev() {
949 if let Some(parent) = ancestor.parent() {
950 self.validate_inside_dst(dst, parent)?;
951 }
952 fs::create_dir_all(ancestor)?;
953 }
954 Ok(())
955 }
956
957 fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result<PathBuf> {
958 // Abort if target (canonical) parent is outside of `dst`
959 let canon_parent = file_dst.canonicalize().map_err(|err| {
960 Error::new(
961 err.kind(),
962 format!("{} while canonicalizing {}", err, file_dst.display()),
963 )
964 })?;
965 let canon_target = dst.canonicalize().map_err(|err| {
966 Error::new(
967 err.kind(),
968 format!("{} while canonicalizing {}", err, dst.display()),
969 )
970 })?;
971 if !canon_parent.starts_with(&canon_target) {
972 let err = TarError::new(
973 format!(
974 "trying to unpack outside of destination path: {}",
975 canon_target.display()
976 ),
977 // TODO: use ErrorKind::InvalidInput here? (minor breaking change)
978 Error::new(ErrorKind::Other, "Invalid argument"),
979 );
980 return Err(err.into());
981 }
982 Ok(canon_target)
983 }
984}
985
986impl<'a> Read for EntryFields<'a> {
987 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
988 loop {
989 match self.data.get_mut(0).map(|io| io.read(into)) {
990 Some(Ok(0)) => {
991 self.data.remove(0);
992 }
993 Some(r) => return r,
994 None => return Ok(0),
995 }
996 }
997 }
998}
999
1000impl<'a> Read for EntryIo<'a> {
1001 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
1002 match *self {
1003 EntryIo::Pad(ref mut io) => io.read(into),
1004 EntryIo::Data(ref mut io) => io.read(into),
1005 }
1006 }
1007}