1use std::cell::{Cell, RefCell};
2use std::cmp;
3use std::convert::TryFrom;
4use std::fs;
5use std::io::prelude::*;
6use std::io::{self, SeekFrom};
7use std::marker;
8use std::path::Path;
9
10use crate::entry::{EntryFields, EntryIo};
11use crate::error::TarError;
12use crate::other;
13use crate::pax::*;
14use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};
15
16pub struct Archive<R: ?Sized + Read> {
20 inner: ArchiveInner<R>,
21}
22
/// Reader plus bookkeeping shared by an archive and the entries read from it.
///
/// `obj` has to stay the last field: it is the only field mentioning the
/// possibly-unsized `R`.
pub struct ArchiveInner<R>
where
    R: ?Sized,
{
    /// Current position in the underlying reader, as tracked by the `Read`
    /// and `Seek` implementations for `&ArchiveInner`.
    pos: Cell<u64>,
    /// Mask value copied into every entry produced from this archive.
    mask: u32,
    /// Option flag copied into every entry produced from this archive.
    unpack_xattrs: bool,
    /// Option flag copied into every entry produced from this archive.
    preserve_permissions: bool,
    /// Option flag copied into every entry produced from this archive.
    preserve_ownerships: bool,
    /// Option flag copied into every entry produced from this archive.
    preserve_mtime: bool,
    /// Option flag copied into every entry produced from this archive.
    overwrite: bool,
    /// When set, all-zero header blocks are skipped instead of ending the
    /// iteration over entries.
    ignore_zeros: bool,
    /// The wrapped reader; interior mutability lets shared references read it.
    obj: RefCell<R>,
}
34
35pub struct Entries<'a, R: 'a + Read> {
37 fields: EntriesFields<'a>,
38 _ignored: marker::PhantomData<&'a Archive<R>>,
39}
40
/// Combination of `Read` and `Seek` under one name, so that a seekable reader
/// can be referred to as a single `dyn SeekRead` trait object.
trait SeekRead: Read + Seek {}

/// Every type that can both read and seek is a `SeekRead`.
impl<T> SeekRead for T where T: Read + Seek {}
43
44struct EntriesFields<'a> {
45 archive: &'a Archive<dyn Read + 'a>,
46 seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
47 next: u64,
48 done: bool,
49 raw: bool,
50}
51
52impl<R: Read> Archive<R> {
53 pub fn new(obj: R) -> Archive<R> {
55 Archive {
56 inner: ArchiveInner {
57 mask: u32::MIN,
58 unpack_xattrs: false,
59 preserve_permissions: false,
60 preserve_ownerships: false,
61 preserve_mtime: true,
62 overwrite: true,
63 ignore_zeros: false,
64 obj: RefCell::new(obj),
65 pos: Cell::new(0),
66 },
67 }
68 }
69
70 pub fn into_inner(self) -> R {
72 self.inner.obj.into_inner()
73 }
74
75 pub fn entries(&mut self) -> io::Result<Entries<R>> {
82 let me: &mut Archive<dyn Read> = self;
83 me._entries(None).map(|fields| Entries {
84 fields: fields,
85 _ignored: marker::PhantomData,
86 })
87 }
88
89 pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
109 let me: &mut Archive<dyn Read> = self;
110 me._unpack(dst.as_ref())
111 }
112
113 pub fn set_mask(&mut self, mask: u32) {
124 self.inner.mask = mask;
125 }
126
127 pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
135 self.inner.unpack_xattrs = unpack_xattrs;
136 }
137
138 pub fn set_preserve_permissions(&mut self, preserve: bool) {
144 self.inner.preserve_permissions = preserve;
145 }
146
147 pub fn set_preserve_ownerships(&mut self, preserve: bool) {
153 self.inner.preserve_ownerships = preserve;
154 }
155
156 pub fn set_overwrite(&mut self, overwrite: bool) {
158 self.inner.overwrite = overwrite;
159 }
160
161 pub fn set_preserve_mtime(&mut self, preserve: bool) {
166 self.inner.preserve_mtime = preserve;
167 }
168
169 pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) {
174 self.inner.ignore_zeros = ignore_zeros;
175 }
176}
177
178impl<R: Seek + Read> Archive<R> {
179 pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> {
187 let me: &Archive<dyn Read> = self;
188 let me_seekable: &Archive<dyn SeekRead> = self;
189 me._entries(Some(me_seekable)).map(|fields| Entries {
190 fields: fields,
191 _ignored: marker::PhantomData,
192 })
193 }
194}
195
196impl Archive<dyn Read + '_> {
197 fn _entries<'a>(
198 &'a self,
199 seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
200 ) -> io::Result<EntriesFields<'a>> {
201 if self.inner.pos.get() != 0 {
202 return Err(other(
203 "cannot call entries unless archive is at \
204 position 0",
205 ));
206 }
207 Ok(EntriesFields {
208 archive: self,
209 seekable_archive,
210 done: false,
211 next: 0,
212 raw: false,
213 })
214 }
215
216 fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
217 if dst.symlink_metadata().is_err() {
218 fs::create_dir_all(&dst)
219 .map_err(|e| TarError::new(format!("failed to create `{}`", dst.display()), e))?;
220 }
221
222 let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf());
228
229 let mut directories = Vec::new();
233 for entry in self._entries(None)? {
234 let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
235 if file.header().entry_type() == crate::EntryType::Directory {
236 directories.push(file);
237 } else {
238 file.unpack_in(dst)?;
239 }
240 }
241 for mut dir in directories {
242 dir.unpack_in(dst)?;
243 }
244
245 Ok(())
246 }
247}
248
249impl<'a, R: Read> Entries<'a, R> {
250 pub fn raw(self, raw: bool) -> Entries<'a, R> {
256 Entries {
257 fields: EntriesFields {
258 raw: raw,
259 ..self.fields
260 },
261 _ignored: marker::PhantomData,
262 }
263 }
264}
265impl<'a, R: Read> Iterator for Entries<'a, R> {
266 type Item = io::Result<Entry<'a, R>>;
267
268 fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
269 self.fields
270 .next()
271 .map(|result| result.map(|e| EntryFields::from(e).into_entry()))
272 }
273}
274
275impl<'a> EntriesFields<'a> {
276 fn next_entry_raw(
277 &mut self,
278 pax_extensions: Option<&[u8]>,
279 ) -> io::Result<Option<Entry<'a, io::Empty>>> {
280 let mut header = Header::new_old();
281 let mut header_pos = self.next;
282 loop {
283 let delta = self.next - self.archive.inner.pos.get();
285 self.skip(delta)?;
286
287 if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
289 return Ok(None);
290 }
291
292 if !header.as_bytes().iter().all(|i| *i == 0) {
296 self.next += 512;
297 break;
298 }
299
300 if !self.archive.inner.ignore_zeros {
301 return Ok(None);
302 }
303 self.next += 512;
304 header_pos = self.next;
305 }
306
307 let sum = header.as_bytes()[..148]
309 .iter()
310 .chain(&header.as_bytes()[156..])
311 .fold(0, |a, b| a + (*b as u32))
312 + 8 * 32;
313 let cksum = header.cksum()?;
314 if sum != cksum {
315 return Err(other("archive header checksum mismatch"));
316 }
317
318 let mut pax_size: Option<u64> = None;
319 if let Some(pax_extensions_ref) = &pax_extensions {
320 pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE);
321
322 if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) {
323 header.set_uid(pax_uid);
324 }
325
326 if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) {
327 header.set_gid(pax_gid);
328 }
329 }
330
331 let file_pos = self.next;
332 let mut size = header.entry_size()?;
333 if size == 0 {
334 if let Some(pax_size) = pax_size {
335 size = pax_size;
336 }
337 }
338 let ret = EntryFields {
339 size: size,
340 header_pos: header_pos,
341 file_pos: file_pos,
342 data: vec![EntryIo::Data((&self.archive.inner).take(size))],
343 header: header,
344 long_pathname: None,
345 long_linkname: None,
346 pax_extensions: None,
347 mask: self.archive.inner.mask,
348 unpack_xattrs: self.archive.inner.unpack_xattrs,
349 preserve_permissions: self.archive.inner.preserve_permissions,
350 preserve_mtime: self.archive.inner.preserve_mtime,
351 overwrite: self.archive.inner.overwrite,
352 preserve_ownerships: self.archive.inner.preserve_ownerships,
353 };
354
355 let size = size
358 .checked_add(511)
359 .ok_or_else(|| other("size overflow"))?;
360 self.next = self
361 .next
362 .checked_add(size & !(512 - 1))
363 .ok_or_else(|| other("size overflow"))?;
364
365 Ok(Some(ret.into_entry()))
366 }
367
368 fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
369 if self.raw {
370 return self.next_entry_raw(None);
371 }
372
373 let mut gnu_longname = None;
374 let mut gnu_longlink = None;
375 let mut pax_extensions = None;
376 let mut processed = 0;
377 loop {
378 processed += 1;
379 let entry = match self.next_entry_raw(pax_extensions.as_deref())? {
380 Some(entry) => entry,
381 None if processed > 1 => {
382 return Err(other(
383 "members found describing a future member \
384 but no future member found",
385 ));
386 }
387 None => return Ok(None),
388 };
389
390 let is_recognized_header =
391 entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some();
392
393 if is_recognized_header && entry.header().entry_type().is_gnu_longname() {
394 if gnu_longname.is_some() {
395 return Err(other(
396 "two long name entries describing \
397 the same member",
398 ));
399 }
400 gnu_longname = Some(EntryFields::from(entry).read_all()?);
401 continue;
402 }
403
404 if is_recognized_header && entry.header().entry_type().is_gnu_longlink() {
405 if gnu_longlink.is_some() {
406 return Err(other(
407 "two long name entries describing \
408 the same member",
409 ));
410 }
411 gnu_longlink = Some(EntryFields::from(entry).read_all()?);
412 continue;
413 }
414
415 if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() {
416 if pax_extensions.is_some() {
417 return Err(other(
418 "two pax extensions entries describing \
419 the same member",
420 ));
421 }
422 pax_extensions = Some(EntryFields::from(entry).read_all()?);
423 continue;
424 }
425
426 let mut fields = EntryFields::from(entry);
427 fields.long_pathname = gnu_longname;
428 fields.long_linkname = gnu_longlink;
429 fields.pax_extensions = pax_extensions;
430 self.parse_sparse_header(&mut fields)?;
431 return Ok(Some(fields.into_entry()));
432 }
433 }
434
435 fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
436 if !entry.header.entry_type().is_gnu_sparse() {
437 return Ok(());
438 }
439 let gnu = match entry.header.as_gnu() {
440 Some(gnu) => gnu,
441 None => return Err(other("sparse entry type listed but not GNU header")),
442 };
443
444 entry.data.truncate(0);
464
465 let mut cur = 0;
466 let mut remaining = entry.size;
467 {
468 let data = &mut entry.data;
469 let reader = &self.archive.inner;
470 let size = entry.size;
471 let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
472 if block.is_empty() {
473 return Ok(());
474 }
475 let off = block.offset()?;
476 let len = block.length()?;
477 if len != 0 && (size - remaining) % 512 != 0 {
478 return Err(other(
479 "previous block in sparse file was not \
480 aligned to 512-byte boundary",
481 ));
482 } else if off < cur {
483 return Err(other(
484 "out of order or overlapping sparse \
485 blocks",
486 ));
487 } else if cur < off {
488 let block = io::repeat(0).take(off - cur);
489 data.push(EntryIo::Pad(block));
490 }
491 cur = off
492 .checked_add(len)
493 .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
494 remaining = remaining.checked_sub(len).ok_or_else(|| {
495 other(
496 "sparse file consumed more data than the header \
497 listed",
498 )
499 })?;
500 data.push(EntryIo::Data(reader.take(len)));
501 Ok(())
502 };
503 for block in gnu.sparse.iter() {
504 add_block(block)?
505 }
506 if gnu.is_extended() {
507 let mut ext = GnuExtSparseHeader::new();
508 ext.isextended[0] = 1;
509 while ext.is_extended() {
510 if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
511 return Err(other("failed to read extension"));
512 }
513
514 self.next += 512;
515 for block in ext.sparse.iter() {
516 add_block(block)?;
517 }
518 }
519 }
520 }
521 if cur != gnu.real_size()? {
522 return Err(other(
523 "mismatch in sparse file chunks and \
524 size in header",
525 ));
526 }
527 entry.size = cur;
528 if remaining > 0 {
529 return Err(other(
530 "mismatch in sparse file chunks and \
531 entry size in header",
532 ));
533 }
534 Ok(())
535 }
536
537 fn skip(&mut self, mut amt: u64) -> io::Result<()> {
538 if let Some(seekable_archive) = self.seekable_archive {
539 let pos = io::SeekFrom::Current(
540 i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?,
541 );
542 (&seekable_archive.inner).seek(pos)?;
543 } else {
544 let mut buf = [0u8; 4096 * 8];
545 while amt > 0 {
546 let n = cmp::min(amt, buf.len() as u64);
547 let n = (&self.archive.inner).read(&mut buf[..n as usize])?;
548 if n == 0 {
549 return Err(other("unexpected EOF during skip"));
550 }
551 amt -= n as u64;
552 }
553 }
554 Ok(())
555 }
556}
557
558impl<'a> Iterator for EntriesFields<'a> {
559 type Item = io::Result<Entry<'a, io::Empty>>;
560
561 fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> {
562 if self.done {
563 None
564 } else {
565 match self.next_entry() {
566 Ok(Some(e)) => Some(Ok(e)),
567 Ok(None) => {
568 self.done = true;
569 None
570 }
571 Err(e) => {
572 self.done = true;
573 Some(Err(e))
574 }
575 }
576 }
577 }
578}
579
580impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
581 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
582 let i = self.obj.borrow_mut().read(into)?;
583 self.pos.set(self.pos.get() + i as u64);
584 Ok(i)
585 }
586}
587
588impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> {
589 fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
590 let pos = self.obj.borrow_mut().seek(pos)?;
591 self.pos.set(pos);
592 Ok(pos)
593 }
594}
595
596fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
601 let mut read = 0;
602 while read < buf.len() {
603 match r.read(&mut buf[read..])? {
604 0 => {
605 if read == 0 {
606 return Ok(false);
607 }
608
609 return Err(other("failed to read entire block"));
610 }
611 n => read += n,
612 }
613 }
614 Ok(true)
615}