1use std::cell::{Cell, RefCell};
2use std::cmp;
3use std::convert::TryFrom;
4use std::fs;
5use std::io::prelude::*;
6use std::io::{self, SeekFrom};
7use std::marker;
8use std::path::Path;
9
10use crate::entry::{EntryFields, EntryIo};
11use crate::error::TarError;
12use crate::header::BLOCK_SIZE;
13use crate::other;
14use crate::pax::*;
15use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};
16
/// A tar archive backed by a reader of type `R`.
///
/// Everything the archive owns (the reader, the tracked position and the
/// unpack options) lives in the private `ArchiveInner`. The non-generic
/// helpers in this file operate on the `Archive<dyn Read>` view of the same
/// data.
pub struct Archive<R: ?Sized + Read> {
    inner: ArchiveInner<R>,
}
23
/// State behind an `Archive`: the wrapped reader, the tracked position, and
/// the options that are copied into every entry produced while iterating.
pub struct ArchiveInner<R: ?Sized> {
    // Offset maintained by the `Read`/`Seek` impls for `&ArchiveInner`: each
    // read adds the number of bytes returned, each seek stores the offset
    // reported by the underlying reader.
    pos: Cell<u64>,
    // Copied into each entry's `mask` field; `Archive::new` starts it at
    // `u32::MIN`.
    mask: u32,
    // The following flags are copied verbatim into each entry's fields of the
    // same name; how they are honored is decided by the entry's unpack code,
    // which is not part of this file.
    unpack_xattrs: bool,
    preserve_permissions: bool,
    preserve_ownerships: bool,
    preserve_mtime: bool,
    overwrite: bool,
    // When set, all-zero header blocks are skipped instead of ending the
    // iteration.
    ignore_zeros: bool,
    // The underlying reader. Interior mutability lets entries read through a
    // shared `&ArchiveInner`.
    // NOTE(review): presumably kept as the last field so `Archive<R>` can be
    // coerced to `Archive<dyn Read>` — confirm before reordering.
    obj: RefCell<R>,
}
35
/// Iterator over the entries of an archive, as returned by
/// `Archive::entries` and `Archive::entries_with_seek`.
pub struct Entries<'a, R: 'a + Read> {
    // Type-erased iteration state; all of the work is delegated to it.
    fields: EntriesFields<'a>,
    // Ties the iterator to the reader type `R` and the archive borrow without
    // storing either.
    _ignored: marker::PhantomData<&'a Archive<R>>,
}
41
/// Private helper trait combining `Read` and `Seek`, so that one trait object
/// (`dyn SeekRead`) can offer both capabilities.
trait SeekRead: Read + Seek {}
// Blanket impl: every type that is both `Read` and `Seek` is a `SeekRead`.
impl<R: Read + Seek> SeekRead for R {}
44
/// Type-erased state of an in-progress iteration over an archive.
struct EntriesFields<'a> {
    // The archive being read, viewed through a `dyn Read` reader.
    archive: &'a Archive<dyn Read + 'a>,
    // The same archive viewed through a seekable reader; when present, `skip`
    // seeks instead of reading and discarding bytes.
    seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
    // Offset at which the next header block is expected to start.
    next: u64,
    // Set once the iterator has returned `None` or an error; afterwards it
    // only yields `None`.
    done: bool,
    // When true, `next_entry` returns raw entries without interpreting GNU
    // long name/link members, PAX extensions or sparse headers.
    raw: bool,
}
52
53impl<R: Read> Archive<R> {
54 pub fn new(obj: R) -> Archive<R> {
56 Archive {
57 inner: ArchiveInner {
58 mask: u32::MIN,
59 unpack_xattrs: false,
60 preserve_permissions: false,
61 preserve_ownerships: false,
62 preserve_mtime: true,
63 overwrite: true,
64 ignore_zeros: false,
65 obj: RefCell::new(obj),
66 pos: Cell::new(0),
67 },
68 }
69 }
70
71 pub fn into_inner(self) -> R {
73 self.inner.obj.into_inner()
74 }
75
76 pub fn entries(&mut self) -> io::Result<Entries<'_, R>> {
83 let me: &mut Archive<dyn Read> = self;
84 me._entries(None).map(|fields| Entries {
85 fields,
86 _ignored: marker::PhantomData,
87 })
88 }
89
90 pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
122 let me: &mut Archive<dyn Read> = self;
123 me._unpack(dst.as_ref())
124 }
125
126 pub fn set_mask(&mut self, mask: u32) {
137 self.inner.mask = mask;
138 }
139
140 pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
148 self.inner.unpack_xattrs = unpack_xattrs;
149 }
150
151 pub fn set_preserve_permissions(&mut self, preserve: bool) {
157 self.inner.preserve_permissions = preserve;
158 }
159
160 pub fn set_preserve_ownerships(&mut self, preserve: bool) {
166 self.inner.preserve_ownerships = preserve;
167 }
168
169 pub fn set_overwrite(&mut self, overwrite: bool) {
171 self.inner.overwrite = overwrite;
172 }
173
174 pub fn set_preserve_mtime(&mut self, preserve: bool) {
179 self.inner.preserve_mtime = preserve;
180 }
181
182 pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) {
187 self.inner.ignore_zeros = ignore_zeros;
188 }
189}
190
191impl<R: Seek + Read> Archive<R> {
192 pub fn entries_with_seek(&mut self) -> io::Result<Entries<'_, R>> {
200 let me: &Archive<dyn Read> = self;
201 let me_seekable: &Archive<dyn SeekRead> = self;
202 me._entries(Some(me_seekable)).map(|fields| Entries {
203 fields,
204 _ignored: marker::PhantomData,
205 })
206 }
207}
208
209impl Archive<dyn Read + '_> {
210 fn _entries<'a>(
211 &'a self,
212 seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
213 ) -> io::Result<EntriesFields<'a>> {
214 if self.inner.pos.get() != 0 {
215 return Err(other(
216 "cannot call entries unless archive is at \
217 position 0",
218 ));
219 }
220 Ok(EntriesFields {
221 archive: self,
222 seekable_archive,
223 done: false,
224 next: 0,
225 raw: false,
226 })
227 }
228
229 fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
230 if dst.symlink_metadata().is_err() {
231 fs::create_dir_all(dst)
232 .map_err(|e| TarError::new(format!("failed to create `{}`", dst.display()), e))?;
233 }
234
235 let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf());
241
242 let mut directories = Vec::new();
246 for entry in self._entries(None)? {
247 let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
248 if file.header().entry_type() == crate::EntryType::Directory {
249 directories.push(file);
250 } else {
251 file.unpack_in(dst)?;
252 }
253 }
254
255 directories.sort_by(|a, b| b.path_bytes().cmp(&a.path_bytes()));
263 for mut dir in directories {
264 dir.unpack_in(dst)?;
265 }
266
267 Ok(())
268 }
269}
270
271impl<'a, R: Read> Entries<'a, R> {
272 pub fn raw(self, raw: bool) -> Entries<'a, R> {
278 Entries {
279 fields: EntriesFields { raw, ..self.fields },
280 _ignored: marker::PhantomData,
281 }
282 }
283}
284impl<'a, R: Read> Iterator for Entries<'a, R> {
285 type Item = io::Result<Entry<'a, R>>;
286
287 fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
288 self.fields
289 .next()
290 .map(|result| result.map(|e| EntryFields::from(e).into_entry()))
291 }
292}
293
294impl<'a> EntriesFields<'a> {
295 fn next_entry_raw(
296 &mut self,
297 pax_extensions: Option<&[u8]>,
298 ) -> io::Result<Option<Entry<'a, io::Empty>>> {
299 let mut header = Header::new_old();
300 let mut header_pos = self.next;
301 loop {
302 let delta = self.next - self.archive.inner.pos.get();
304 self.skip(delta)?;
305
306 if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
308 return Ok(None);
309 }
310
311 if !header.as_bytes().iter().all(|i| *i == 0) {
315 self.next += BLOCK_SIZE;
316 break;
317 }
318
319 if !self.archive.inner.ignore_zeros {
320 return Ok(None);
321 }
322 self.next += BLOCK_SIZE;
323 header_pos = self.next;
324 }
325
326 let sum = header.as_bytes()[..148]
328 .iter()
329 .chain(&header.as_bytes()[156..])
330 .fold(0, |a, b| a + (*b as u32))
331 + 8 * 32;
332 let cksum = header.cksum()?;
333 if sum != cksum {
334 return Err(other("archive header checksum mismatch"));
335 }
336
337 let mut pax_size: Option<u64> = None;
338 if let Some(pax_extensions_ref) = &pax_extensions {
339 pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE);
340
341 if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) {
342 header.set_uid(pax_uid);
343 }
344
345 if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) {
346 header.set_gid(pax_gid);
347 }
348 }
349
350 let file_pos = self.next;
351 let mut size = header.entry_size()?;
352 if let Some(pax_size) = pax_size {
356 size = pax_size;
357 }
358 let ret = EntryFields {
359 size,
360 header_pos,
361 file_pos,
362 data: vec![EntryIo::Data((&self.archive.inner).take(size))],
363 header,
364 long_pathname: None,
365 long_linkname: None,
366 pax_extensions: None,
367 mask: self.archive.inner.mask,
368 unpack_xattrs: self.archive.inner.unpack_xattrs,
369 preserve_permissions: self.archive.inner.preserve_permissions,
370 preserve_mtime: self.archive.inner.preserve_mtime,
371 overwrite: self.archive.inner.overwrite,
372 preserve_ownerships: self.archive.inner.preserve_ownerships,
373 };
374
375 let size = size
378 .checked_add(BLOCK_SIZE - 1)
379 .ok_or_else(|| other("size overflow"))?;
380 self.next = self
381 .next
382 .checked_add(size & !(BLOCK_SIZE - 1))
383 .ok_or_else(|| other("size overflow"))?;
384
385 Ok(Some(ret.into_entry()))
386 }
387
388 fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
389 if self.raw {
390 return self.next_entry_raw(None);
391 }
392
393 let mut gnu_longname = None;
394 let mut gnu_longlink = None;
395 let mut pax_extensions = None;
396 let mut processed = 0;
397 loop {
398 processed += 1;
399 let entry = match self.next_entry_raw(pax_extensions.as_deref())? {
400 Some(entry) => entry,
401 None if processed > 1 => {
402 return Err(other(
403 "members found describing a future member \
404 but no future member found",
405 ));
406 }
407 None => return Ok(None),
408 };
409
410 let is_recognized_header =
411 entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some();
412
413 if is_recognized_header && entry.header().entry_type().is_gnu_longname() {
414 if gnu_longname.is_some() {
415 return Err(other(
416 "two long name entries describing \
417 the same member",
418 ));
419 }
420 gnu_longname = Some(EntryFields::from(entry).read_all()?);
421 continue;
422 }
423
424 if is_recognized_header && entry.header().entry_type().is_gnu_longlink() {
425 if gnu_longlink.is_some() {
426 return Err(other(
427 "two long name entries describing \
428 the same member",
429 ));
430 }
431 gnu_longlink = Some(EntryFields::from(entry).read_all()?);
432 continue;
433 }
434
435 if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() {
436 if pax_extensions.is_some() {
437 return Err(other(
438 "two pax extensions entries describing \
439 the same member",
440 ));
441 }
442 pax_extensions = Some(EntryFields::from(entry).read_all()?);
443 continue;
444 }
445
446 let mut fields = EntryFields::from(entry);
447 fields.long_pathname = gnu_longname;
448 fields.long_linkname = gnu_longlink;
449 fields.pax_extensions = pax_extensions;
450 self.parse_sparse_header(&mut fields)?;
451 return Ok(Some(fields.into_entry()));
452 }
453 }
454
455 fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
456 if !entry.header.entry_type().is_gnu_sparse() {
457 return Ok(());
458 }
459 let gnu = match entry.header.as_gnu() {
460 Some(gnu) => gnu,
461 None => return Err(other("sparse entry type listed but not GNU header")),
462 };
463
464 entry.data.truncate(0);
484
485 let mut cur = 0;
486 let mut remaining = entry.size;
487 {
488 let data = &mut entry.data;
489 let reader = &self.archive.inner;
490 let size = entry.size;
491 let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
492 if block.is_empty() {
493 return Ok(());
494 }
495 let off = block.offset()?;
496 let len = block.length()?;
497 if len != 0 && (size - remaining) % BLOCK_SIZE != 0 {
498 return Err(other(
499 "previous block in sparse file was not \
500 aligned to 512-byte boundary",
501 ));
502 } else if off < cur {
503 return Err(other(
504 "out of order or overlapping sparse \
505 blocks",
506 ));
507 } else if cur < off {
508 let block = io::repeat(0).take(off - cur);
509 data.push(EntryIo::Pad(block));
510 }
511 cur = off
512 .checked_add(len)
513 .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
514 remaining = remaining.checked_sub(len).ok_or_else(|| {
515 other(
516 "sparse file consumed more data than the header \
517 listed",
518 )
519 })?;
520 data.push(EntryIo::Data(reader.take(len)));
521 Ok(())
522 };
523 for block in gnu.sparse.iter() {
524 add_block(block)?
525 }
526 if gnu.is_extended() {
527 let mut ext = GnuExtSparseHeader::new();
528 ext.isextended[0] = 1;
529 while ext.is_extended() {
530 if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
531 return Err(other("failed to read extension"));
532 }
533
534 self.next += BLOCK_SIZE;
535 for block in ext.sparse.iter() {
536 add_block(block)?;
537 }
538 }
539 }
540 }
541 if cur != gnu.real_size()? {
542 return Err(other(
543 "mismatch in sparse file chunks and \
544 size in header",
545 ));
546 }
547 entry.size = cur;
548 if remaining > 0 {
549 return Err(other(
550 "mismatch in sparse file chunks and \
551 entry size in header",
552 ));
553 }
554 Ok(())
555 }
556
557 fn skip(&mut self, mut amt: u64) -> io::Result<()> {
558 if let Some(seekable_archive) = self.seekable_archive {
559 let pos = io::SeekFrom::Current(
560 i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?,
561 );
562 (&seekable_archive.inner).seek(pos)?;
563 } else {
564 let mut buf = [0u8; 4096 * 8];
565 while amt > 0 {
566 let n = cmp::min(amt, buf.len() as u64);
567 let n = (&self.archive.inner).read(&mut buf[..n as usize])?;
568 if n == 0 {
569 return Err(other("unexpected EOF during skip"));
570 }
571 amt -= n as u64;
572 }
573 }
574 Ok(())
575 }
576}
577
578impl<'a> Iterator for EntriesFields<'a> {
579 type Item = io::Result<Entry<'a, io::Empty>>;
580
581 fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> {
582 if self.done {
583 None
584 } else {
585 match self.next_entry() {
586 Ok(Some(e)) => Some(Ok(e)),
587 Ok(None) => {
588 self.done = true;
589 None
590 }
591 Err(e) => {
592 self.done = true;
593 Some(Err(e))
594 }
595 }
596 }
597 }
598}
599
600impl<R: ?Sized + Read> Read for &ArchiveInner<R> {
601 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
602 let i = self.obj.borrow_mut().read(into)?;
603 self.pos.set(self.pos.get() + i as u64);
604 Ok(i)
605 }
606}
607
608impl<R: ?Sized + Seek> Seek for &ArchiveInner<R> {
609 fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
610 let pos = self.obj.borrow_mut().seek(pos)?;
611 self.pos.set(pos);
612 Ok(pos)
613 }
614}
615
616fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
621 let mut read = 0;
622 while read < buf.len() {
623 match r.read(&mut buf[read..])? {
624 0 => {
625 if read == 0 {
626 return Ok(false);
627 }
628
629 return Err(other("failed to read entire block"));
630 }
631 n => read += n,
632 }
633 }
634 Ok(true)
635}