1use std::cell::{Cell, RefCell};
2use std::cmp;
3use std::convert::TryFrom;
4use std::fs;
5use std::io::prelude::*;
6use std::io::{self, SeekFrom};
7use std::marker;
8use std::path::Path;
9
10use crate::entry::{EntryFields, EntryIo};
11use crate::error::TarError;
12use crate::header::{SparseEntry, BLOCK_SIZE};
13use crate::other;
14use crate::pax::*;
15use crate::{Entry, GnuExtSparseHeader, Header};
16
17pub struct Archive<R: ?Sized + Read> {
21 inner: ArchiveInner<R>,
22}
23
/// Shared state of an archive: the wrapped reader plus the options that are
/// copied into every entry produced while iterating.
///
/// `obj` must stay the last field so that a reference to this struct can be
/// coerced to one holding an unsized `R` (for example a trait object).
pub struct ArchiveInner<R>
where
    R: ?Sized,
{
    /// Position in the underlying stream; kept up to date by the `Read` and
    /// `Seek` implementations for `&ArchiveInner`.
    pos: Cell<u64>,
    /// Mask value copied into each entry (presumably a permission mask --
    /// confirm against the entry unpacking code).
    mask: u32,
    /// Copied into each entry's `unpack_xattrs` field.
    unpack_xattrs: bool,
    /// Copied into each entry's `preserve_permissions` field.
    preserve_permissions: bool,
    /// Copied into each entry's `preserve_ownerships` field.
    preserve_ownerships: bool,
    /// Copied into each entry's `preserve_mtime` field.
    preserve_mtime: bool,
    /// Copied into each entry's `overwrite` field.
    overwrite: bool,
    /// When set, all-zero header blocks are skipped instead of ending the
    /// iteration over entries.
    ignore_zeros: bool,
    /// The underlying reader, behind a `RefCell` so it can be driven through
    /// a shared reference.
    obj: RefCell<R>,
}
35
36pub struct Entries<'a, R: 'a + Read> {
38 fields: EntriesFields<'a>,
39 _ignored: marker::PhantomData<&'a Archive<R>>,
40}
41
/// Combination of `Read` and `Seek` under one trait, so that a seekable
/// reader can be named as a single trait object (`dyn SeekRead`).
trait SeekRead: Read + Seek {}

/// Every type that is both readable and seekable is a `SeekRead`.
impl<R> SeekRead for R where R: Read + Seek {}
44
45struct EntriesFields<'a> {
46 archive: &'a Archive<dyn Read + 'a>,
47 seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
48 next: u64,
49 done: bool,
50 raw: bool,
51}
52
53impl<R: Read> Archive<R> {
54 pub fn new(obj: R) -> Archive<R> {
56 Archive {
57 inner: ArchiveInner {
58 mask: u32::MIN,
59 unpack_xattrs: false,
60 preserve_permissions: false,
61 preserve_ownerships: false,
62 preserve_mtime: true,
63 overwrite: true,
64 ignore_zeros: false,
65 obj: RefCell::new(obj),
66 pos: Cell::new(0),
67 },
68 }
69 }
70
71 pub fn into_inner(self) -> R {
73 self.inner.obj.into_inner()
74 }
75
76 pub fn entries(&mut self) -> io::Result<Entries<R>> {
83 let me: &mut Archive<dyn Read> = self;
84 me._entries(None).map(|fields| Entries {
85 fields: fields,
86 _ignored: marker::PhantomData,
87 })
88 }
89
90 pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
110 let me: &mut Archive<dyn Read> = self;
111 me._unpack(dst.as_ref())
112 }
113
114 pub fn set_mask(&mut self, mask: u32) {
125 self.inner.mask = mask;
126 }
127
128 pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
136 self.inner.unpack_xattrs = unpack_xattrs;
137 }
138
139 pub fn set_preserve_permissions(&mut self, preserve: bool) {
145 self.inner.preserve_permissions = preserve;
146 }
147
148 pub fn set_preserve_ownerships(&mut self, preserve: bool) {
154 self.inner.preserve_ownerships = preserve;
155 }
156
157 pub fn set_overwrite(&mut self, overwrite: bool) {
159 self.inner.overwrite = overwrite;
160 }
161
162 pub fn set_preserve_mtime(&mut self, preserve: bool) {
167 self.inner.preserve_mtime = preserve;
168 }
169
170 pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) {
175 self.inner.ignore_zeros = ignore_zeros;
176 }
177}
178
179impl<R: Seek + Read> Archive<R> {
180 pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> {
188 let me: &Archive<dyn Read> = self;
189 let me_seekable: &Archive<dyn SeekRead> = self;
190 me._entries(Some(me_seekable)).map(|fields| Entries {
191 fields: fields,
192 _ignored: marker::PhantomData,
193 })
194 }
195}
196
197impl Archive<dyn Read + '_> {
198 fn _entries<'a>(
199 &'a self,
200 seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
201 ) -> io::Result<EntriesFields<'a>> {
202 if self.inner.pos.get() != 0 {
203 return Err(other(
204 "cannot call entries unless archive is at \
205 position 0",
206 ));
207 }
208 Ok(EntriesFields {
209 archive: self,
210 seekable_archive,
211 done: false,
212 next: 0,
213 raw: false,
214 })
215 }
216
217 fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
218 if dst.symlink_metadata().is_err() {
219 fs::create_dir_all(&dst)
220 .map_err(|e| TarError::new(format!("failed to create `{}`", dst.display()), e))?;
221 }
222
223 let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf());
229
230 let mut directories = Vec::new();
234 for entry in self._entries(None)? {
235 let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
236 if file.header().entry_type() == crate::EntryType::Directory {
237 directories.push(file);
238 } else {
239 file.unpack_in(dst)?;
240 }
241 }
242 for mut dir in directories {
243 dir.unpack_in(dst)?;
244 }
245
246 Ok(())
247 }
248}
249
250impl<'a, R: Read> Entries<'a, R> {
251 pub fn raw(self, raw: bool) -> Entries<'a, R> {
257 Entries {
258 fields: EntriesFields {
259 raw: raw,
260 ..self.fields
261 },
262 _ignored: marker::PhantomData,
263 }
264 }
265}
266impl<'a, R: Read> Iterator for Entries<'a, R> {
267 type Item = io::Result<Entry<'a, R>>;
268
269 fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
270 self.fields
271 .next()
272 .map(|result| result.map(|e| EntryFields::from(e).into_entry()))
273 }
274}
275
276#[allow(unused_assignments)]
277impl<'a> EntriesFields<'a> {
278 fn next_entry_raw(
279 &mut self,
280 pax_extensions: Option<&[u8]>,
281 ) -> io::Result<Option<Entry<'a, io::Empty>>> {
282 let mut header = Header::new_old();
283 let mut header_pos = self.next;
284 loop {
285 let delta = self.next - self.archive.inner.pos.get();
287 self.skip(delta)?;
288
289 if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
291 return Ok(None);
292 }
293
294 if !header.as_bytes().iter().all(|i| *i == 0) {
298 self.next += BLOCK_SIZE as u64;
299 break;
300 }
301
302 if !self.archive.inner.ignore_zeros {
303 return Ok(None);
304 }
305 self.next += BLOCK_SIZE as u64;
306 header_pos = self.next;
307 }
308
309 let sum = header.as_bytes()[..148]
311 .iter()
312 .chain(&header.as_bytes()[156..])
313 .fold(0, |a, b| a + (*b as u32))
314 + 8 * 32;
315 let cksum = header.cksum()?;
316 if sum != cksum {
317 return Err(other("archive header checksum mismatch"));
318 }
319
320 let mut pax_size: Option<u64> = None;
321 if let Some(pax_extensions_ref) = &pax_extensions {
322 pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE);
323
324 if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) {
325 header.set_uid(pax_uid);
326 }
327
328 if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) {
329 header.set_gid(pax_gid);
330 }
331 }
332
333 let file_pos = self.next;
334 let mut size = header.entry_size()?;
335 if size == 0 {
336 if let Some(pax_size) = pax_size {
337 size = pax_size;
338 }
339 }
340 let ret = EntryFields {
341 size: size,
342 header_pos: header_pos,
343 file_pos: file_pos,
344 data: vec![EntryIo::Data((&self.archive.inner).take(size))],
345 header: header,
346 long_pathname: None,
347 long_linkname: None,
348 pax_extensions: None,
349 mask: self.archive.inner.mask,
350 unpack_xattrs: self.archive.inner.unpack_xattrs,
351 preserve_permissions: self.archive.inner.preserve_permissions,
352 preserve_mtime: self.archive.inner.preserve_mtime,
353 overwrite: self.archive.inner.overwrite,
354 preserve_ownerships: self.archive.inner.preserve_ownerships,
355 };
356
357 let size = size
360 .checked_add(BLOCK_SIZE as u64 - 1)
361 .ok_or_else(|| other("size overflow"))?;
362 self.next = self
363 .next
364 .checked_add(size & !(BLOCK_SIZE as u64 - 1))
365 .ok_or_else(|| other("size overflow"))?;
366
367 Ok(Some(ret.into_entry()))
368 }
369
370 fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
371 if self.raw {
372 return self.next_entry_raw(None);
373 }
374
375 let mut gnu_longname = None;
376 let mut gnu_longlink = None;
377 let mut pax_extensions = None;
378 let mut processed = 0;
379 loop {
380 processed += 1;
381 let entry = match self.next_entry_raw(pax_extensions.as_deref())? {
382 Some(entry) => entry,
383 None if processed > 1 => {
384 return Err(other(
385 "members found describing a future member \
386 but no future member found",
387 ));
388 }
389 None => return Ok(None),
390 };
391
392 let is_recognized_header =
393 entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some();
394
395 if is_recognized_header && entry.header().entry_type().is_gnu_longname() {
396 if gnu_longname.is_some() {
397 return Err(other(
398 "two long name entries describing \
399 the same member",
400 ));
401 }
402 gnu_longname = Some(EntryFields::from(entry).read_all()?);
403 continue;
404 }
405
406 if is_recognized_header && entry.header().entry_type().is_gnu_longlink() {
407 if gnu_longlink.is_some() {
408 return Err(other(
409 "two long name entries describing \
410 the same member",
411 ));
412 }
413 gnu_longlink = Some(EntryFields::from(entry).read_all()?);
414 continue;
415 }
416
417 if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() {
418 if pax_extensions.is_some() {
419 return Err(other(
420 "two pax extensions entries describing \
421 the same member",
422 ));
423 }
424 pax_extensions = Some(EntryFields::from(entry).read_all()?);
425 processed -= 1;
428 continue;
429 }
430
431 let mut fields = EntryFields::from(entry);
432 fields.pax_extensions = pax_extensions;
433 pax_extensions = None; fields.long_pathname = if is_recognized_header && fields.is_pax_sparse() {
437 fields.pax_sparse_name()
438 } else {
439 gnu_longname
440 };
441 fields.long_linkname = gnu_longlink;
442 self.parse_sparse_header(&mut fields)?;
443 return Ok(Some(fields.into_entry()));
444 }
445 }
446
447 fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
448 if !entry.is_pax_sparse() && !entry.header.entry_type().is_gnu_sparse() {
449 return Ok(());
450 }
451 let mut sparse_map = Vec::<SparseEntry>::new();
452 let mut real_size = 0;
453 if entry.is_pax_sparse() {
454 real_size = entry.pax_sparse_realsize()?;
455 let mut num_bytes_read = 0;
456 let mut reader = io::BufReader::with_capacity(BLOCK_SIZE, &self.archive.inner);
457 let mut read_decimal_line = || -> io::Result<u64> {
458 let mut str = String::new();
459 num_bytes_read += reader.read_line(&mut str)?;
460 str.strip_suffix("\n")
461 .and_then(|s| s.parse::<u64>().ok())
462 .ok_or_else(|| other("failed to read a decimal line"))
463 };
464
465 let num_entries = read_decimal_line()?;
466 for _ in 0..num_entries {
467 let offset = read_decimal_line()?;
468 let size = read_decimal_line()?;
469 sparse_map.push(SparseEntry { offset, size });
470 }
471 let rem = BLOCK_SIZE - (num_bytes_read % BLOCK_SIZE);
472 entry.size -= (num_bytes_read + rem) as u64;
473 } else if entry.header.entry_type().is_gnu_sparse() {
474 let gnu = match entry.header.as_gnu() {
475 Some(gnu) => gnu,
476 None => return Err(other("sparse entry type listed but not GNU header")),
477 };
478 real_size = gnu.real_size()?;
479 for block in gnu.sparse.iter() {
480 if !block.is_empty() {
481 let offset = block.offset()?;
482 let size = block.length()?;
483 sparse_map.push(SparseEntry { offset, size });
484 }
485 }
486 }
487
488 entry.data.truncate(0);
508
509 let mut cur = 0;
510 let mut remaining = entry.size;
511 {
512 let data = &mut entry.data;
513 let reader = &self.archive.inner;
514 let size = entry.size;
515 let mut add_block = |off: u64, len: u64| -> io::Result<_> {
516 if len != 0 && (size - remaining) % BLOCK_SIZE as u64 != 0 {
517 return Err(other(
518 "previous block in sparse file was not \
519 aligned to 512-byte boundary",
520 ));
521 } else if off < cur {
522 return Err(other(
523 "out of order or overlapping sparse \
524 blocks",
525 ));
526 } else if cur < off {
527 let block = io::repeat(0).take(off - cur);
528 data.push(EntryIo::Pad(block));
529 }
530 cur = off
531 .checked_add(len)
532 .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
533 remaining = remaining.checked_sub(len).ok_or_else(|| {
534 other(
535 "sparse file consumed more data than the header \
536 listed",
537 )
538 })?;
539 data.push(EntryIo::Data(reader.take(len)));
540 Ok(())
541 };
542 for block in sparse_map {
543 add_block(block.offset, block.size)?
544 }
545 if entry.header.as_gnu().map(|gnu| gnu.is_extended()) == Some(true) {
546 let mut ext = GnuExtSparseHeader::new();
547 ext.isextended[0] = 1;
548 while ext.is_extended() {
549 if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
550 return Err(other("failed to read extension"));
551 }
552
553 self.next += BLOCK_SIZE as u64;
554 for block in ext.sparse.iter() {
555 if !block.is_empty() {
556 add_block(block.offset()?, block.length()?)?;
557 }
558 }
559 }
560 }
561 }
562 if cur != real_size {
563 return Err(other(
564 "mismatch in sparse file chunks and \
565 size in header",
566 ));
567 }
568 entry.size = cur;
569 if remaining > 0 {
570 return Err(other(
571 "mismatch in sparse file chunks and \
572 entry size in header",
573 ));
574 }
575 Ok(())
576 }
577
578 fn skip(&mut self, mut amt: u64) -> io::Result<()> {
579 if let Some(seekable_archive) = self.seekable_archive {
580 let pos = io::SeekFrom::Current(
581 i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?,
582 );
583 (&seekable_archive.inner).seek(pos)?;
584 } else {
585 let mut buf = [0u8; 4096 * 8];
586 while amt > 0 {
587 let n = cmp::min(amt, buf.len() as u64);
588 let n = (&self.archive.inner).read(&mut buf[..n as usize])?;
589 if n == 0 {
590 return Err(other("unexpected EOF during skip"));
591 }
592 amt -= n as u64;
593 }
594 }
595 Ok(())
596 }
597}
598
599impl<'a> Iterator for EntriesFields<'a> {
600 type Item = io::Result<Entry<'a, io::Empty>>;
601
602 fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> {
603 if self.done {
604 None
605 } else {
606 match self.next_entry() {
607 Ok(Some(e)) => Some(Ok(e)),
608 Ok(None) => {
609 self.done = true;
610 None
611 }
612 Err(e) => {
613 self.done = true;
614 Some(Err(e))
615 }
616 }
617 }
618 }
619}
620
621impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
622 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
623 let i = self.obj.borrow_mut().read(into)?;
624 self.pos.set(self.pos.get() + i as u64);
625 Ok(i)
626 }
627}
628
629impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> {
630 fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
631 let pos = self.obj.borrow_mut().seek(pos)?;
632 self.pos.set(pos);
633 Ok(pos)
634 }
635}
636
637fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
642 let mut read = 0;
643 while read < buf.len() {
644 match r.read(&mut buf[read..])? {
645 0 => {
646 if read == 0 {
647 return Ok(false);
648 }
649
650 return Err(other("failed to read entire block"));
651 }
652 n => read += n,
653 }
654 }
655 Ok(true)
656}