1#![warn(missing_docs)]
20
21use nom::{
22 branch::alt,
23 bytes::complete::{tag, take, take_until},
24 character::complete::{digit1, oct_digit0, space0},
25 combinator::{iterator, map, map_parser, map_res},
26 error::ErrorKind,
27 sequence::{pair, terminated},
28 *,
29};
30use std::collections::HashMap;
31
32#[derive(Debug, PartialEq, Eq)]
34pub struct TarEntry<'a> {
35 pub header: TarHeader<'a>,
37 pub contents: &'a [u8],
41}
42
43#[derive(Debug, PartialEq, Eq)]
46pub struct TarEntryStreaming<'a> {
47 pub header: TarHeader<'a>,
49 pub header_len: u64,
56 pub content_len: u64,
58 pub padding_len: u64,
60}
61
62#[derive(Debug, PartialEq, Eq)]
64pub struct TarHeader<'a> {
65 pub name: &'a str,
69 pub mode: u64,
71 pub uid: u64,
73 pub gid: u64,
75 pub size: u64,
77 pub mtime: u64,
80 pub typeflag: TypeFlag,
82 pub linkname: &'a str,
85 pub ustar: ExtraHeader<'a>,
87}
88
/// Kind of a tar entry, decoded from the header's type-flag byte.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TypeFlag {
    /// Flag byte `'0'` or NUL.
    NormalFile,
    /// Flag byte `'1'`.
    HardLink,
    /// Flag byte `'2'`.
    SymbolicLink,
    /// Flag byte `'3'`.
    CharacterSpecial,
    /// Flag byte `'4'`.
    BlockSpecial,
    /// Flag byte `'5'`.
    Directory,
    /// Flag byte `'6'`.
    Fifo,
    /// Flag byte `'7'`.
    ContiguousFile,
    /// Flag byte `'g'`.
    PaxGlobal,
    /// Flag byte `'x'` or `'X'`.
    Pax,
    /// Flag byte `'D'`.
    GnuDirectory,
    /// Flag byte `'K'`.
    GnuLongLink,
    /// Flag byte `'L'`.
    GnuLongName,
    /// Flag byte `'S'`.
    GnuSparse,
    /// Flag byte `'V'`.
    GnuVolumeHeader,
    /// Any other upper-case ASCII letter; the raw flag byte is kept.
    VendorSpecific(u8),
}
126
127#[derive(Debug, PartialEq, Eq)]
129pub enum ExtraHeader<'a> {
130 UStar(UStarHeader<'a>),
132 Padding,
134}
135
136#[derive(Debug, PartialEq, Eq)]
138pub struct UStarHeader<'a> {
139 pub uname: &'a str,
141 pub gname: &'a str,
143 pub devmajor: u64,
145 pub devminor: u64,
147 pub extra: UStarExtraHeader<'a>,
149}
150
151#[derive(Debug, PartialEq, Eq)]
153pub enum UStarExtraHeader<'a> {
154 Posix(PosixExtraHeader<'a>),
156 Gnu(GnuExtraHeader),
158}
159
/// POSIX-specific tail of a ustar header.
#[derive(Debug, PartialEq, Eq)]
pub struct PosixExtraHeader<'a> {
    /// `prefix` field, read from a 155-byte field up to the first NUL.
    ///
    /// For long paths this carries the leading part of the path, while the header's
    /// `name` carries the trailing part.
    pub prefix: &'a str,
}
169
170#[derive(Debug, PartialEq, Eq)]
172pub struct GnuExtraHeader {
173 pub atime: u64,
175 pub ctime: u64,
177 pub offset: u64,
179 pub sparses: Vec<Sparse>,
181 pub realsize: u64,
183}
184
/// One sparse-map entry of a GNU header.
#[derive(Debug, PartialEq, Eq)]
pub struct Sparse {
    /// `offset` field, stored as 12 bytes of octal text.
    pub offset: u64,
    /// `numbytes` field, stored as 12 bytes of octal text.
    pub numbytes: u64,
}
193
194fn parse_bool(i: &[u8]) -> IResult<&[u8], bool> {
195 map(take(1usize), |i: &[u8]| i[0] != 0)(i)
196}
197
198fn parse_str(size: usize) -> impl FnMut(&[u8]) -> IResult<&[u8], &str> {
201 move |input| {
202 let s = map_res(alt((take_until("\0"), take(size))), std::str::from_utf8);
203 map_parser(take(size), s)(input)
204 }
205}
206
207fn parse_octal(n: usize) -> impl FnMut(&[u8]) -> IResult<&[u8], u64> {
209 move |i| {
210 let (rest, input) = take(n)(i)?;
211 let (i, value) = terminated(oct_digit0, space0)(input)?;
212
213 if i.input_len() == 0 || i[0] == 0 {
214 let value = value
215 .iter()
216 .fold(0, |acc, v| acc * 8 + u64::from(*v - b'0'));
217 Ok((rest, value))
218 } else {
219 Err(nom::Err::Error(error_position!(i, ErrorKind::OctDigit)))
220 }
221 }
222}
223
224fn parse_type_flag(i: &[u8]) -> IResult<&[u8], TypeFlag> {
226 let (c, rest) = match i.split_first() {
227 Some((c, rest)) => (c, rest),
228 None => return Err(nom::Err::Incomplete(Needed::new(1))),
229 };
230 let flag = match c {
231 b'0' | b'\0' => TypeFlag::NormalFile,
232 b'1' => TypeFlag::HardLink,
233 b'2' => TypeFlag::SymbolicLink,
234 b'3' => TypeFlag::CharacterSpecial,
235 b'4' => TypeFlag::BlockSpecial,
236 b'5' => TypeFlag::Directory,
237 b'6' => TypeFlag::Fifo,
238 b'7' => TypeFlag::ContiguousFile,
239 b'g' => TypeFlag::PaxGlobal,
240 b'x' | b'X' => TypeFlag::Pax,
241 b'D' => TypeFlag::GnuDirectory,
242 b'K' => TypeFlag::GnuLongLink,
243 b'L' => TypeFlag::GnuLongName,
244 b'S' => TypeFlag::GnuSparse,
245 b'V' => TypeFlag::GnuVolumeHeader,
246 b'A'..=b'Z' => TypeFlag::VendorSpecific(*c),
247 _ => return Err(nom::Err::Error(error_position!(i, ErrorKind::Fail))),
248 };
249 Ok((rest, flag))
250}
251
252fn parse_sparse(i: &[u8]) -> IResult<&[u8], Sparse> {
254 let (i, (offset, numbytes)) = pair(parse_octal(12), parse_octal(12))(i)?;
255 Ok((i, Sparse { offset, numbytes }))
256}
257
258fn parse_sparses(i: &[u8], count: usize) -> IResult<&[u8], Vec<Sparse>> {
259 let mut it = iterator(i, parse_sparse);
260 let res = it
261 .take(count)
262 .filter(|s| !(s.offset == 0 && s.numbytes == 0))
263 .collect();
264 let (i, ()) = it.finish()?;
265 Ok((i, res))
266}
267
268fn add_to_vec(sparses: &mut Vec<Sparse>, extra: Vec<Sparse>) -> &mut Vec<Sparse> {
269 sparses.extend(extra);
270 sparses
271}
272
273fn parse_extra_sparses<'a, 'b>(
274 i: &'a [u8],
275 isextended: bool,
276 sparses: &'b mut Vec<Sparse>,
277) -> IResult<&'a [u8], &'b mut Vec<Sparse>> {
278 if isextended {
279 let (i, sps) = parse_sparses(i, 21)?;
280 let (i, extended) = parse_bool(i)?;
281 let (i, _) = take(7usize)(i)?; parse_extra_sparses(i, extended, add_to_vec(sparses, sps))
284 } else {
285 Ok((i, sparses))
286 }
287}
288
289fn parse_extra_posix(i: &[u8]) -> IResult<&[u8], UStarExtraHeader<'_>> {
291 let (i, prefix) = terminated(parse_str(155), take(12usize))(i)?;
292 let header = UStarExtraHeader::Posix(PosixExtraHeader { prefix });
293 Ok((i, header))
294}
295
296fn parse_extra_gnu(i: &[u8]) -> IResult<&[u8], UStarExtraHeader<'_>> {
298 let mut sparses = Vec::new();
299
300 let (i, atime) = parse_octal(12)(i)?;
301 let (i, ctime) = parse_octal(12)(i)?;
302 let (i, offset) = parse_octal(12)(i)?;
303 let (i, _) = take(4usize)(i)?; let (i, _) = take(1usize)(i)?;
305 let (i, sps) = parse_sparses(i, 4)?;
306 let (i, isextended) = parse_bool(i)?;
307 let (i, realsize) = parse_octal(12)(i)?;
308 let (i, _) = take(17usize)(i)?; let (i, _) = parse_extra_sparses(i, isextended, add_to_vec(&mut sparses, sps))?;
311
312 let header = GnuExtraHeader {
313 atime,
314 ctime,
315 offset,
316 sparses,
317 realsize,
318 };
319 let header = UStarExtraHeader::Gnu(header);
320 Ok((i, header))
321}
322
323fn parse_ustar(
325 magic: &'static str,
326 version: &'static str,
327 mut extra: impl FnMut(&[u8]) -> IResult<&[u8], UStarExtraHeader>,
328) -> impl FnMut(&[u8]) -> IResult<&[u8], ExtraHeader> {
329 move |input| {
330 let (i, _) = tag(magic)(input)?;
331 let (i, _) = tag(version)(i)?;
332 let (i, uname) = parse_str(32)(i)?;
333 let (i, gname) = parse_str(32)(i)?;
334 let (i, devmajor) = parse_octal(8)(i)?;
335 let (i, devminor) = parse_octal(8)(i)?;
336 let (i, extra) = extra(i)?;
337
338 let header = ExtraHeader::UStar(UStarHeader {
339 uname,
340 gname,
341 devmajor,
342 devminor,
343 extra,
344 });
345 Ok((i, header))
346 }
347}
348
349fn parse_old(i: &[u8]) -> IResult<&[u8], ExtraHeader<'_>> {
351 map(take(255usize), |_| ExtraHeader::Padding)(i) }
353
354fn parse_header(i: &[u8]) -> IResult<&[u8], TarHeader<'_>> {
355 debug_assert!(i.len() >= 512);
356 let header_chksum = i[..148].iter().map(|b| *b as u64).sum::<u64>()
357 + i[156..512].iter().map(|b| *b as u64).sum::<u64>()
358 + 8 * (b' ' as u64);
359 let (i, name) = parse_str(100)(i)?;
360 let (i, mode) = parse_octal(8)(i)?;
361 let (i, uid) = parse_octal(8)(i)?;
362 let (i, gid) = parse_octal(8)(i)?;
363 let (i, size) = parse_octal(12)(i)?;
364 let (i, mtime) = parse_octal(12)(i)?;
365 let (i, chksum) = parse_octal(8)(i)?;
366 if header_chksum != chksum {
367 return Err(Err::Error(error_position!(i, ErrorKind::Fail)));
368 }
369 let (i, typeflag) = parse_type_flag(i)?;
370 let (i, linkname) = parse_str(100)(i)?;
371
372 let (i, ustar) = alt((
373 parse_ustar("ustar ", " \0", parse_extra_gnu),
374 parse_ustar("ustar\0", "00", parse_extra_posix),
375 parse_old,
376 ))(i)?;
377
378 let header = TarHeader {
379 name,
380 mode,
381 uid,
382 gid,
383 size,
384 mtime,
385 typeflag,
386 linkname,
387 ustar,
388 };
389 Ok((i, header))
390}
391
392pub fn parse_entry_streaming(i: &[u8]) -> IResult<&[u8], Option<TarEntryStreaming<'_>>> {
398 let len = i.len();
399
400 {
401 let (i, block) = take(512usize)(i)?;
403 if block == [0u8; 512] {
404 return Ok((i, None));
405 }
406 }
407 let (i, header) = parse_header(i)?;
408
409 let header_len = (len - i.len()) as u64;
410 let content_len = header.size;
411 let padding_len = match content_len % 512 {
412 0 => 0,
413 t => 512 - t,
414 };
415 Ok((
416 i,
417 Some(TarEntryStreaming {
418 header,
419 header_len,
420 content_len,
421 padding_len,
422 }),
423 ))
424}
425
426fn parse_entry(i: &[u8]) -> IResult<&[u8], Option<TarEntry<'_>>> {
427 let (i, entry) = parse_entry_streaming(i)?;
428 if let Some(entry) = entry {
429 let (i, contents) = terminated(
430 take(entry.content_len as usize),
431 take(entry.padding_len as usize),
432 )(i)?;
433 Ok((
434 i,
435 Some(TarEntry {
436 header: entry.header,
437 contents,
438 }),
439 ))
440 } else {
441 Ok((i, None))
442 }
443}
444
445pub fn parse_tar(i: &[u8]) -> IResult<&[u8], Vec<TarEntry<'_>>> {
467 let mut it = iterator(i, parse_entry);
468 let entries = it.flatten().collect();
469 let (i, ()) = it.finish()?;
470 Ok((i, entries))
471}
472
473pub fn parse_long_name(i: &[u8]) -> IResult<&[u8], &str> {
494 parse_str(i.len())(i)
495}
496
497fn parse_pax_item(i: &[u8]) -> IResult<&[u8], (&str, &str)> {
498 let (i, len) = map_res(terminated(digit1, tag(" ")), std::str::from_utf8)(i)?;
499 let (i, key) = map_res(terminated(take_until("="), tag("=")), std::str::from_utf8)(i)?;
500 let (i, value) = map_res(terminated(take_until("\n"), tag("\n")), std::str::from_utf8)(i)?;
501 if let Ok(len_usize) = len.parse::<usize>() {
502 debug_assert_eq!(len_usize, len.len() + key.len() + value.len() + 3);
503 }
504 Ok((i, (key, value)))
505}
506
507pub fn parse_pax(i: &[u8]) -> IResult<&[u8], HashMap<&str, &str>> {
529 let mut it = iterator(i, parse_pax_item);
530 let map = it.collect();
531 let (i, ()) = it.finish()?;
532 Ok((i, map))
533}
534
#[cfg(test)]
mod parser_test {
    use crate::*;
    use nom::error::ErrorKind;

    const EMPTY: &[u8] = b"";

    // Well-formed octal fields: bare digits, NUL-terminated, space-then-NUL, and empty.
    #[test]
    fn parse_octal_ok_test() {
        assert_eq!(parse_octal(3)(b"756"), Ok((EMPTY, 494)));
        assert_eq!(parse_octal(8)(b"756\0 234"), Ok((EMPTY, 494)));
        assert_eq!(parse_octal(8)(b"756 \0"), Ok((EMPTY, 494)));
        assert_eq!(parse_octal(0)(b""), Ok((EMPTY, 0)));
    }

    // A non-octal byte must be reported at the position of the offending byte.
    #[test]
    fn parse_octal_error_test() {
        let digits_then_eight: &[u8] = b"1238";
        let offending_eight: &[u8] = b"8";
        let lower_letter: &[u8] = b"a";
        let upper_letter: &[u8] = b"A";

        assert_eq!(
            parse_octal(4)(digits_then_eight),
            Err(nom::Err::Error(error_position!(
                offending_eight,
                ErrorKind::OctDigit
            )))
        );
        assert_eq!(
            parse_octal(1)(lower_letter),
            Err(nom::Err::Error(error_position!(
                lower_letter,
                ErrorKind::OctDigit
            )))
        );
        assert_eq!(
            parse_octal(1)(upper_letter),
            Err(nom::Err::Error(error_position!(
                upper_letter,
                ErrorKind::OctDigit
            )))
        );
    }

    // The field is consumed in full even though the string stops at the first NUL.
    #[test]
    fn parse_str_test() {
        let input: &[u8] = b"foobar\0\0\0\0baz";
        let remainder: &[u8] = b"baz";
        assert_eq!(parse_str(10)(input), Ok((remainder, "foobar")));
    }

    // Four all-zero sparse entries (two 12-byte fields each) are consumed and dropped.
    #[test]
    fn parse_sparses_test() {
        let zeroed = vec![0u8; 12 * 2 * 4];
        assert_eq!(parse_sparses(&zeroed, 4), Ok((EMPTY, vec![])));
    }

    #[test]
    fn parse_pax_test() {
        let input: &[u8] = b"25 ctime=1084839148.1212\nfoo";
        let remainder: &[u8] = b"foo";
        assert_eq!(
            parse_pax_item(input),
            Ok((remainder, ("ctime", "1084839148.1212")))
        );
    }
}
594
#[cfg(test)]
mod tar_test {
    use crate::*;
    use std::io::{Read, Seek};
    use tempfile::tempfile;

    const LIB_RS_FILE: &str = "src/lib.rs";

    /// Finishes `archive` and reads back every byte written to its backing file.
    fn archive_bytes(archive: tar::Builder<std::fs::File>) -> Vec<u8> {
        let mut file = archive.into_inner().unwrap();
        file.rewind().unwrap();

        let mut buffer = vec![];
        file.read_to_end(&mut buffer).unwrap();
        buffer
    }

    #[test]
    fn basic() {
        let mut archive = tar::Builder::new(tempfile().unwrap());
        archive
            .append_path_with_name(LIB_RS_FILE, "lib.rs")
            .unwrap();
        let buffer = archive_bytes(archive);

        let (_, entries) = parse_tar(&buffer).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].header.typeflag, TypeFlag::NormalFile);
        assert_eq!(entries[0].header.name, "lib.rs");
        assert_eq!(entries[0].contents, std::fs::read(LIB_RS_FILE).unwrap());
    }

    // A name too long for the header is written as a separate GNU long-name entry
    // placed before the real entry.
    #[test]
    fn gnu_long() {
        let name = "a".repeat(1024);

        let mut archive = tar::Builder::new(tempfile().unwrap());
        archive.append_path_with_name(LIB_RS_FILE, &name).unwrap();
        let buffer = archive_bytes(archive);

        let (_, entries) = parse_tar(&buffer).unwrap();
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].header.typeflag, TypeFlag::GnuLongName);
        assert_eq!(parse_long_name(entries[0].contents).unwrap().1, &name);
        assert_eq!(entries[1].contents, std::fs::read(LIB_RS_FILE).unwrap());
    }

    // With a ustar header the long path is split into `prefix` and `name`.
    #[test]
    fn posix_long() {
        let name_prefix = "a".repeat(80);
        let name_postfix = "b".repeat(80);
        let name = format!("{name_prefix}/{name_postfix}");

        let mut archive = tar::Builder::new(tempfile().unwrap());
        {
            let mut header = tar::Header::new_ustar();
            let file = std::fs::File::open(LIB_RS_FILE).unwrap();
            let size = file.metadata().unwrap().len();
            header.set_size(size);
            archive.append_data(&mut header, name, file).unwrap();
        }
        let buffer = archive_bytes(archive);

        let (_, entries) = parse_tar(&buffer).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].header.typeflag, TypeFlag::NormalFile);
        assert_eq!(entries[0].header.name, name_postfix);
        match &entries[0].header.ustar {
            ExtraHeader::UStar(ustar) => match &ustar.extra {
                UStarExtraHeader::Posix(posix) => assert_eq!(posix.prefix, name_prefix),
                UStarExtraHeader::Gnu(_) => unreachable!(),
            },
            ExtraHeader::Padding => unreachable!(),
        }
        assert_eq!(entries[0].contents, std::fs::read(LIB_RS_FILE).unwrap());
    }
}