tar/
parser.rs

1use std::str::{from_utf8, Utf8Error};
2use nom::*;
3use nom::types::CompleteByteSlice;
4
5/*
6 * Core structs
7 */
8
9#[derive(Debug,PartialEq,Eq)]
10pub struct TarEntry<'a> {
11    pub header:   PosixHeader<'a>,
12    pub contents: &'a [u8]
13}
14
15#[derive(Debug,PartialEq,Eq)]
16pub struct PosixHeader<'a> {
17    pub name:     &'a str,
18    pub mode:     u64,
19    pub uid:      u64,
20    pub gid:      u64,
21    pub size:     u64,
22    pub mtime:    u64,
23    pub chksum:   &'a str,
24    pub typeflag: TypeFlag,
25    pub linkname: &'a str,
26    pub ustar:    ExtraHeader<'a>
27}
28
/* TODO: support more vendor specific */
/// Kind of archive entry, as decoded from the header's type-flag byte.
#[derive(Debug, PartialEq, Eq)]
pub enum TypeFlag {
    /// `'0'`, NUL, or any byte without a dedicated mapping.
    NormalFile,
    /// `'1'`
    HardLink,
    /// `'2'`
    SymbolicLink,
    /// `'3'`
    CharacterSpecial,
    /// `'4'`
    BlockSpecial,
    /// `'5'`
    Directory,
    /// `'6'`
    FIFO,
    /// `'7'`
    ContiguousFile,
    /// `'g'`
    PaxInterexchangeFormat,
    /// `'x'`
    PaxExtendedAttributes,
    /// `'L'`
    GNULongName,
    /// Any other upper-case ASCII letter.
    VendorSpecific,
}
45
46#[derive(Debug,PartialEq,Eq)]
47pub enum ExtraHeader<'a> {
48    UStar(UStarHeader<'a>),
49    Padding
50}
51
52#[derive(Debug,PartialEq,Eq)]
53pub struct UStarHeader<'a> {
54    pub magic:    &'a str,
55    pub version:  &'a str,
56    pub uname:    &'a str,
57    pub gname:    &'a str,
58    pub devmajor: u64,
59    pub devminor: u64,
60    pub extra:    UStarExtraHeader<'a>
61}
62
63#[derive(Debug,PartialEq,Eq)]
64pub enum UStarExtraHeader<'a> {
65    PosixUStar(PosixUStarHeader<'a>),
66    GNULongName(GNULongNameHeader<'a>),
67    Pax(PaxHeader<'a>)
68}
69
/// POSIX UStar tail of the header.
#[derive(Debug, PartialEq, Eq)]
pub struct PosixUStarHeader<'a> {
    /// Text of the 155-byte prefix field.
    pub prefix: &'a str,
}
74
/// GNU long-name payload.
#[derive(Debug, PartialEq, Eq)]
pub struct GNULongNameHeader<'a> {
    /// The long entry name.
    pub name: &'a str,
}
79
80#[derive(Debug,PartialEq,Eq)]
81pub struct PaxHeader<'a> {
82    pub atime:      u64,
83    pub ctime:      u64,
84    pub offset:     u64,
85    pub longnames:  &'a str,
86    pub sparses:    Vec<Sparse>,
87    pub isextended: bool,
88    pub realsize:   u64,
89}
90
/// One sparse-map descriptor: a pair of 12-byte octal fields.
#[derive(Debug, PartialEq, Eq)]
pub struct Sparse {
    /// First field of the pair.
    pub offset: u64,
    /// Second field of the pair.
    pub numbytes: u64,
}
96
/// Unit marker type for padding (distinct from the `ExtraHeader::Padding` variant).
#[derive(Debug, PartialEq, Eq)]
pub struct Padding;
99
100/*
101 * Useful macros
102 */
103
104named!(parse_bool<CompleteByteSlice<'_>, bool>, map!(take!(1), |i: CompleteByteSlice<'_>| i[0] != 0));
105
/// Reads a fixed-width text field of `$size` bytes.
///
/// Exactly `$size` bytes are consumed. The result is the UTF-8 text that
/// precedes the first NUL inside the field; whatever follows that NUL (the
/// "garbage") is discarded. A field that is filled to its full width and
/// therefore carries no NUL terminator yields the whole field as text.
///
/// Fails when fewer than `$size` bytes are available or when the text is not
/// valid UTF-8.
macro_rules! take_str_eat_garbage (
    ( $i:expr, $size:expr ) => ({
        let _size: usize = $size;
        fn str_before_nul<'a>(field: CompleteByteSlice<'a>) -> Result<&'a str, Utf8Error> {
            let bytes: &'a [u8] = field.0;
            /* Never look past the field: an unterminated field is all text. */
            let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
            from_utf8(&bytes[..end])
        }
        map_res!($i, take!(_size), str_before_nul)
    });
);
120
121named!(parse_str4<CompleteByteSlice<'_>, &str>,   take_str_eat_garbage!(4));
122named!(parse_str8<CompleteByteSlice<'_>, &str>,   take_str_eat_garbage!(8));
123named!(parse_str32<CompleteByteSlice<'_>, &str>,  take_str_eat_garbage!(32));
124named!(parse_str100<CompleteByteSlice<'_>, &str>, take_str_eat_garbage!(100));
125named!(parse_str155<CompleteByteSlice<'_>, &str>, take_str_eat_garbage!(155));
126named!(parse_str512<CompleteByteSlice<'_>, &str>, take_str_eat_garbage!(512));
127
128/*
129 * Octal string parsing
130 */
131
/// Consumes a single byte if it satisfies a predicate.
///
/// Two call forms are accepted: `take1_if!(input, some_macro!(args))`, where
/// the macro is invoked with the candidate byte as its first argument, and
/// `take1_if!(input, predicate_fn)`, which forwards to the first form via
/// `call!`.
///
/// Results:
/// * empty input -> `Incomplete(Needed::Size(1))`
/// * predicate rejects the byte -> `Error` at the input with `ErrorKind::OctDigit`
/// * otherwise -> the rest of the input and the accepted byte
macro_rules! take1_if {
    ($input:expr, $submac:ident!( $($args:tt)* )) => ({
        let input: CompleteByteSlice<'_> = $input;
        let res: IResult<_, _> = if input.is_empty() {
            Err(nom::Err::Incomplete(Needed::Size(1)))
        } else if ! $submac!(input[0], $($args)*) {
            Err(nom::Err::Error(error_position!(input, ErrorKind::OctDigit)))
        } else {
            Ok((input.slice(1..), input[0]))
        };
        res
    });
    /* No trailing `;` here: this arm expands in expression position. */
    ($input:expr, $f:expr) => (
        take1_if!($input, call!($f))
    );
}
148
149named!(take_oct_digit<CompleteByteSlice<'_>, u8>, take1_if!(is_oct_digit));
150named!(take_oct_digit_value<CompleteByteSlice<'_>, u64>, map!(take_oct_digit, |c| (c as u64) - ('0' as u64)));
151
152pub fn parse_octal(i: CompleteByteSlice<'_>, n: usize) -> IResult<CompleteByteSlice<'_>, u64> {
153    if i.len() < n {
154        Err(nom::Err::Incomplete(Needed::Size(n)))
155    } else {
156        let res = do_parse!(i,
157            number: fold_many_m_n!(0, n, take_oct_digit_value, 0, |acc, v| acc * 8 + v) >>
158            take_while!(is_space) >>
159            (number)
160        );
161
162        if let Ok((_i, val)) = res {
163            if (i.len() - _i.len()) == n || _i[0] == 0 {
164                Ok((i.slice(n..), val))
165            } else {
166                Err(nom::Err::Error(error_position!(_i, ErrorKind::OctDigit)))
167            }
168        } else {
169            res
170        }
171    }
172}
173
174named!(parse_octal8<CompleteByteSlice<'_>, u64>,  apply!(parse_octal, 8));
175named!(parse_octal12<CompleteByteSlice<'_>, u64>, apply!(parse_octal, 12));
176
177/*
178 * TypeFlag parsing
179 */
180
181fn char_to_type_flag(c: char) -> TypeFlag {
182    match c {
183        '0' | '\0'  => TypeFlag::NormalFile,
184        '1'         => TypeFlag::HardLink,
185        '2'         => TypeFlag::SymbolicLink,
186        '3'         => TypeFlag::CharacterSpecial,
187        '4'         => TypeFlag::BlockSpecial,
188        '5'         => TypeFlag::Directory,
189        '6'         => TypeFlag::FIFO,
190        '7'         => TypeFlag::ContiguousFile,
191        'g'         => TypeFlag::PaxInterexchangeFormat,
192        'x'         => TypeFlag::PaxExtendedAttributes,
193        'L'         => TypeFlag::GNULongName,
194        'A' ..= 'Z' => TypeFlag::VendorSpecific,
195        _           => TypeFlag::NormalFile
196    }
197}
198
199fn bytes_to_type_flag(i: CompleteByteSlice<'_>) -> TypeFlag {
200    char_to_type_flag(i[0] as char)
201}
202
203named!(parse_type_flag<CompleteByteSlice<'_>, TypeFlag>, map!(take!(1), bytes_to_type_flag));
204
205/*
206 * Sparse parsing
207 */
208
209named!(parse_one_sparse<CompleteByteSlice<'_>, Sparse>, do_parse!(offset: parse_octal12 >> numbytes: parse_octal12 >> (Sparse { offset: offset, numbytes: numbytes })));
210
211fn parse_sparses_with_limit(i: CompleteByteSlice<'_>, limit: usize) -> IResult<CompleteByteSlice<'_>, Vec<Sparse>> {
212    let mut res = Ok((i, Vec::new()));
213
214    for _ in 0..limit {
215        if let Ok((i, mut sparses)) = res {
216            let mut out = false;
217            res = map!(i, call!(parse_one_sparse), |sp: Sparse| {
218                if sp.offset == 0 && sp.numbytes == 0 {
219                    out = true
220                } else {
221                    sparses.push(sp);
222                }
223                sparses
224            });
225            if out {
226                break;
227            }
228        } else {
229            break;
230        }
231    }
232
233    res
234}
235
236fn add_to_vec(sparses: &mut Vec<Sparse>, extra: Vec<Sparse>) -> &mut Vec<Sparse> {
237    sparses.extend(extra);
238    sparses
239}
240
241fn parse_extra_sparses<'a, 'b>(i: CompleteByteSlice<'a>, isextended: bool, sparses: &'b mut Vec<Sparse>) -> IResult<CompleteByteSlice<'a>, &'b mut Vec<Sparse>> {
242    if isextended {
243        do_parse!(i,
244            sps:           apply!(parse_sparses_with_limit, 21)                            >>
245            extended:      parse_bool                                                      >>
246            take!(7) /* padding to 512 */                                                  >>
247            extra_sparses: apply!(parse_extra_sparses, extended, add_to_vec(sparses, sps)) >>
248            (extra_sparses)
249        )
250    } else {
251        Ok((i, sparses))
252    }
253}
254
255/*
256 * UStar PAX extended parsing
257 */
258
259fn parse_ustar00_extra_pax(i: CompleteByteSlice<'_>) -> IResult<CompleteByteSlice<'_>, PaxHeader<'_>> {
260    let mut sparses = Vec::new();
261
262    do_parse!(i,
263        atime:      parse_octal12                                              >>
264        ctime:      parse_octal12                                              >>
265        offset:     parse_octal12                                              >>
266        longnames:  parse_str4                                                 >>
267        take!(1)                                                               >>
268        sps:        apply!(parse_sparses_with_limit, 4)                        >>
269        isextended: parse_bool                                                 >>
270        realsize:   parse_octal12                                              >>
271        take!(17) /* padding to 512 */                                         >>
272        apply!(parse_extra_sparses, isextended, add_to_vec(&mut sparses, sps)) >>
273        (PaxHeader {
274            atime:      atime,
275            ctime:      ctime,
276            offset:     offset,
277            longnames:  longnames,
278            sparses:    sparses,
279            isextended: isextended,
280            realsize:   realsize,
281        })
282    )
283}
284
285/*
286 * UStar Posix parsing
287 */
288
289named!(parse_ustar00_extra_posix<CompleteByteSlice<'_>, UStarExtraHeader<'_>>, do_parse!(prefix: parse_str155 >> take!(12) >> (UStarExtraHeader::PosixUStar(PosixUStarHeader { prefix: prefix }))));
290
291fn parse_ustar00_extra<'a, 'b>(i: CompleteByteSlice<'a>, flag: &'b TypeFlag) -> IResult<CompleteByteSlice<'a>, UStarExtraHeader<'a>> {
292    match *flag {
293        TypeFlag::PaxInterexchangeFormat => do_parse!(i, header: parse_ustar00_extra_pax >> (UStarExtraHeader::Pax(header))),
294        _                                => parse_ustar00_extra_posix(i)
295    }
296}
297
298fn parse_ustar00<'a, 'b>(i: CompleteByteSlice<'a>, flag: &'b TypeFlag) -> IResult<CompleteByteSlice<'a>, ExtraHeader<'a>> {
299    do_parse!(i,
300        tag!("00")                                  >>
301        uname:    parse_str32                       >>
302        gname:    parse_str32                       >>
303        devmajor: parse_octal8                      >>
304        devminor: parse_octal8                      >>
305        extra:    apply!(parse_ustar00_extra, flag) >>
306        (ExtraHeader::UStar(UStarHeader {
307            magic:    "ustar\0",
308            version:  "00",
309            uname:    uname,
310            gname:    gname,
311            devmajor: devmajor,
312            devminor: devminor,
313            extra:    extra
314        }))
315    )
316}
317
318fn parse_ustar<'a, 'b>(i: CompleteByteSlice<'a>, flag: &'b TypeFlag) -> IResult<CompleteByteSlice<'a>, ExtraHeader<'a>> {
319    do_parse!(i, tag!("ustar\0") >> ustar: apply!(parse_ustar00, flag) >> (ustar))
320}
321
322/*
323 * Posix tar archive header parsing
324 */
325
326named!(parse_posix<CompleteByteSlice<'_>, ExtraHeader<'_>>, do_parse!(take!(255) >> (ExtraHeader::Padding))); /* padding to 512 */
327
328fn parse_maybe_longname<'a, 'b>(i: CompleteByteSlice<'a>, flag: &'b TypeFlag) -> IResult<CompleteByteSlice<'a>, &'a str> {
329    match *flag {
330         TypeFlag::GNULongName => parse_str512(i),
331         _                     => Err(nom::Err::Error(error_position!(i, ErrorKind::Complete)))
332    }
333}
334
335fn parse_header(i: CompleteByteSlice<'_>) -> IResult<CompleteByteSlice<'_>, PosixHeader<'_>> {
336    do_parse!(i,
337        name:     parse_str100                                       >>
338        mode:     parse_octal8                                       >>
339        uid:      parse_octal8                                       >>
340        gid:      parse_octal8                                       >>
341        size:     parse_octal12                                      >>
342        mtime:    parse_octal12                                      >>
343        chksum:   parse_str8                                         >>
344        typeflag: parse_type_flag                                    >>
345        linkname: parse_str100                                       >>
346        ustar:    alt!(apply!(parse_ustar, &typeflag) | parse_posix) >>
347        longname: opt!(apply!(parse_maybe_longname, &typeflag))      >>
348        (PosixHeader {
349            name:     longname.unwrap_or(name),
350            mode:     mode,
351            uid:      uid,
352            gid:      gid,
353            size:     size,
354            mtime:    mtime,
355            chksum:   chksum,
356            typeflag: typeflag,
357            linkname: linkname,
358            ustar:    ustar
359        })
360    )
361}
362
363/*
364 * Contents parsing
365 */
366
367fn parse_contents(i: CompleteByteSlice<'_>, size: u64) -> IResult<CompleteByteSlice<'_>, CompleteByteSlice<'_>> {
368    let trailing = size % 512;
369    let padding  = match trailing {
370        0 => 0,
371        t => 512 - t
372    };
373    do_parse!(i, contents: take!(size as usize) >> take!(padding as usize) >> (contents))
374}
375
376/*
377 * Tar entry header + contents parsing
378 */
379
380named!(parse_entry<CompleteByteSlice<'_>, TarEntry<'_>>, do_parse!(
381    header:   parse_header                        >>
382    contents: apply!(parse_contents, header.size) >>
383    (TarEntry {
384        header: header,
385        contents: &contents
386    })
387));
388
389/*
390 * Tar archive parsing
391 */
392
393fn filter_entries(entries: Vec<TarEntry<'_>>) -> Vec<TarEntry<'_>> {
394    /* Filter out empty entries */
395    entries.into_iter().filter(|e| e.header.name != "").collect::<Vec<TarEntry<'_>>>()
396}
397
398pub fn parse_tar(i: &[u8]) -> IResult<CompleteByteSlice<'_>, Vec<TarEntry<'_>>> {
399    do_parse!(CompleteByteSlice(i), entries: map!(many0!(parse_entry), filter_entries) >> eof!() >> (entries))
400}
401
402/*
403 * Tests
404 */
405
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::from_utf8;
    use nom::ErrorKind;

    const EMPTY: CompleteByteSlice<'_> = CompleteByteSlice(b"");

    /* Builds the error parse_octal reports for a bad byte at `at`. */
    fn oct_digit_error(at: CompleteByteSlice<'_>) -> nom::Err<CompleteByteSlice<'_>> {
        nom::Err::Error(error_position!(at, ErrorKind::OctDigit))
    }

    #[test]
    fn parse_octal_ok_test() {
        /* Digits fill the whole field. */
        assert_eq!(parse_octal(CompleteByteSlice(b"756"), 3), Ok((EMPTY, 494)));
        /* `\0` is a NUL escape: the bytes after it are ignored but consumed. */
        assert_eq!(parse_octal(CompleteByteSlice(b"756\01234"), 8), Ok((EMPTY, 494)));
        /* Zero-width field. */
        assert_eq!(parse_octal(CompleteByteSlice(b""), 0), Ok((EMPTY, 0)));
    }

    #[test]
    fn parse_octal_error_test() {
        let trailing_eight: CompleteByteSlice<'_> = CompleteByteSlice(b"1238");
        let at_eight: CompleteByteSlice<'_> = CompleteByteSlice(b"8");
        let lower_a: CompleteByteSlice<'_> = CompleteByteSlice(b"a");
        let upper_a: CompleteByteSlice<'_> = CompleteByteSlice(b"A");

        assert_eq!(parse_octal(trailing_eight, 4), Err(oct_digit_error(at_eight)));
        assert_eq!(parse_octal(lower_a, 1), Err(oct_digit_error(lower_a)));
        assert_eq!(parse_octal(upper_a, 1), Err(oct_digit_error(upper_a)));
    }

    #[test]
    fn take_str_eat_garbage_test() {
        let input = CompleteByteSlice(b"foobar\0\0\0\0baz");
        let remainder = CompleteByteSlice(b"baz");

        /* A 10-byte field: "foobar" plus four NULs; "baz" is left over. */
        assert_eq!(
            take_str_eat_garbage!(input.slice(..), 10),
            Ok((remainder.slice(..), "foobar"))
        );
    }
}
436        let baz = CompleteByteSlice(b"baz");
437        assert_eq!(take_str_eat_garbage!(s.slice(..), 10), Ok((baz.slice(..), "foobar")));
438    }
439}