nom_gzip/
lib.rs

1pub mod types;
2use types::*;
3
4#[macro_use]
5extern crate nom;
6
7use nom::{le_u16, le_u32};
8use nom::Endianness::Little;
9
10use std::time::Duration;
11
12named!(null_terminated_string<String>, map_res!(terminated!(take_until!(&[0x00][..]), take!(1)), |buf: &[u8]| String::from_utf8(buf.to_vec())));
13named!(get_byte<u8>, map!(take!(1), |bs| bs[0]));
14
15named!(id1, tag!([0x1f]));
16named!(id2, tag!([0x8b]));
17named!(compression_method<CompressionMethod>, map!(take!(1), |b| CompressionMethod::from(b[0])));
18named!(flags<Flags>, map!(take!(1), |b| Flags::from(b[0])));
19named!(modified_time_as_secs_since_epoch<Duration>, map!(u32!(Little), |t| Duration::from_secs(t as u64)));
20named!(extra_flags<ExtraFlags>, map!(take!(1), |b| ExtraFlags::from(b[0])));
21named!(operating_system<OperatingSystem>, map!(take!(1), |b| OperatingSystem::from(b[0])));
22
23/// What little documentation I could find on existing sub-fields lives at
24/// http://www.gzip.org/format.txt but it's woefully inadequate as a spec.
25named!(sub_field<SubField>, do_parse!(
26       id1: get_byte
27    >> id2: get_byte
28    >> data: length_data!(le_u16)
29    >>
30    (SubField { id1, id2, data })
31));
32
33named!(extra_field<ExtraField>, length_value!(le_u16, map!(many0!(sub_field), |sub_fields| ExtraField{ sub_fields })));
34named!(original_filename<String>, call!(null_terminated_string));
35named!(file_comment<String>, call!(null_terminated_string));
36named!(header_crc16<u16>, call!(le_u16));
37named!(footer_crc32<u32>, call!(le_u32));
38named!(input_size<u32>, call!(le_u32));
39
40named!(pub gzip_header<GzipHeader>, do_parse!(
41       id1
42    >> id2
43    >> compression_method: compression_method
44    >> flags: flags
45    >> modified_time_as_secs_since_epoch: modified_time_as_secs_since_epoch
46    >> extra_flags: extra_flags
47    >> operating_system: operating_system
48    >> extra_field: cond!(flags.fextra, call!(extra_field))
49    >> original_filename: cond!(flags.fname, call!(original_filename))
50    >> file_comment: cond!(flags.fcomment, call!(file_comment))
51    >> header_crc: cond!(flags.fhcrc, call!(header_crc16))
52    >>
53
54    (GzipHeader {
55        compression_method,
56        flags,
57        modified_time_as_secs_since_epoch,
58        extra_flags,
59        operating_system,
60        extra_field,
61        original_filename,
62        file_comment,
63        header_crc
64    })
65));
66
67named!(pub gzip_footer<GzipFooter>, do_parse!(
68       crc: footer_crc32
69    >> input_size: input_size
70    >> eof!()
71    >>
72
73    (GzipFooter { crc, input_size })
74));
75
76/// This will probably be pretty slow; you'll likely want to use `gzip_header` and then make use of
77/// the GZIP stream directly from there, passing in the last 8 bytes to `gzip_footer` if necessary.
78named!(pub gzip_file<GzipFile>, do_parse! (
79    header: gzip_header
80    >> gzip_file: map!(many_till!(call!(get_byte), call!(gzip_footer)), |tup: (Vec<u8>, GzipFooter)| {
81                    GzipFile { header, footer: tup.1, compressed_blocks: tup.0.iter().map(|b| *b).collect() }
82                  })
83    >>
84
85    (gzip_file)
86));
87
#[cfg(test)]
mod tests {
    // Unit tests for the individual gzip field parsers. Every exhaustive loop uses an
    // inclusive range so that the top value (0xff / 0xffff / all flags set) is exercised too.

    extern crate nom;
    extern crate byteorder;

    use tests::nom::IResult::Done;

    use super::*;

    // The empty remainder a fully-consuming parser leaves behind. No trailing semicolon:
    // the macro is used in expression position.
    macro_rules! empty {
        () => {
            &b""[..]
        }
    }

    // Checks that `$func` strips the NUL terminator and returns the preceding text.
    macro_rules! test_null_terminated {
        ($func:ident) => {
            let input = &b"This is null-terminated\0"[..];
            let expected = String::from("This is null-terminated");
            match $func(input) {
                Done(_, actual) => assert_eq!(actual, expected),
                unexpected => panic!("Unable to parse null-terminated string, got back {:?}", unexpected),
            }
        }
    }

    // Checks that `$func` decodes every possible little-endian u16.
    macro_rules! test_u16 {
        ($func:ident) => {
            use tests::byteorder::{ByteOrder, LittleEndian};
            let mut buf: [u8; 2] = [0; 2];
            for expected in 0x0000u16 ..= 0xffffu16 {
                LittleEndian::write_u16(&mut buf[0..2], expected);
                assert_eq!($func(&buf[..]), Done(empty!(), expected));
            }
        }
    }

    // Checks that `$func` decodes a handful of representative little-endian u32 values.
    macro_rules! test_u32 {
        ($func:ident) => {
            use tests::byteorder::{ByteOrder, LittleEndian};
            let samples: [u32; 6] = [0x00000000, 0xffffffff, 0xff00ff00, 0x00ff00ff, 0x01234567, 0x89abcdef];
            let mut buf: [u8; 4] = [0; 4];
            for expected in samples.iter() {
                LittleEndian::write_u32(&mut buf[0..4], *expected);
                assert_eq!($func(&buf[..]), Done(empty!(), *expected));
            }
        }
    }

    #[test]
    fn test_id1() {
        let input: &[u8] = &[0x1f][..];
        assert_eq!(id1(input), Done(&b""[..], input));
    }

    #[test]
    fn test_id2() {
        let input: &[u8] = &[0x8b][..];
        assert_eq!(id2(input), Done(&b""[..], input));
    }

    #[test]
    fn test_compression_method() {
        use CompressionMethod::*;
        assert_eq!(compression_method(&[0x00][..]), Done(empty!(), Reserved0));
        assert_eq!(compression_method(&[0x01][..]), Done(empty!(), Reserved1));
        assert_eq!(compression_method(&[0x02][..]), Done(empty!(), Reserved2));
        assert_eq!(compression_method(&[0x03][..]), Done(empty!(), Reserved3));
        assert_eq!(compression_method(&[0x04][..]), Done(empty!(), Reserved4));
        assert_eq!(compression_method(&[0x05][..]), Done(empty!(), Reserved5));
        assert_eq!(compression_method(&[0x06][..]), Done(empty!(), Reserved6));
        assert_eq!(compression_method(&[0x07][..]), Done(empty!(), Reserved7));
        assert_eq!(compression_method(&[0x08][..]), Done(empty!(), Deflate));
        for b in 0x09u8 ..= 0xffu8 {
            assert_eq!(compression_method(&[b][..]), Done(empty!(), Unknown));
        }
    }

    #[test]
    fn test_flags() {
        // Inclusive upper bound so the "all five flags set" byte is covered.
        for byte in 0b0000_0000u8 ..= 0b0001_1111u8 {
            let expected = Done(empty!(), Flags {
                ftext:    (byte & 0b0000_0001) != 0,
                fhcrc:    (byte & 0b0000_0010) != 0,
                fextra:   (byte & 0b0000_0100) != 0,
                fname:    (byte & 0b0000_1000) != 0,
                fcomment: (byte & 0b0001_0000) != 0,
            });
            assert_eq!(flags(&[byte][..]), expected);
        }
    }

    #[test]
    fn test_modified_time_as_secs_since_epoch() {
        use tests::byteorder::{ByteOrder, LittleEndian};
        // Fixed samples keep the test deterministic and cover both ends of the u32 range.
        let samples: [u32; 4] = [0, 1, 1_500_000_000, 0xffff_ffff];
        let mut buffer: [u8; 4] = [0; 4];
        for secs in samples.iter() {
            let expected = Duration::from_secs(u64::from(*secs));
            LittleEndian::write_u32(&mut buffer[..], *secs);
            match modified_time_as_secs_since_epoch(&buffer[..]) {
                Done(remaining, actual) => {
                    assert_eq!(remaining, empty!());
                    assert_eq!(actual, expected);
                }
                unexpected => panic!("Unable to parse modification time, got back {:?}", unexpected),
            }
        }
    }

    #[test]
    fn test_extra_flags() {
        assert_eq!(extra_flags(&[0x02u8][..]), Done(empty!(), ExtraFlags::MaximumCompression));
        assert_eq!(extra_flags(&[0x04u8][..]), Done(empty!(), ExtraFlags::FastestAlgorithm));
        for byte in 0x00u8 ..= 0xffu8 {
            // Clearing bits 1 and 2 guarantees the value is neither 0x02 nor 0x04.
            let masked = byte & 0b1111_1001;
            assert_eq!(extra_flags(&[masked][..]), Done(empty!(), ExtraFlags::Unknown));
        }
    }

    #[test]
    fn test_operating_system() {
        use OperatingSystem::*;
        assert_eq!(operating_system(&[0u8][..]),  Done(empty!(), Fat));
        assert_eq!(operating_system(&[1u8][..]),  Done(empty!(), Amiga));
        assert_eq!(operating_system(&[2u8][..]),  Done(empty!(), Vms));
        assert_eq!(operating_system(&[3u8][..]),  Done(empty!(), Unix));
        assert_eq!(operating_system(&[4u8][..]),  Done(empty!(), VmCms));
        assert_eq!(operating_system(&[5u8][..]),  Done(empty!(), AtariTos));
        assert_eq!(operating_system(&[6u8][..]),  Done(empty!(), Hpfs));
        assert_eq!(operating_system(&[7u8][..]),  Done(empty!(), Macintosh));
        assert_eq!(operating_system(&[8u8][..]),  Done(empty!(), Zsystem));
        assert_eq!(operating_system(&[9u8][..]),  Done(empty!(), Cpm));
        assert_eq!(operating_system(&[10u8][..]), Done(empty!(), Tops20));
        assert_eq!(operating_system(&[11u8][..]), Done(empty!(), Ntfs));
        assert_eq!(operating_system(&[12u8][..]), Done(empty!(), Qdos));
        assert_eq!(operating_system(&[13u8][..]), Done(empty!(), AcornRiscos));
        for b in 14u8 ..= 0xffu8 {
            assert_eq!(operating_system(&[b][..]), Done(empty!(), Unknown));
        }
    }

    #[test]
    fn test_sub_field() {
        use tests::byteorder::{ByteOrder, LittleEndian};
        let mut field: [u8; 8] = [0; 8];
        for (pos, val) in "cp  cpio".bytes().enumerate() {
            field[pos] = val;
        }
        // Overwrite the two placeholder spaces with the payload length.
        LittleEndian::write_u16(&mut field[2..4], 4);

        assert_eq!(sub_field(&field[..]), Done(empty!(), SubField {
            id1: b'c',
            id2: b'p',
            data: &b"cpio"[..],
        }));
    }

    #[test]
    fn test_extra_field() {
        use tests::byteorder::{ByteOrder, LittleEndian};
        let mut xfield: [u8; 42] = [0; 42];
        for (pos, val) in "  cp  cpio.Ac  acorn.KN  keynote assertion".bytes().enumerate() {
            xfield[pos] = val;
        }
        // Each pair of placeholder spaces is replaced by a little-endian length:
        // the total extra-field length first, then one length per sub-field.
        LittleEndian::write_u16(&mut xfield[0..2],  40);
        LittleEndian::write_u16(&mut xfield[4..6],   5);
        LittleEndian::write_u16(&mut xfield[13..15], 6);
        LittleEndian::write_u16(&mut xfield[23..25], 17);

        match extra_field(&xfield[..]) {
            Done(_, actual) => {
                assert!(actual.sub_fields.contains(&SubField {
                    id1: b'c',
                    id2: b'p',
                    data: &b"cpio."[..],
                }));
                assert!(actual.sub_fields.contains(&SubField {
                    id1: b'A',
                    id2: b'c',
                    data: &b"acorn."[..],
                }));
                assert!(actual.sub_fields.contains(&SubField {
                    id1: b'K',
                    id2: b'N',
                    data: &b"keynote assertion"[..],
                }));

            },
            unexpected => panic!("Unable to parse extra field, got back {:?}", unexpected),
        }
    }

    #[test]
    fn test_get_byte() {
        for expected in 0x00u8 ..= 0xffu8 {
            assert_eq!(get_byte(&[expected][..]), Done(empty!(), expected));
        }
    }

    #[test]
    fn test_null_terminated_string() {
        test_null_terminated!(null_terminated_string);
    }

    #[test]
    fn test_original_filename() {
        test_null_terminated!(original_filename);
    }

    #[test]
    fn test_file_comment() {
        test_null_terminated!(file_comment);
    }

    #[test]
    fn test_header_crc16() {
        test_u16!(header_crc16);
    }

    #[test]
    fn test_footer_crc32() {
        test_u32!(footer_crc32);
    }

    #[test]
    fn test_input_size() {
        test_u32!(input_size);
    }

}
318}