root_io/core/
parsers.rs

1use std::convert::TryFrom;
2use std::fmt::Debug;
3use std::io::Read;
/// Parsers of the ROOT core types. Note that objects in ROOT files
/// are often, but not always, preceded by their size. The parsers in
/// this module therefore do not include this leading size
/// information. Usually, the user will want to handle that themselves
/// with something along the lines of
/// `length_value!(checked_byte_count, tobject)`.
10use std::str;
11
12use failure::Error;
13use flate2::bufread::ZlibDecoder;
14use lzma_rs::xz_decompress;
15use nom::{
16    self,
17    bytes::complete::{take, take_until},
18    combinator::{map, map_res, rest, verify},
19    error::ParseError,
20    multi::{count, length_data, length_value},
21    number::complete::{be_i32, be_u16, be_u32, be_u64, be_u8},
22    sequence::{pair, tuple},
23    IResult,
24};
25
26use crate::core::*;
27
28#[allow(clippy::trivially_copy_pass_by_ref)]
29fn is_byte_count(v: &u32) -> bool {
30    Flags::from_bits_truncate(*v).intersects(Flags::BYTE_COUNT_MASK)
31}
32
33/// Return the size in bytes of the following object in the input. The
34/// count is the remainder of this object minus the size of the count.
35pub fn checked_byte_count<'s, E>(input: &'s [u8]) -> nom::IResult<&[u8], u32, E>
36where
37    E: ParseError<&'s [u8]> + Debug,
38{
39    verify(
40        map(verify(be_u32, is_byte_count), |v| {
41            v & !Flags::BYTE_COUNT_MASK.bits()
42        }),
43        |v| *v != 0,
44    )(input)
45}
46
47/// Read ROOT's version of short and long strings (preceeded by u8). Does not read null terminated!
48#[rustfmt::skip::macros(do_parse)]
49pub fn string<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], String, E>
50where
51    E: ParseError<&'s [u8]> + Debug,
52{
53    do_parse!(input,
54              len: switch!(be_u8,
55                           255 => call!(be_u32) |
56                           a => value!(u32::from(a))) >>
57              s: map!(
58                  map_res!(take!(len), |s| str::from_utf8(s)),
59                  |s| s.to_string()) >>
60              (s)
61    )
62}
63
64/// Parser for the most basic of ROOT types
65pub fn tobject<'s, E>(input: &'s [u8]) -> nom::IResult<&[u8], TObject, E>
66where
67    E: ParseError<&'s [u8]> + Debug,
68{
69    do_parse!(
70        input,
71        ver: be_u16 >> // version_consume_extra_virtual >>
72              id: be_u32 >>
73              bits: map!(be_u32, |v| {
74                  // TObjects read from disc must have the ON_HEAP flag
75                  TObjectFlags::from_bits_truncate(v| TObjectFlags::IS_ON_HEAP.bits())}
76              ) >>
77              _ref: cond!(bits.intersects(TObjectFlags::IS_REFERENCED), be_u16) >>
78              ({TObject {
79                  ver, id, bits
80              }})
81    )
82}
83
84/// Parse a `TList`
85pub fn tlist<'s, E>(i: &'s [u8], ctx: &'s Context) -> IResult<&'s [u8], Vec<Raw<'s>>, E>
86where
87    E: ParseError<&'s [u8]> + Debug,
88{
89    let (i, _ver) = verify(be_u16, |&v| v == 5)(i)?;
90    let (i, (_tobj, _name, len)) = tuple((tobject, string, be_i32))(i)?;
91    let (i, objs) = count(
92        |i| {
93            let wrapped_raw = |i| raw(i, ctx);
94            let (i, obj) = length_value(checked_byte_count, wrapped_raw)(i)?;
95            let (i, _) = length_data(be_u8)(i)?;
96            Ok((i, obj))
97        },
98        len as usize,
99    )(i)?;
100    let (i, _) = rest(i)?;
101    Ok((i, objs))
102}
103
104/// Parser for `TNamed` objects
105#[rustfmt::skip::macros(do_parse)]
106pub fn tnamed<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], TNamed, E>
107where
108    E: ParseError<&'s [u8]> + Debug,
109{
110    do_parse!(input,
111              _ver: be_u16 >>
112              _tobj: tobject >>
113              name: string >>
114              title: string >>
115              ({TNamed{name, title}})
116    )
117}
118
119/// Parse a `TObjArray`
120#[rustfmt::skip::macros(do_parse)]
121pub fn tobjarray<'s, E, F, O>(
122    parser: F,
123    i: &'s [u8],
124    context: &'s Context,
125) -> nom::IResult<&'s [u8], Vec<O>, E>
126where
127    F: Fn(&Raw<'s>, &'s Context) -> nom::IResult<&'s [u8], O, E>,
128    E: ParseError<&'s [u8]> + Debug,
129{
130    let (i, _ver) = be_u16(i)?;
131    let (i, _tobj) = tobject(i)?;
132    let (i, _name) = c_string(i)?;
133    let (i, size) = be_i32(i)?;
134    let (i, _low) = be_i32(i)?;
135    let (i, objs) = count(
136        map_res(
137            |i| raw(i, &context),
138            |r| {
139                let res = parser(&r, &context).map(|(_i, res)| res);
140                if res.is_err() {
141                    res.as_ref().unwrap();
142                }
143                res
144            },
145        ),
146        size as usize,
147    )(i)?;
148    Ok((i, objs))
149}
150
151/// Parse a `TObjArray` which does not have references pointing outside of the input buffer
152#[rustfmt::skip::macros(do_parse)]
153pub fn tobjarray_no_context<'s, E>(
154    input: &'s [u8],
155) -> nom::IResult<&'s [u8], Vec<(ClassInfo, &'s [u8])>, E>
156where
157    E: ParseError<&'s [u8]> + Debug,
158{
159    do_parse!(input,
160              _ver: be_u16 >>
161              _tobj: tobject >>
162              _name: c_string >>
163              _size: be_i32 >>
164              _low: be_i32 >>
165              objs: map!(count!(raw_no_context, _size as usize),
166                         |v| v.into_iter().map(|(ci, s)| (ci, s)).collect()) >>
167              (objs)
168    )
169}
170
171#[rustfmt::skip::macros(do_parse)]
172named!(
173    #[doc="Parser for `TObjString`"],
174    pub tobjstring<&[u8], String>,
175    do_parse!(_ver: be_u16 >>
176              _tobj: tobject >>
177              name: string >>
178              _eof: eof!() >>
179              ({name})
180    )
181);
182
183/// Parse a so-called `TArray`. Note that ROOT's `TArray`s are actually not fixed size.
184/// Example usage for TArrayI: `tarray(nom::complete::be_i32, input_slice)`
185pub fn tarray<'s, E, F, O>(parser: F, i: &'s [u8]) -> nom::IResult<&'s [u8], Vec<O>, E>
186where
187    F: Fn(&'s [u8]) -> nom::IResult<&'s [u8], O, E>,
188    E: ParseError<&'s [u8]> + Debug,
189{
190    let (i, counts) = be_i32(i)?;
191    count(parser, counts as usize)(i)
192}
193
194fn decode_reader(bytes: &[u8], magic: &str) -> Result<Vec<u8>, Error> {
195    let mut ret = vec![];
196    match magic {
197        "ZL" => {
198            let mut decoder = ZlibDecoder::new(&bytes[..]);
199            decoder.read_to_end(&mut ret)?;
200        }
201        "XZ" => {
202            let mut reader = std::io::BufReader::new(bytes);
203            xz_decompress(&mut reader, &mut ret).unwrap();
204        }
205        "L4" => {
206            use lz4_compress::decompress;
207            let (bytes, _checksum) = be_u64::<()>(bytes).unwrap();
208            ret = decompress(bytes).unwrap();
209        }
210        m => return std::dbg!(Err(format_err!("Unsupported compression format `{}`", m))),
211    };
212    Ok(ret)
213}
214
215/// Decompress the given buffer. Figures out the compression algorithm from the preceeding \"magic\" bytes
216pub fn decompress(input: &[u8]) -> nom::IResult<&[u8], Vec<u8>> {
217    map_res(
218        tuple((|i| take_str!(i, 2usize), take(7usize), rest)),
219        |(magic, _header, comp_buf)| decode_reader(comp_buf, magic),
220    )(input)
221}
222
223/// Parse a null terminated string
224pub fn c_string<'s, E>(i: &'s [u8]) -> nom::IResult<&[u8], &str, E>
225where
226    E: ParseError<&'s [u8]> + Debug,
227{
228    let (i, s) = map_res(take_until(b"\x00".as_ref()), str::from_utf8)(i)?;
229    // consume the null tag
230    let (i, _) = take(1usize)(i)?;
231    Ok((i, s))
232}
233
234/// Figure out the class we are looking at. The data might not be
235/// saved locally but rather in a reference to some other place in the
236/// buffer.This is modeled after ROOT's `TBufferFile::ReadObjectAny` and
237/// `TBufferFile::ReadClass`
238pub fn classinfo<'s, E>(i: &'s [u8]) -> nom::IResult<&[u8], ClassInfo, E>
239where
240    E: ParseError<&'s [u8]> + Debug,
241{
242    let (i, tag) = {
243        let (i, bcnt) = be_u32(i)?;
244        if !is_byte_count(&bcnt) || bcnt == Flags::NEW_CLASSTAG.bits() {
245            (i, bcnt)
246        } else {
247            be_u32(i)?
248        }
249    };
250    let (i, cl) = match tag as u32 {
251        0xFFFF_FFFF => {
252            let (i, cl) = map!(i, c_string, ClassInfo::New)?;
253            (i, cl)
254        }
255        tag => {
256            if Flags::from_bits_truncate(tag).contains(Flags::CLASS_MASK) {
257                (i, ClassInfo::Exists(tag & !Flags::CLASS_MASK.bits()))
258            } else {
259                (i, ClassInfo::References(tag))
260            }
261        }
262    };
263    Ok((i, cl))
264}
265
266/// Figure out the class we are looking at. This parser immediately
267/// resolves possible references returning the name of the object in
268/// this buffer and the associated data. This function needs a
269/// `Context`, though, which may not be available. If so, have a look
270/// at the `classinfo` parser.
271pub fn class_name_and_buffer<'s, E>(
272    i: &'s [u8],
273    context: &'s Context,
274) -> nom::IResult<&'s [u8], (&'s str, &'s [u8]), E>
275where
276    E: ParseError<&'s [u8]> + std::fmt::Debug,
277{
278    let ctx_offset = u32::try_from(context.offset)
279        .expect("Encountered pointer larger than 32 bits. Please file a bug.");
280    let (i, ci) = classinfo(i)?;
281    Ok(match ci {
282        ClassInfo::New(s) => {
283            let (i, buf) = length_value(checked_byte_count, rest)(i)?;
284            (i, (s, buf))
285        }
286        ClassInfo::Exists(tag) => {
287            let name = {
288                let abs_offset = tag & !Flags::CLASS_MASK.bits();
289                let s = &context.s[((abs_offset - ctx_offset) as usize)..];
290                let (_, (name, _)) = class_name_and_buffer(s, context)?;
291                name
292            };
293            let (i, buf) = length_value(checked_byte_count, rest)(i)?;
294            (i, (name, buf))
295        }
296        ClassInfo::References(tag) => {
297            let (name, buf) = {
298                let abs_offset = tag;
299                // Sometimes, the reference points to `0`; so we return an empty slice
300                if abs_offset == 0 {
301                    ("", &context.s[..0])
302                } else {
303                    let s = &context.s[((abs_offset - ctx_offset) as usize)..];
304                    let (_, (name, buf)) = class_name_and_buffer(s, context)?;
305                    (name, buf)
306                }
307            };
308            (i, (name, buf))
309        }
310    })
311}
312
313/// Parse a `Raw` chunk from the given input buffer. This is usefull when one does not know the exact type at the time of parsing
314#[rustfmt::skip::macros(do_parse)]
315pub fn raw<'s, E>(input: &'s [u8], context: &'s Context) -> nom::IResult<&'s [u8], Raw<'s>, E>
316where
317    E: ParseError<&'s [u8]> + Debug,
318{
319    do_parse!(input,
320              string_and_obj: call!(class_name_and_buffer, context) >>
321              // obj: length_value!(checked_byte_count, call!(nom::rest)) >>
322              ({let (classinfo, obj) = string_and_obj;
323                Raw{classinfo, obj}})
324    )
325}
326
327/// Same as `raw` but doesn't require a `Context` as input. Panics if
328/// a `Context` is required to parse the underlying buffer (i.e., the
329/// given buffer contains a reference to some other part of the file.
330pub fn raw_no_context<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], (ClassInfo, &[u8]), E>
331where
332    E: ParseError<&'s [u8]> + Debug,
333{
334    use super::ClassInfo::*;
335    let (input, ci) = classinfo(input)?;
336    let obj = match ci {
337        // point to beginning of slice
338        References(0) => value!(input, &input[..0]),
339        New(_) | Exists(_) => length_value!(input, checked_byte_count, call!(rest)),
340        // If its a reference to any other thing but 0 it needs a context
341        _ => panic!("Object needs context!"),
342    };
343    obj.map(|(i, o)| (i, (ci, o)))
344}
345
346/// ESD trigger classes are strings describing a particular
347/// Trigger. Each event (but in reality every run) might have a
348/// different "menu" of available triggers. The trigger menu is saved
349/// as an `TObjArray` of `TNamed` objects for each event. This breaks
350/// it down to a simple vector
351pub fn parse_tobjarray_of_tnameds<'s, E>(input: &'s [u8]) -> nom::IResult<&[u8], Vec<String>, E>
352where
353    E: ParseError<&'s [u8]> + Debug,
354{
355    // each element of the tobjarray has a Vec<u8>
356    let (input, vals) = length_value(checked_byte_count, tobjarray_no_context)(input)?;
357    let strings = vals
358        .into_iter()
359        .map(|(ci, el)| {
360            if let ClassInfo::References(0) = ci {
361                Ok("".to_string())
362            } else {
363                tnamed(&el).map(|(_input, tn)| tn.name)
364            }
365        })
366        .collect::<Result<Vec<String>, _>>();
367    strings.map(|ss| (input, ss))
368}
369
370/// Some Double_* values are saved with a custom mantissa... The
371/// number of bytes can be found in the comment string of the
372/// generated YAML code (for ALICE ESD files at least).  This function
373/// reconstructs a float from the exponent and mantissa
374pub fn parse_custom_mantissa<'s, E>(input: &'s [u8], nbits: usize) -> nom::IResult<&[u8], f32, E>
375where
376    E: ParseError<&'s [u8]> + Debug,
377{
378    // TODO: Use ByteOrder crate to be cross-platform?
379    pair(be_u8, be_u16)(input).map(|(input, (exp, man))| {
380        let mut s = u32::from(exp);
381        // Move the exponent into the last 23 bits
382        s <<= 23;
383        s |= (u32::from(man) & ((1 << (nbits + 1)) - 1)) << (23 - nbits);
384        (input, f32::from_bits(s))
385    })
386}
387
#[cfg(test)]
mod classinfo_test {
    use super::classinfo;
    use nom::error::VerboseError;

    /// Regression test: with verbose errors enabled for nom in the
    /// cargo.toml, the buffer below used to be parsed differently on
    /// nightly (rustc 1.25.0-nightly (79a521bb9 2018-01-15)) than on
    /// stable.
    #[test]
    fn classinfo_not_complete_read() {
        let bytes: &[u8] = &[
            128, 0, 0, 150, 64, 0, 1, 92, 0, 3, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0,
            0, 0, 64, 0, 0, 103, 128, 0, 0, 193, 64, 0, 0, 95, 0, 3, 64, 0, 0, 85, 0, 4, 64, 0, 0,
            38, 0, 1, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 7, 84, 79, 98, 106, 101, 99, 116, 17, 66, 97,
            115, 105, 99, 32, 82, 79, 79, 84, 32, 111, 98, 106, 101, 99, 116, 0, 0, 0, 66, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 144, 27, 192, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 4, 66, 65, 83, 69, 0, 0, 0, 1, 64, 0, 0, 116, 255, 255, 255, 255, 84, 83, 116,
            114, 101, 97, 109, 101, 114, 83, 116, 114, 105, 110, 103, 0, 64, 0, 0, 92, 0, 2, 64, 0,
            0, 86, 0, 4, 64, 0, 0, 36, 0, 1, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 5, 102, 78, 97, 109,
            101, 17, 111, 98, 106, 101, 99, 116, 32, 105, 100, 101, 110, 116, 105, 102, 105, 101,
            114, 0, 0, 0, 65, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 84, 83, 116, 114, 105, 110, 103, 64, 0, 0, 96, 128, 0,
            18, 227, 64, 0, 0, 88, 0, 2, 64, 0, 0, 82, 0, 4, 64, 0, 0, 32, 0, 1, 0, 1, 0, 0, 0, 0,
            3, 0, 0, 0, 6, 102, 84, 105, 116, 108, 101, 12, 111, 98, 106, 101, 99, 116, 32, 116,
            105, 116, 108, 101, 0, 0, 0, 65, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 84, 83, 116, 114, 105, 110, 103,
        ];
        let (leftover, _ci) = classinfo::<VerboseError<_>>(bytes).unwrap();
        // The faulty behaviour shows up as the entire input being
        // (wrongly) consumed; only the class tag may be taken off
        assert_eq!(leftover.len(), 352);
    }
}