root_io/core/
file.rs

1use std::fmt;
2
3use failure::Error;
4use nom::{
5    self,
6    bytes::complete::tag,
7    error::VerboseError,
8    number::complete::{be_i16, be_i32, be_u128, be_u16, be_u32, be_u64, be_u8},
9    IResult,
10};
11
12use uuid::Uuid;
13
14use crate::{
15    code_gen::rust::{ToNamedRustParser, ToRustStruct},
16    core::tstreamer::streamers,
17    core::*,
18    MAP_OFFSET,
19};
20
/// Size of serialized `FileHeader` in bytes
///
/// This is the size of the layout with 64 bit wide `end`, `seek_free`
/// and `seek_info` fields, i.e. the largest number of bytes that
/// `file_header` consumes. The sum lists the fields in the order in
/// which they are parsed.
const FILE_HEADER_SIZE: u64 = 4 // magic bytes "root"
    + 4 // version
    + 4 // begin
    + 8 // end
    + 8 // seek_free
    + 4 // nbytes_free
    + 4 // n_entries_free
    + 4 // n_bytes_name
    + 1 // pointer_size
    + 4 // compression
    + 8 // seek_info
    + 4 // nbytes_info
    + 2 // uuid version
    + 16; // uuid
23
/// Size of serialized TDirectory. Depending on the ROOT version this
/// may use 32 or 64 bit pointers. This is the maximal (64 bit size).
///
/// The sum lists the fields in the order in which `directory` parses
/// them.
const TDIRECTORY_MAX_SIZE: u64 = 2 // version
    + 4 // c_time
    + 4 // m_time
    + 4 // n_bytes_keys
    + 4 // n_bytes_name
    + 8 // seek_dir
    + 8 // seek_parent
    + 8; // seek_keys
27
28/// `RootFile` wraps the most basic information of a ROOT file.
29#[derive(Debug)]
30pub struct RootFile {
31    source: Source,
32    hdr: FileHeader,
33    items: Vec<FileItem>,
34}
35
36#[derive(Debug, PartialEq)]
37struct FileHeader {
38    version: i32,
39    begin: i32,
40    end: u64,
41    seek_free: u64,
42    nbytes_free: i32,
43    n_entries_free: i32,
44    n_bytes_name: i32,
45    pointer_size: u8,
46    compression: i32,
47    seek_info: SeekPointer,
48    nbytes_info: i32,
49    uuid: Uuid,
50    seek_dir: SeekPointer,
51}
52
53#[derive(Debug, PartialEq)]
54pub struct Directory {
55    version: i16,
56    c_time: u32,
57    m_time: u32,
58    n_bytes_keys: i32,
59    n_bytes_name: i32,
60    seek_dir: SeekPointer,
61    seek_parent: SeekPointer,
62    seek_keys: SeekPointer,
63}
64
65/// Parse opening part of a root file
66fn file_header(i: &[u8]) -> IResult<&[u8], FileHeader> {
67    fn version_dep_int(i: &[u8], is_64_bit: bool) -> IResult<&[u8], u64> {
68        if is_64_bit {
69            be_u64(i)
70        } else {
71            let (i, end) = be_u32(i)?;
72            Ok((i, end as u64))
73        }
74    }
75    let (i, _) = tag("root")(i)?;
76    let (i, version) = be_i32(i)?;
77    let is_64_bit = version > 1000000;
78    let (i, begin) = be_i32(i)?;
79    let (i, end) = version_dep_int(i, is_64_bit)?;
80    let (i, seek_free) = version_dep_int(i, is_64_bit)?;
81    let (i, nbytes_free) = be_i32(i)?;
82    let (i, n_entries_free) = be_i32(i)?;
83    let (i, n_bytes_name) = be_i32(i)?;
84    let (i, pointer_size) = be_u8(i)?;
85    let (i, compression) = be_i32(i)?;
86    let (i, seek_info) = version_dep_int(i, is_64_bit)?;
87    let (i, nbytes_info) = be_i32(i)?;
88    let (i, _uuid_version) = be_u16(i)?;
89    let (i, uuid) = be_u128(i)?;
90
91    let uuid = Uuid::from_u128(uuid);
92    let seek_dir = (begin + n_bytes_name) as u64;
93    Ok((
94        i,
95        FileHeader {
96            version,
97            begin,
98            end,
99            seek_free,
100            nbytes_free,
101            n_entries_free,
102            n_bytes_name,
103            pointer_size,
104            compression,
105            seek_info,
106            nbytes_info,
107            uuid,
108            seek_dir,
109        },
110    ))
111}
112
113/// Parse a file-pointer based on the version of the file
114fn versioned_pointer(input: &[u8], version: i16) -> nom::IResult<&[u8], u64> {
115    if version > 1000 {
116        be_u64(input)
117    } else {
118        map!(input, be_i32, |val| val as u64)
119    }
120}
121
122#[rustfmt::skip::macros(do_parse)]
123named!(
124    #[doc="Directory within a root file; exists on ever file"],
125    directory<&[u8], Directory>,
126    do_parse!(
127        version: be_i16 >>
128            c_time: be_u32 >>
129            m_time: be_u32 >>
130            n_bytes_keys: be_i32 >>
131            n_bytes_name: be_i32 >>
132            seek_dir: call!(versioned_pointer, version) >>
133            seek_parent: call!(versioned_pointer, version) >>
134            seek_keys: call!(versioned_pointer, version) >>
135            ({
136                Directory {version, c_time, m_time, n_bytes_keys,
137                           n_bytes_name, seek_dir, seek_parent, seek_keys,
138                }})
139    )
140);
141
142impl RootFile {
143    /// Open a new ROOT file either from a `Url`, or from a `Path`
144    /// (not available on `wasm32`).
145    pub async fn new<S: Into<Source>>(source: S) -> Result<Self, Error> {
146        let source = source.into();
147        let hdr = source.fetch(0, FILE_HEADER_SIZE).await.and_then(|buf| {
148            file_header(&buf)
149                .map_err(|_| format_err!("Failed to parse file header"))
150                .map(|(_i, o)| o)
151        })?;
152        // Jump to the TDirectory and parse it
153        let dir = source
154            .fetch(hdr.seek_dir, TDIRECTORY_MAX_SIZE)
155            .await
156            .and_then(|buf| {
157                directory(&buf)
158                    .map_err(|_| format_err!("Failed to parse TDirectory"))
159                    .map(|(_i, o)| o)
160            })?;
161        let tkey_of_keys = source
162            .fetch(dir.seek_keys, dir.n_bytes_keys as u64)
163            .await
164            .and_then(|buf| {
165                tkey(&buf)
166                    .map_err(|_| format_err!("Failed to parse TKeys"))
167                    .map(|(_i, o)| o)
168            })?;
169        let keys = match tkey_headers(&tkey_of_keys.obj) {
170            Ok((_, hdrs)) => Ok(hdrs),
171            _ => Err(format_err!("Expected TKeyHeaders")),
172        }?;
173        let items = keys
174            .iter()
175            .map(|k_hdr| FileItem::new(k_hdr, source.clone()))
176            .collect();
177
178        Ok(RootFile { source, hdr, items })
179    }
180
181    pub async fn get_streamer_context(&self) -> Result<Context, Error> {
182        let seek_info_len = (self.hdr.nbytes_info + 4) as u64;
183        let info_key = self
184            .source
185            .fetch(self.hdr.seek_info, seek_info_len)
186            .await
187            .map(|buf| tkey(&buf).unwrap().1)?;
188
189        let key_len = info_key.hdr.key_len;
190        Ok(Context {
191            source: self.source.clone(),
192            offset: key_len as u64 + MAP_OFFSET,
193            s: info_key.obj,
194        })
195    }
196
197    /// Slice of the items contained in this file
198    pub fn items(&self) -> &[FileItem] {
199        &self.items
200    }
201
202    /// Translate the streamer info of this file to a YAML file
203    pub async fn streamer_infos(&self) -> Result<Vec<TStreamerInfo>, Error> {
204        let ctx = self.get_streamer_context().await?;
205        let buf = ctx.s.as_slice();
206        let (_, streamer_vec) = streamers::<VerboseError<_>>(buf, &ctx)
207            .map_err(|_| format_err!("Failed to parse TStreamers"))?;
208        Ok(streamer_vec)
209    }
210
211    /// Translate the streamer info of this file to a YAML file
212    pub async fn streamer_info_as_yaml<W: fmt::Write>(&self, s: &mut W) -> Result<(), Error> {
213        for el in &self.streamer_infos().await? {
214            writeln!(s, "{:#}", el.to_yaml())?;
215        }
216        Ok(())
217    }
218
219    /// Generate Rust code from the streamer info of this file
220    pub async fn streamer_info_as_rust<W: fmt::Write>(&self, s: &mut W) -> Result<(), Error> {
221        // Add necessary imports at the top of the file
222        writeln!(
223            s,
224            "{}",
225            quote! {
226                use std::marker::PhantomData;
227                use nom::*;
228                use parsers::*;
229                use parsers::utils::*;
230                use core_types::*;
231            }
232            .to_string()
233        )?;
234        let streamer_infos = self.streamer_infos().await?;
235        // generate structs
236        for el in &streamer_infos {
237            // The structs contain comments which introduce line breaks; i.e. readable
238            writeln!(s, "{}", el.to_struct().to_string())?;
239        }
240
241        // generate parsers
242        for el in &streamer_infos {
243            // The parsers have no comments, but are ugly; We introduce some
244            // Linebreaks here to not have rustfmt choke later (doing it later
245            // is inconvinient since the comments in the structs might contain
246            // the patterns
247            let parsers = el.to_named_parser().to_string();
248            let parsers = parsers.replace(",", ",\n");
249            let parsers = parsers.replace(">>", ">>\n");
250            // macro names are generated as my_macro ! (...) by `quote`
251            let parsers = parsers.replace(" ! (", "!(");
252            writeln!(s, "{}", parsers)?;
253        }
254        Ok(())
255    }
256}
257
#[cfg(all(test, not(target_arch = "wasm32")))]
mod test {
    use super::*;
    use std::path::Path;

    use nom::error::VerboseError;
    use nom::multi::length_value;
    use reqwest::Url;
    use tokio;

    /// Path of the small test file, relative to the crate root
    const SIMPLE_FILE_LOCAL: &str = "./src/test_data/simple.root";

    /// The same small test file, but served over the network
    const SIMPLE_FILE_REMOTE: &str =
        "https://github.com/cbourjau/alice-rs/blob/master/root-io/src/test_data/simple.root?raw=true";

    /// The local and the remote flavour of the small test file
    fn simple_file_sources() -> Vec<Source> {
        vec![
            Source::new(Path::new(SIMPLE_FILE_LOCAL)),
            Source::new(Url::parse(SIMPLE_FILE_REMOTE).unwrap()),
        ]
    }

    /// Fetch and parse the file header of `source`; panics on failure
    async fn fetch_file_header(source: &Source) -> FileHeader {
        source
            .fetch(0, FILE_HEADER_SIZE)
            .await
            .and_then(|buf| {
                file_header(&buf)
                    .map_err(|_| format_err!("Failed to parse file header"))
                    .map(|(_i, o)| o)
            })
            .unwrap()
    }

    #[tokio::test]
    async fn read_cms_file_remote() {
        let url = "http://opendata.web.cern.ch/eos/opendata/cms/hidata/HIRun2010/HIAllPhysics/RECO/ZS-v2/0000/001DA267-7243-E011-B38F-001617C3B6CE.root";
        let f = RootFile::new(Url::parse(url).unwrap()).await.unwrap();
        let mut s = String::new();
        f.streamer_info_as_yaml(&mut s).await.unwrap();
        println!("{}", s);
        for item in f.items() {
            item.as_tree().await.unwrap();
        }
    }

    #[tokio::test]
    async fn file_header_test() {
        for source in &simple_file_sources() {
            let hdr = fetch_file_header(source).await;

            let should = FileHeader {
                version: 60600,
                begin: 100,
                end: 5614,
                seek_free: 5559,
                nbytes_free: 55,
                n_entries_free: 1,
                n_bytes_name: 58,
                pointer_size: 4,
                compression: 1,
                seek_info: 1117,
                nbytes_info: 4442,
                uuid: Uuid::from_u128(154703765255331693287451041600576143087),
                seek_dir: 158,
            };
            assert_eq!(hdr, should);
        }
    }

    #[tokio::test]
    async fn directory_test() {
        for source in &simple_file_sources() {
            let hdr = fetch_file_header(source).await;

            let dir = source
                .fetch(hdr.seek_dir, TDIRECTORY_MAX_SIZE)
                .await
                .and_then(|buf| {
                    directory(&buf)
                        .map_err(|_| format_err!("Failed to parse TDirectory"))
                        .map(|(_i, o)| o)
                })
                .unwrap();
            assert_eq!(
                dir,
                Directory {
                    version: 5,
                    c_time: 1418768412,
                    m_time: 1418768412,
                    n_bytes_keys: 96,
                    n_bytes_name: 58,
                    seek_dir: 100,
                    // TODO: This should probably be an Option
                    seek_parent: 0,
                    seek_keys: 1021
                }
            );
        }
    }

    #[tokio::test]
    async fn streamerinfo_test() {
        for source in &simple_file_sources() {
            // Offset and length match `seek_info` and `nbytes_info + 4`
            // of the header which `file_header_test` expects
            let key = source
                .fetch(1117, 4446)
                .await
                .and_then(|buf| {
                    tkey(&buf)
                        .map_err(|_| format_err!("Failed to parse TKey"))
                        .map(|(_i, o)| o)
                })
                .unwrap();
            assert_eq!(key.hdr.obj_name, "StreamerInfo");

            let key_len = key.hdr.key_len;
            // NOTE(review): presumably mirrors the crate level `MAP_OFFSET` - confirm
            let k_map_offset = 2;
            let context = Context {
                source: source.clone(),
                offset: (key_len + k_map_offset) as u64,
                s: key.obj,
            };

            match length_value(checked_byte_count, |i| {
                tlist::<VerboseError<_>>(i, &context)
            })(&context.s)
            {
                Ok((_, l)) => {
                    assert_eq!(l.len(), 19);
                }
                Err(_e) => panic!("Not parsed as TList!"),
            };
        }
    }
}