apple_bom/
format.rs

1// Copyright 2022 Gregory Szorc.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! BOM file format primitives.
10//!
11//! Apple doesn't appear to have documented the BOM file format in any
12//! publications or open source code. So details of our understanding
13//! of the BOM format could be wildly inaccurate.
14//!
15//! # File Format
16//!
17//! BOM files start with a header, [BomHeader]. The first 8 bytes of which
18//! are magic `BOMStore`.
19//!
20//! BOM files logically consist of a collection of *blocks* and
21//! *variables*. Each of these is defined by an *index*, the location of
22//! which is defined in [BomHeader]. The *blocks* index is defined by
23//! [BomBlocksIndex] and the *vars* index by [BomVarsIndex].
24//!
25//! Each *block* is simply an offset and length effectively denoting a
26//! `&[u8]` from the source data. *Blocks* can be multiple types. These
27//! types are represented by `BomBlock*` types in this module. The type
28//! of each *block* is not explicitly captured by the blocks index. Rather,
29//! block indices are referenced elsewhere and the block type is inferred
30//! by the context of its reference.
31//!
32//! *Variables* define named content in the BOM, with each name denoting
33//! special behavior. Each generic variable is defined by [BomVar] and
34//! consists of a name and *block* index holding its data.
35//!
36//! # Block Types
37//!
38//! Here are the known block types:
39//!
40//! * [BomBlockBomInfo]
41//! * [BomBlockFile]
42//! * [BomBlockPathInfoIndex]
43//! * [BomBlockPathRecord]
44//! * [BomBlockPathRecordPointer]
45//! * [BomBlockPaths]
46//! * [BomBlockTree]
47//! * [BomBlockTreePointer]
48//! * [BomBlockVIndex]
49//!
50//! See the documentation for each type for more details.
51//!
52//! # Variables
53//!
54//! This section documents what we know about each named variable.
55//!
56//! ## BomInfo
57//!
58//! Defines high-level information about the BOM. Its block data is [BomBlockBomInfo].
59//!
60//! ## Paths
61//!
62//! Defines the paths tracked by the BOM. Its block data is [BomBlockTree].
63//!
64//! ## HLIndex
65//!
66//! Defines hard links. Its block data is [BomBlockTree].
67//!
68//! ## VIndex
69//!
70//! Unknown. Its block data is [BomBlockVIndex].
71//!
72//! ## Size64
73//!
74//! Unknown. Its block data is [BomBlockTree].
75//!
76//! # Layout
77//!
78//! In theory, the sequential ordering of blocks and variables can be random,
79//! as variables refer to block indices and blocks also refer to block indices.
80//! This section describes the layout seen in BOMs generated by Apple tools.
81//!
82//! Variables occur in the order `BomInfo`, `Paths`, `HLIndex`, `VIndex`,
83//! `Size64`.
84//!
85//! The 1st block (index 0) appears to always be a NULL block with no
86//! content.
87//!
88//! Block 1 is a [BomBlockBomInfo].
89//!
90//! Block 2 is a [BomBlockTree] holding the `Paths` root tree record.
91//!
92//! Block 3 is a [BomBlockPaths] holding initial paths for the `Paths` variable.
93//!
94//! Block 4 is a [BomBlockTree] holding the `HLIndex` root tree record.
95//!
96//! Block 5 is a [BomBlockPaths] holding initial paths for the `HLIndex` variable.
97//!
98//! Block 6 is a [BomBlockVIndex] holding the data structure for the `VIndex`
99//! variable.
100//!
101//! Block 7 is a [BomBlockTree] holding the `VIndex` root tree.
102//!
103//! Block 8 is a [BomBlockPaths] holding initial paths for the `VIndex` tree.
104//!
105//! Block 9 is a [BomBlockTree] holding the `Size64` root tree.
106//!
107//! Block 10 is a [BomBlockPaths] holding initial paths for the `Size64` tree.
108//!
109//! Starting at block 11 are records describing paths. This starts with a
110//! [BomBlockPathRecord] holding concrete path info (type, mode, size, owner,
111//! checksum, etc). Its [BomBlockFile] and [BomBlockPathInfoIndex] follow.
112//! Both [BomBlockFile] and [BomBlockPathInfoIndex] are pointed to by entries
113//! in [BomBlockPaths]. [BomBlockPathInfoIndex] points to the block index of
114//! the [BomBlockPathRecord] it is describing.
115//!
116//! These [BomBlockPathRecord], [BomBlockFile], and [BomBlockPathInfoIndex]
117//! triplets repeat for every path described in the [BomBlockPaths].
118//!
119//! Multiple [BomBlockPaths] may be necessary to hold all file records. These
120//! blocks may be located in the middle of the aforementioned sequence of
121//! path blocks. The block order doesn't appear to be strict. For example,
122//! a single [BomBlockPaths] referencing blocks both before and after the
123//! current block can occur.
124//!
125//! Following the final [BomBlockPathInfoIndex] are tuples of [BomBlockTree],
126//! [BomBlockPaths], [BomBlockPathRecordPointer], [BomBlockTreePointer]. The
127//! purpose of these is not fully known. The [BomBlockPaths] records always
128//! appear to be empty. There appears to be a [BomBlockPathRecordPointer] for
129//! every path in the BOM.
130//!
131//! The order of the blocks in the payload and in the blocks index may not
132//! match. For example, Apple's BOM creation puts blocks with low block
133//! numbers such as [BomBlockBomInfo] (block #1) and the [BomBlockPaths]
134//! structures towards the end of the payload. It is likely that *index* data
135//! is buffered and then written out at the end, once the state of the world
136//! is fully known.
137
138use {
139    crate::{
140        error::Error,
141        path::{BomPath, BomPathType},
142    },
143    scroll::{IOwrite, Pread, Pwrite, SizeWith},
144    std::{
145        borrow::Cow,
146        collections::HashMap,
147        ffi::CStr,
148        io::{Cursor, Write},
149    },
150};
151
/// The header for a BOM file.
///
/// Besides the magic and version, the header records where the *blocks*
/// index and the *vars* index live. Both locations are expressed as an
/// offset/length pair relative to the start of this header.
///
/// The accessor methods on this type parse those indices as big-endian data.
#[repr(C)]
#[derive(Clone, Copy, Default, Debug, IOwrite, Pread, Pwrite, SizeWith)]
pub struct BomHeader {
    /// Format magic. Always `BOMStore`.
    pub magic: [u8; 8],

    /// File format version number.
    pub version: u32,

    /// Number of *blocks* in this BOM.
    pub number_of_blocks: u32,

    /// Start offset of blocks index relative to start of this header.
    pub blocks_index_offset: u32,

    /// Length of blocks index in bytes.
    pub blocks_index_length: u32,

    /// Start offset of variables index relative to start of this header.
    pub vars_index_offset: u32,

    /// Length of variables index in bytes.
    pub vars_index_length: u32,
}
177
178impl BomHeader {
179    /// Obtain the raw data holding the *blocks* index.
180    pub fn blocks_index_data<'a>(&self, data: &'a [u8]) -> &'a [u8] {
181        &data[self.blocks_index_offset as usize
182            ..(self.blocks_index_offset + self.blocks_index_length) as usize]
183    }
184
185    /// Parse the *blocks* index.
186    pub fn blocks_index(&self, data: &[u8]) -> Result<BomBlocksIndex, Error> {
187        self.blocks_index_data(data)
188            .pread_with::<BomBlocksIndex>(0, scroll::BE)
189    }
190
191    /// Obtain the raw data holding the *vars* index.
192    pub fn vars_index_data<'a>(&self, data: &'a [u8]) -> &'a [u8] {
193        &data[self.vars_index_offset as usize
194            ..(self.vars_index_offset + self.vars_index_length) as usize]
195    }
196
197    /// Parse the *vars* index.
198    pub fn vars_index(&self, data: &[u8]) -> Result<BomVarsIndex, Error> {
199        self.vars_index_data(data)
200            .pread_with::<BomVarsIndex>(0, scroll::BE)
201    }
202}
203
/// Defines *blocks* in the BOM file.
///
/// This is the data structure referred to by [BomHeader::blocks_index_offset] and
/// [BomHeader::blocks_index_length].
///
/// On the wire it is a `u32` entry count followed by that many
/// [BomBlocksEntry] records.
///
/// The 1st block appears to always be NULL (0 values in its entry).
///
/// The blocks count in this data structure and [BomHeader::number_of_blocks] may
/// disagree. The number of blocks in the file header appears to be the number of
/// populated blocks, not counting the initial NULL/empty/0 block. And the block
/// count in this data structure can be substantially larger than what is reported
/// by the file header.
#[derive(Clone, Default, Debug)]
pub struct BomBlocksIndex {
    /// The number of entries in this index.
    pub count: u32,

    /// The records defining individual blocks.
    pub blocks: Vec<BomBlocksEntry>,
}
224
225impl BomBlocksIndex {
226    /// Write this data structure to a writer.
227    pub fn write(&self, writer: &mut impl Write) -> Result<(), Error> {
228        writer.iowrite_with(self.count, scroll::BE)?;
229
230        for entry in &self.blocks {
231            writer.iowrite_with(*entry, scroll::BE)?;
232        }
233
234        Ok(())
235    }
236
237    /// Serialize this data structure to bytes.
238    pub fn to_vec(&self) -> Result<Vec<u8>, Error> {
239        let mut writer = Cursor::new(Vec::<u8>::new());
240        self.write(&mut writer)?;
241        Ok(writer.into_inner())
242    }
243}
244
245impl<'a> scroll::ctx::TryFromCtx<'a, scroll::Endian> for BomBlocksIndex {
246    type Error = Error;
247
248    fn try_from_ctx(data: &'a [u8], le: scroll::Endian) -> Result<(Self, usize), Self::Error> {
249        let offset = &mut 0;
250
251        let count = data.gread_with::<u32>(offset, le)?;
252        let mut blocks = Vec::with_capacity(count as usize);
253
254        for _ in 0..count {
255            blocks.push(data.gread_with::<BomBlocksEntry>(offset, le)?);
256        }
257
258        Ok((Self { count, blocks }, *offset))
259    }
260}
261
/// Defines the location of a *block*.
///
/// This type is part of [BomBlocksIndex]. Each entry is an offset/length
/// pair describing where the block's bytes live in the file.
#[repr(C)]
#[derive(Clone, Copy, Default, Debug, IOwrite, Pread, Pwrite, SizeWith)]
pub struct BomBlocksEntry {
    /// Start offset of block data relative to start of file / [BomHeader].
    pub file_offset: u32,

    /// Length in bytes of block data.
    pub length: u32,
}
274
/// Describes an individual BOM variable.
///
/// A variable consists of a string name and pointer to the block index
/// holding its variable-specific data.
#[derive(Clone, Debug)]
pub struct BomVar {
    /// Index of block holding data for this variable.
    pub block_index: u32,

    /// Length of name. Does not include NULL terminator.
    ///
    /// NOTE(review): [BomVar::new] stores the name's byte length plus one
    /// here and [BomVar::write] emits a trailing NUL after the name, while
    /// the parser reads exactly this many bytes as the name. Confirm which
    /// convention matches the on-disk format.
    pub name_length: u8,

    /// Name of variable.
    pub name: String,
}
290
291impl BomVar {
292    /// Construct a new instance given a string.
293    pub fn new(block_index: u32, name: impl ToString) -> Result<Self, Error> {
294        let name = name.to_string();
295
296        if name.as_bytes().len() > 254 {
297            return Err(Error::BadVariableString);
298        }
299
300        Ok(Self {
301            block_index,
302            name_length: name.as_bytes().len() as u8 + 1,
303            name,
304        })
305    }
306
307    /// Write this data structure to a writer.
308    pub fn write(&self, writer: &mut impl Write) -> Result<(), Error> {
309        writer.iowrite_with(self.block_index, scroll::BE)?;
310        writer.iowrite_with(self.name_length, scroll::BE)?;
311        writer.write_all(self.name.as_bytes())?;
312        writer.write_all(b"\0")?;
313
314        Ok(())
315    }
316}
317
318impl<'a> scroll::ctx::TryFromCtx<'a, scroll::Endian> for BomVar {
319    type Error = Error;
320
321    fn try_from_ctx(data: &'a [u8], le: scroll::Endian) -> Result<(Self, usize), Self::Error> {
322        let index = data.pread_with(0, le)?;
323        let length = data.pread_with(4, le)?;
324
325        let name_data = &data[5..5 + length as usize];
326        let name = String::from_utf8(name_data.to_vec()).map_err(|_| Error::BadVariableString)?;
327
328        Ok((
329            Self {
330                block_index: index,
331                name_length: length,
332                name,
333            },
334            5 + name_data.len(),
335        ))
336    }
337}
338
/// Block type for `BomInfo` variable.
///
/// Describes high-level information about the BOM, notably the version and
/// number of paths.
///
/// On the wire it is three `u32` fields followed by
/// `number_of_info_entries` [BomInfoEntry] records.
#[repr(C)]
#[derive(Clone, Default, Debug)]
pub struct BomBlockBomInfo {
    /// BOM version.
    pub version: u32,

    /// Total number of paths tracked by this BOM.
    pub number_of_paths: u32,

    /// Number of [BomInfoEntry] records in this data structure.
    pub number_of_info_entries: u32,

    /// Further describes attributes of the BOM.
    pub entries: Vec<BomInfoEntry>,
}
358
359impl BomBlockBomInfo {
360    /// Write this data structure to a writer.
361    pub fn write(&self, writer: &mut impl Write) -> Result<(), Error> {
362        writer.iowrite_with(self.version, scroll::BE)?;
363        writer.iowrite_with(self.number_of_paths, scroll::BE)?;
364        writer.iowrite_with(self.number_of_info_entries, scroll::BE)?;
365
366        for entry in &self.entries {
367            writer.iowrite_with(*entry, scroll::BE)?;
368        }
369
370        Ok(())
371    }
372}
373
374impl<'a> scroll::ctx::TryFromCtx<'a, scroll::Endian> for BomBlockBomInfo {
375    type Error = Error;
376
377    fn try_from_ctx(data: &'a [u8], le: scroll::Endian) -> Result<(Self, usize), Self::Error> {
378        let offset = &mut 0;
379
380        let version = data.gread_with(offset, le)?;
381        let number_of_paths = data.gread_with(offset, le)?;
382        let number_of_info_entries = data.gread_with(offset, le)?;
383        let mut entries = Vec::with_capacity(number_of_info_entries as usize);
384
385        for _ in 0..number_of_info_entries {
386            entries.push(data.gread_with(offset, le)?);
387        }
388
389        Ok((
390            Self {
391                version,
392                number_of_paths,
393                number_of_info_entries,
394                entries,
395            },
396            *offset,
397        ))
398    }
399}
400
/// Holds data records stored within [BomBlockBomInfo].
///
/// The fields have something to do with architecture information. But we don't
/// know what exactly.
#[repr(C)]
#[derive(Clone, Copy, Default, Debug, IOwrite, Pread, Pwrite, SizeWith)]
pub struct BomInfoEntry {
    /// Unknown.
    pub a: u32,
    /// Unknown.
    pub b: u32,
    /// Unknown.
    pub c: u32,
    /// Unknown.
    pub d: u32,
}
413
/// Block describing a named file.
///
/// On the wire it is a `u32` parent path ID followed by the NUL terminated
/// file name.
#[derive(Clone, Debug)]
pub struct BomBlockFile<'a> {
    /// Internal path ID of parent path.
    ///
    /// `0` means no parent (this file exists at the root).
    pub parent_path_id: u32,

    /// The name of this file.
    ///
    /// Only the leaf file or directory name. i.e. the final component in a
    /// path.
    pub name: Cow<'a, CStr>,
}
428
429impl<'a> BomBlockFile<'a> {
430    /// Write this data structure to a writer.
431    pub fn write(&self, writer: &mut impl Write) -> Result<(), Error> {
432        writer.iowrite_with(self.parent_path_id, scroll::BE)?;
433        writer.write_all(self.name.to_bytes_with_nul())?;
434
435        Ok(())
436    }
437
438    /// Obtain the file name as a [String].
439    pub fn string_file_name(&self) -> String {
440        self.name.to_string_lossy().to_string()
441    }
442}
443
444impl<'a> scroll::ctx::TryFromCtx<'a, scroll::Endian> for BomBlockFile<'a> {
445    type Error = Error;
446
447    fn try_from_ctx(data: &'a [u8], le: scroll::Endian) -> Result<(Self, usize), Self::Error> {
448        let parent = data.pread_with(0, le)?;
449        let name =
450            Cow::from(CStr::from_bytes_with_nul(&data[4..]).map_err(|_| Error::BadVariableString)?);
451
452        Ok((
453            Self {
454                parent_path_id: parent,
455                name,
456            },
457            data.len(),
458        ))
459    }
460}
461
/// A pointer to a block index holding a [BomBlockPathRecord].
///
/// We're unsure what this block type is used for. But instances appear to
/// follow [BomBlockTree] and [BomBlockPaths] entries for every given path.
/// Maybe it allows instances of [BomBlockTree] to easily obtain a reference
/// back to the [BomBlockPathRecord] since this pointer appears to always exist
/// at block index [BomBlockTree::block_paths_index] + 1.
#[repr(C)]
#[derive(Clone, Copy, Default, Debug, IOwrite, Pread, Pwrite, SizeWith)]
pub struct BomBlockPathRecordPointer {
    /// Block index of corresponding [BomBlockPathRecord].
    pub block_path_record_index: u32,
}
475
impl BomBlockPathRecordPointer {
    /// Resolve the [BomBlockPathRecord] this instance points to.
    ///
    /// Looks up the block at `block_path_record_index` in `bom` and
    /// interprets it as a path record.
    pub fn path_record<'a>(&self, bom: &'a ParsedBom) -> Result<BomBlockPathRecord<'a>, Error> {
        bom.block_as_path_record(self.block_path_record_index as _)
    }
}
481
/// Block type describing a collection of paths.
///
/// Instances come in 2 flavors, denoted by [Self::is_path_info]. If this field
/// is `1`, the [BomPathsEntry] instances describe specific tracked paths by
/// pointing to blocks with [BomBlockPathInfoIndex] and [BomBlockFile] that
/// describe each path. If `0`, the [BomPathsEntry] is a pointer to another
/// [BomBlockPaths] instance.
///
/// Each logical path appears to have an internal numeric identifier uniquely
/// describing the path. This *path ID* is used for paths to refer to each
/// other. For example, [BomBlockFile] refers to its parent directory/path
/// via this ID.
#[repr(C)]
#[derive(Clone, Default, Debug)]
pub struct BomBlockPaths {
    /// Whether the block pointer in [BomPathsEntry] refers to [BomBlockPathInfoIndex].
    ///
    /// 0 means it is a pointer to [BomBlockPaths].
    pub is_path_info: u16,

    /// The number of [BomPathsEntry] in this data structure.
    pub count: u16,

    /// Block index of [BomBlockPaths] that is after this one.
    ///
    /// The path iteration logic in this module treats `0` as "no next block".
    pub next_paths_block_index: u32,

    /// Block index of [BomBlockPaths] that is before this one.
    pub previous_paths_block_index: u32,

    /// The paths tracked by this instance.
    pub paths: Vec<BomPathsEntry>,
}
514
515impl BomBlockPaths {
516    /// Write this data structure to a writer.
517    pub fn write(&self, writer: &mut impl Write) -> Result<(), Error> {
518        writer.iowrite_with(self.is_path_info, scroll::BE)?;
519        writer.iowrite_with(self.count, scroll::BE)?;
520        writer.iowrite_with(self.next_paths_block_index, scroll::BE)?;
521        writer.iowrite_with(self.previous_paths_block_index, scroll::BE)?;
522
523        for entry in &self.paths {
524            writer.iowrite_with(*entry, scroll::BE)?;
525        }
526
527        Ok(())
528    }
529
530    /// Resolve the [BomBlockFile] for a path at a given index.
531    pub fn file_at<'a>(&self, bom: &'a ParsedBom, index: usize) -> Result<BomBlockFile<'a>, Error> {
532        self.paths.get(index).ok_or(Error::BadIndex)?.file(bom)
533    }
534
535    /// Resolve the [BomBlockPathInfoIndex] for a path at a given index.
536    pub fn path_info_at(
537        &self,
538        bom: &ParsedBom,
539        index: usize,
540    ) -> Result<BomBlockPathInfoIndex, Error> {
541        self.paths.get(index).ok_or(Error::BadIndex)?.path_info(bom)
542    }
543
544    /// Resolve the internal path ID for a path at a given index.
545    pub fn path_id_at(&self, bom: &ParsedBom, index: usize) -> Result<u32, Error> {
546        Ok(self.path_info_at(bom, index)?.path_id)
547    }
548
549    /// Resolve the [BomBlockPathRecord] for a path at a given index.
550    pub fn path_record_at<'a>(
551        &self,
552        bom: &'a ParsedBom,
553        index: usize,
554    ) -> Result<BomBlockPathRecord<'a>, Error> {
555        self.path_info_at(bom, index)?.path_record(bom)
556    }
557
558    /// Resolve all meaningful path data for a path at a given index.
559    pub fn path_entry_at<'a>(
560        &self,
561        bom: &'a ParsedBom,
562        index: usize,
563    ) -> Result<(u32, BomBlockFile<'a>, BomBlockPathRecord<'a>), Error> {
564        let path_info = self.path_info_at(bom, index)?;
565        let file = self.file_at(bom, index)?;
566        let record = path_info.path_record(bom)?;
567
568        Ok((path_info.path_id, file, record))
569    }
570
571    /// Obtain resolved records for each path defined on this instance.
572    ///
573    /// This will only yield records for the current block.
574    ///
575    /// See [BomBlockTree::bom_paths] for logic that iterates over paths
576    /// across multiple block records.
577    pub fn iter_path_entries<'a, 'b: 'a>(
578        &'a self,
579        bom: &'b ParsedBom,
580    ) -> impl Iterator<Item = Result<(u32, BomBlockFile<'b>, BomBlockPathRecord<'b>), Error>> + 'a
581    {
582        self.paths
583            .iter()
584            .enumerate()
585            .map(move |(i, _)| self.path_entry_at(bom, i))
586    }
587}
588
589impl<'a> scroll::ctx::TryFromCtx<'a, scroll::Endian> for BomBlockPaths {
590    type Error = scroll::Error;
591
592    fn try_from_ctx(data: &'a [u8], le: scroll::Endian) -> Result<(Self, usize), Self::Error> {
593        let offset = &mut 0;
594
595        let is_path_info = data.gread_with::<u16>(offset, le)?;
596        let count = data.gread_with::<u16>(offset, le)?;
597        let forward = data.gread_with::<u32>(offset, le)?;
598        let backward = data.gread_with::<u32>(offset, le)?;
599
600        let mut paths = Vec::with_capacity(count as usize);
601        for _ in 0..count {
602            paths.push(data.gread_with::<BomPathsEntry>(offset, le)?);
603        }
604
605        Ok((
606            Self {
607                is_path_info,
608                count,
609                next_paths_block_index: forward,
610                previous_paths_block_index: backward,
611                paths,
612            },
613            *offset,
614        ))
615    }
616}
617
/// Describes where to find metadata on a single path.
///
/// This type is contained within [BomBlockPaths].
#[repr(C)]
#[derive(Clone, Copy, Default, Debug, IOwrite, Pread, Pwrite, SizeWith)]
pub struct BomPathsEntry {
    /// Block index of associated data structure.
    ///
    /// It appears this can refer to both a [BomBlockPathInfoIndex] or
    /// a [BomBlockPaths]. When referring to a [BomBlockPathInfoIndex],
    /// `file_index` is this path's [BomBlockFile] index. When referring
    /// to a [BomBlockPaths], `file_index` appears to refer to the final
    /// [BomBlockFile] referred to by the [BomBlockPaths].
    pub block_index: u32,

    /// Block index of [BomBlockFile].
    pub file_index: u32,
}
637
impl BomPathsEntry {
    /// Resolve the [BomBlockPathInfoIndex] this instance points to.
    ///
    /// Interprets the block at `block_index` as a path info index. Which
    /// interpretation of `block_index` is valid is up to the caller.
    pub fn path_info(&self, bom: &ParsedBom) -> Result<BomBlockPathInfoIndex, Error> {
        bom.block_as_path_info_index(self.block_index as _)
    }

    /// Resolve the [BomBlockPaths] this instance points to.
    ///
    /// Interprets the block at `block_index` as another paths block. Which
    /// interpretation of `block_index` is valid is up to the caller.
    pub fn paths(&self, bom: &ParsedBom) -> Result<BomBlockPaths, Error> {
        bom.block_as_paths(self.block_index as _)
    }

    /// Resolve the [BomBlockFile] this instance points to.
    ///
    /// Interprets the block at `file_index` as a file block.
    pub fn file<'a>(&self, bom: &'a ParsedBom) -> Result<BomBlockFile<'a>, Error> {
        bom.block_as_file(self.file_index as _)
    }
}
654
/// Block type describing a single path.
///
/// Ties a path's internal identifier to the block holding its metadata.
#[repr(C)]
#[derive(Clone, Copy, Default, Debug, IOwrite, Pread, Pwrite, SizeWith)]
pub struct BomBlockPathInfoIndex {
    /// Unique identifier for this path.
    ///
    /// This is not a block index.
    pub path_id: u32,

    /// Block index of [BomBlockPathRecord] holding metadata for this path.
    pub path_record_index: u32,
}
667
impl BomBlockPathInfoIndex {
    /// Resolve the [BomBlockPathRecord] this instance points to.
    ///
    /// Interprets the block at `path_record_index` as a path record.
    pub fn path_record<'a>(&self, bom: &'a ParsedBom) -> Result<BomBlockPathRecord<'a>, Error> {
        bom.block_as_path_record(self.path_record_index as _)
    }
}
674
/// Block type defining low-level path information.
///
/// This is where most of the metadata defining a BOM path lives.
///
/// Fields are serialized in declaration order. The link name, when present,
/// trails the fixed-size fields.
#[repr(C)]
#[derive(Clone, Default, Debug)]
pub struct BomBlockPathRecord<'a> {
    /// The type of the path.
    ///
    /// See [crate::BomPathType] for definitions.
    pub path_type: u8,

    /// Unknown.
    pub a: u8,

    /// File architecture.
    ///
    /// Probably corresponds to value in Mach-O header.
    pub architecture: u16,

    /// File mode.
    pub mode: u16,

    /// UID of owner.
    pub user: u32,

    /// GID of owner.
    pub group: u32,

    /// Modified time in seconds since UNIX epoch.
    pub mtime: u32,

    /// Size in bytes.
    pub size: u32,

    /// Unknown.
    pub b: u8,

    /// CRC32 checksum or device type.
    pub checksum_or_type: u32,

    /// Length of link name.
    ///
    /// May be non-0 for non-link path records.
    ///
    /// Includes NULL terminator.
    pub link_name_length: u32,

    /// Link path name.
    ///
    /// The parser only populates this for link-typed records with a
    /// non-zero `link_name_length`.
    pub link_name: Option<Cow<'a, CStr>>,
}
725
726impl<'a> BomBlockPathRecord<'a> {
727    /// Write this data structure to a writer.
728    pub fn write(&self, writer: &mut impl Write) -> Result<(), Error> {
729        writer.iowrite_with(self.path_type, scroll::BE)?;
730        writer.iowrite_with(self.a, scroll::BE)?;
731        writer.iowrite_with(self.architecture, scroll::BE)?;
732        writer.iowrite_with(self.mode, scroll::BE)?;
733        writer.iowrite_with(self.user, scroll::BE)?;
734        writer.iowrite_with(self.group, scroll::BE)?;
735        writer.iowrite_with(self.mtime, scroll::BE)?;
736        writer.iowrite_with(self.size, scroll::BE)?;
737        writer.iowrite_with(self.b, scroll::BE)?;
738        writer.iowrite_with(self.checksum_or_type, scroll::BE)?;
739        writer.iowrite_with(self.link_name_length, scroll::BE)?;
740        if let Some(link_name) = &self.link_name {
741            writer.write_all(link_name.to_bytes_with_nul())?;
742        }
743
744        Ok(())
745    }
746
747    /// Obtain the link name of this record, if present.
748    pub fn string_link_name(&self) -> Option<String> {
749        self.link_name
750            .as_ref()
751            .map(|s| s.to_string_lossy().to_string())
752    }
753}
754
755impl<'a> scroll::ctx::TryFromCtx<'a, scroll::Endian> for BomBlockPathRecord<'a> {
756    type Error = Error;
757
758    fn try_from_ctx(data: &'a [u8], le: scroll::Endian) -> Result<(Self, usize), Self::Error> {
759        let offset = &mut 0;
760
761        let path_type = data.gread_with(offset, le)?;
762        let a = data.gread_with(offset, le)?;
763        let architecture = data.gread_with(offset, le)?;
764        let mode = data.gread_with(offset, le)?;
765        let user = data.gread_with(offset, le)?;
766        let group = data.gread_with(offset, le)?;
767        let mtime = data.gread_with(offset, le)?;
768        let size = data.gread_with(offset, le)?;
769        let b = data.gread_with(offset, le)?;
770        let checksum_or_type = data.gread_with(offset, le)?;
771        let link_name_length = data.gread_with(offset, le)?;
772
773        let link_name = if path_type == BomPathType::Link.into() && link_name_length > 0 {
774            let link_name_data = &data[*offset..*offset + link_name_length as usize];
775            Some(Cow::from(
776                CStr::from_bytes_with_nul(link_name_data).map_err(|_| Error::BadVariableString)?,
777            ))
778        } else {
779            None
780        };
781
782        Ok((
783            Self {
784                path_type,
785                a,
786                architecture,
787                mode,
788                user,
789                group,
790                mtime,
791                size,
792                b,
793                checksum_or_type,
794                link_name_length,
795                link_name,
796            },
797            *offset,
798        ))
799    }
800}
801
/// Block type for various variables describing a collection/tree of paths.
///
/// The block starts with the 4 byte magic `tree`, which the parser verifies.
#[repr(C)]
#[derive(Clone, Copy, Debug, IOwrite, Pwrite, SizeWith)]
pub struct BomBlockTree {
    /// Always `tree`.
    pub tree: [u8; 4],

    /// Version of this data structure.
    pub version: u32,

    /// Block index of [BomBlockPaths] describing paths.
    pub block_paths_index: u32,

    /// Block size. Always appears to be 4096.
    pub block_size: u32,

    /// Number of paths tracked by this tree.
    pub path_count: u32,

    /// Unknown.
    pub a: u8,
}
824
impl Default for BomBlockTree {
    /// Construct a tree record with the `tree` magic, version `1` and every
    /// other field set to `0`.
    ///
    /// Note that `block_size` defaults to `0` here, not the 4096 seen in
    /// Apple-produced files.
    fn default() -> Self {
        Self {
            tree: *b"tree",
            version: 1,
            block_paths_index: 0,
            block_size: 0,
            path_count: 0,
            a: 0,
        }
    }
}
837
838impl<'a> scroll::ctx::TryFromCtx<'a, scroll::Endian> for BomBlockTree {
839    type Error = Error;
840
841    fn try_from_ctx(data: &'a [u8], le: scroll::Endian) -> Result<(Self, usize), Self::Error> {
842        let offset = &mut 4;
843
844        let tree: [u8; 4] = [data[0], data[1], data[2], data[3]];
845        let version = data.gread_with(offset, le)?;
846        let block_paths_index = data.gread_with(offset, le)?;
847        let block_size = data.gread_with(offset, le)?;
848        let path_count = data.gread_with(offset, le)?;
849        let a = data.gread_with(offset, le)?;
850
851        if &tree != b"tree" {
852            return Err(Error::Scroll(scroll::Error::Custom(
853                "bad tree magic".into(),
854            )));
855        }
856
857        Ok((
858            Self {
859                tree,
860                version,
861                block_paths_index,
862                block_size,
863                path_count,
864                a,
865            },
866            *offset,
867        ))
868    }
869}
870
871impl BomBlockTree {
872    /// Resolve the [BomBlockPaths] this instance points to.
873    pub fn paths(&self, bom: &ParsedBom) -> Result<BomBlockPaths, Error> {
874        bom.block_as_paths(self.block_paths_index as _)
875    }
876
877    /// Resolve the [BomBlockPaths] that is the root of the tree.
878    pub fn root_paths(&self, bom: &ParsedBom) -> Result<BomBlockPaths, Error> {
879        let mut paths = self.paths(bom)?;
880
881        while paths.is_path_info == 0 {
882            let entry = paths.paths.get(0).ok_or(Error::BadIndex)?;
883            paths = entry.paths(bom)?;
884        }
885
886        Ok(paths)
887    }
888
889    /// Resolve all [BomPath] in this tree.
890    ///
891    /// This contains the logic for iterating over multiple [BomBlockPaths] instances.
892    pub fn bom_paths(&self, bom: &ParsedBom) -> Result<Vec<BomPath>, Error> {
893        let mut res = Vec::with_capacity(self.path_count as _);
894
895        let mut paths = self.root_paths(bom)?;
896        let mut files_by_id = HashMap::with_capacity(res.capacity());
897
898        loop {
899            for entry in paths.iter_path_entries(bom) {
900                let (path_id, file, record) = entry?;
901
902                // The full filename is resolved by traversing the file's parent path ID until
903                // we get to a root.
904                let mut resolve_file = &file;
905                let mut filename = file.string_file_name();
906
907                while resolve_file.parent_path_id != 0 {
908                    resolve_file = files_by_id
909                        .get(&resolve_file.parent_path_id)
910                        .ok_or(Error::BadIndex)?;
911                    filename = format!("{}/{}", resolve_file.string_file_name(), filename);
912                }
913
914                res.push(BomPath::from_record(filename, &record)?);
915
916                files_by_id.insert(path_id, file);
917            }
918
919            if paths.next_paths_block_index != 0 {
920                paths = bom.block_as_paths(paths.next_paths_block_index as _)?;
921            } else {
922                break;
923            }
924        }
925
926        Ok(res)
927    }
928}
929
/// A pointer to a block index holding a [BomBlockTree].
///
/// We're unsure what this block type is used for. But instances appear to
/// follow [BomBlockPaths] / [BomBlockPathRecordPointer] entries for every given path.
///
/// The only content of this block is a single `u32` block index.
#[repr(C)]
#[derive(Clone, Copy, Default, Debug, IOwrite, Pread, Pwrite, SizeWith)]
pub struct BomBlockTreePointer {
    /// Block index of corresponding [BomBlockTree].
    ///
    /// This is an index into the BOM's blocks index, not a file offset. It can
    /// be resolved with [ParsedBom::block_as_tree].
    pub block_tree_index: u32,
}
940
941impl BomBlockTreePointer {
942    pub fn tree(&self, bom: &ParsedBom) -> Result<BomBlockTree, Error> {
943        bom.block_as_tree(self.block_tree_index as _)
944    }
945}
946
/// Block type for the `VIndex` variable data.
///
/// We don't know much about this data structure. The only field with a known
/// meaning is [Self::tree_block_index], which refers to a [BomBlockTree].
#[repr(C)]
#[derive(Clone, Copy, Default, Debug, IOwrite, Pread, Pwrite, SizeWith)]
pub struct BomBlockVIndex {
    /// Unknown.
    pub a: u32,

    /// Block index holding a [BomBlockTree].
    ///
    /// This is an index into the BOM's blocks index and can be resolved with
    /// [ParsedBom::block_as_tree].
    pub tree_block_index: u32,

    /// Unknown.
    pub b: u32,

    /// Unknown.
    pub c: u8,
}
965
966impl BomBlockVIndex {
967    /// Resolve the [BomBlockTree] this instance points to.
968    pub fn tree(&self, bom: &ParsedBom) -> Result<BomBlockTree, Error> {
969        bom.block_as_tree(self.tree_block_index as _)
970    }
971}
972
/// The collection of variables in a BOM file.
///
/// This structure is what [BomHeader::vars_index_offset] and
/// [BomHeader::vars_index_length] refer to.
///
/// When written, the data consists of the big-endian `u32` [Self::count]
/// followed by each entry of [Self::vars] in order.
#[derive(Clone, Debug)]
pub struct BomVarsIndex {
    /// Number of variables.
    ///
    /// This value is written verbatim when serializing; it is not recomputed
    /// from the length of [Self::vars].
    pub count: u32,

    /// Records for each variable.
    pub vars: Vec<BomVar>,
}
985
986impl BomVarsIndex {
987    /// Write this data structure to a writer.
988    pub fn write(&self, writer: &mut impl Write) -> Result<(), Error> {
989        writer.iowrite_with(self.count, scroll::BE)?;
990
991        for var in &self.vars {
992            var.write(writer)?;
993        }
994
995        Ok(())
996    }
997
998    /// Obtain the bytes representation of this data structure.
999    pub fn to_vec(&self) -> Result<Vec<u8>, Error> {
1000        let mut writer = Cursor::new(Vec::<u8>::new());
1001        self.write(&mut writer)?;
1002        Ok(writer.into_inner())
1003    }
1004}
1005
1006impl<'a> scroll::ctx::TryFromCtx<'a, scroll::Endian> for BomVarsIndex {
1007    type Error = Error;
1008
1009    fn try_from_ctx(data: &'a [u8], le: scroll::Endian) -> Result<(Self, usize), Self::Error> {
1010        let offset = &mut 0;
1011
1012        let count = data.gread_with::<u32>(offset, le)?;
1013        let mut vars = Vec::with_capacity(count as usize);
1014
1015        for _ in 0..count {
1016            vars.push(data.gread_with::<BomVar>(offset, le)?);
1017        }
1018
1019        Ok((Self { count, vars }, *offset))
1020    }
1021}
1022
/// Enumeration over known block types.
///
/// Each variant other than [Self::Empty] wraps the parsed representation of
/// one block type. Variants carrying the `'a` lifetime borrow from the data
/// they were parsed from.
#[derive(Clone, Debug)]
pub enum BomBlock<'a> {
    /// A block with no content. Writing this variant emits no bytes.
    ///
    /// [BomBlock::try_parse] never returns this variant.
    Empty,
    /// A [BomBlockBomInfo] block.
    BomInfo(BomBlockBomInfo),
    /// A [BomBlockFile] block.
    File(BomBlockFile<'a>),
    /// A [BomBlockPathInfoIndex] block.
    PathInfoIndex(BomBlockPathInfoIndex),
    /// A [BomBlockPathRecord] block.
    PathRecord(BomBlockPathRecord<'a>),
    /// A [BomBlockPathRecordPointer] block.
    PathRecordPointer(BomBlockPathRecordPointer),
    /// A [BomBlockPaths] block.
    Paths(BomBlockPaths),
    /// A [BomBlockTree] block.
    Tree(BomBlockTree),
    /// A [BomBlockTreePointer] block.
    TreePointer(BomBlockTreePointer),
    /// A [BomBlockVIndex] block.
    VIndex(BomBlockVIndex),
}
1037
1038impl<'a> BomBlock<'a> {
1039    /// Attempt to resolve an instance from a [ParsedBom] and block index number.
1040    pub fn try_parse(bom: &'a ParsedBom, index: usize) -> Result<Self, Error> {
1041        if index == 1 {
1042            if let Ok(info) = bom.block_as_bom_info(index) {
1043                return Ok(Self::BomInfo(info));
1044            }
1045        }
1046
1047        // Multiple block types may parse correctly. Our strategy to tease out
1048        // false positives is to recursively examine the parsed block and verify
1049        // all parts parse.
1050        if let Ok(tree) = bom.block_as_tree(index) {
1051            if tree.paths(bom).is_ok() {
1052                return Ok(Self::Tree(tree));
1053            }
1054        }
1055
1056        if let Ok(paths) = bom.block_as_paths(index) {
1057            if paths.iter_path_entries(bom).all(|x| x.is_ok()) {
1058                return Ok(Self::Paths(paths));
1059            }
1060        }
1061
1062        if let Ok(vindex) = bom.block_as_vindex(index) {
1063            if vindex.tree(bom).is_ok() {
1064                return Ok(Self::VIndex(vindex));
1065            }
1066        }
1067
1068        if let Ok(index) = bom.block_as_path_info_index(index) {
1069            if index.path_record(bom).is_ok() {
1070                return Ok(Self::PathInfoIndex(index));
1071            }
1072        }
1073
1074        // Path records are quite large.
1075        if let Ok(record) = bom.block_as_path_record(index) {
1076            return Ok(Self::PathRecord(record));
1077        }
1078
1079        if let Ok(file) = bom.block_as_file(index) {
1080            return Ok(Self::File(file));
1081        }
1082
1083        if let Ok(pointer) = bom.block_as_path_record_pointer(index) {
1084            if pointer.path_record(bom).is_ok() {
1085                return Ok(Self::PathRecordPointer(pointer));
1086            }
1087        }
1088
1089        if let Ok(pointer) = bom.block_as_tree_pointer(index) {
1090            if pointer.tree(bom).is_ok() {
1091                return Ok(Self::TreePointer(pointer));
1092            }
1093        }
1094
1095        if let Ok(info) = bom.block_as_bom_info(index) {
1096            return Ok(Self::BomInfo(info));
1097        }
1098
1099        Err(Error::UnknownBlockType)
1100    }
1101
1102    /// Write the block data to a writer.
1103    pub fn write(&self, writer: &mut impl Write) -> Result<(), Error> {
1104        match self {
1105            Self::Empty => {}
1106            Self::BomInfo(b) => {
1107                b.write(writer)?;
1108            }
1109            Self::File(b) => {
1110                b.write(writer)?;
1111            }
1112            Self::PathInfoIndex(b) => {
1113                writer.iowrite_with(*b, scroll::BE)?;
1114            }
1115            Self::PathRecord(b) => {
1116                b.write(writer)?;
1117            }
1118            Self::PathRecordPointer(b) => {
1119                writer.iowrite_with(*b, scroll::BE)?;
1120            }
1121            Self::Paths(b) => {
1122                b.write(writer)?;
1123            }
1124            Self::Tree(b) => {
1125                writer.iowrite_with(*b, scroll::BE)?;
1126            }
1127            Self::TreePointer(b) => {
1128                writer.iowrite_with(*b, scroll::BE)?;
1129            }
1130            Self::VIndex(b) => {
1131                writer.iowrite_with(*b, scroll::BE)?;
1132            }
1133        }
1134
1135        Ok(())
1136    }
1137
1138    /// Serialize this block to its bytes representation.
1139    pub fn to_vec(&self) -> Result<Vec<u8>, Error> {
1140        let mut writer = Cursor::new(Vec::<u8>::new());
1141        self.write(&mut writer)?;
1142
1143        Ok(writer.into_inner())
1144    }
1145}
1146
/// Parsed BOM data structure.
///
/// Instances hold references to the data they are backed by. An instance
/// created by [ParsedBom::parse] borrows the input slice; use
/// [ParsedBom::to_owned] to obtain an instance that owns a copy of the data.
pub struct ParsedBom<'a> {
    /// Underlying data backing this BOM.
    ///
    /// Block contents are sliced out of this buffer on demand.
    pub data: Cow<'a, [u8]>,

    /// The file header.
    pub header: BomHeader,

    /// The blocks index.
    ///
    /// Used to locate the bytes of a block within [Self::data].
    pub blocks: BomBlocksIndex,

    /// BOM variables.
    pub vars: BomVarsIndex,
}
1163
1164impl<'a> ParsedBom<'a> {
1165    /// Parse BOM data into a data structure.
1166    ///
1167    /// Only the header and block and variable indices are parsed immediately.
1168    /// Everything else is lazily parsed.
1169    pub fn parse(data: &'a [u8]) -> Result<Self, Error> {
1170        let header = data.pread_with::<BomHeader>(0, scroll::BE)?;
1171
1172        let blocks_index = header.blocks_index(data)?;
1173        let vars = header.vars_index(data)?;
1174
1175        Ok(Self {
1176            data: Cow::Borrowed(data),
1177            header,
1178            blocks: blocks_index,
1179            vars,
1180        })
1181    }
1182
1183    /// Convert to an instance that owns its backing data.
1184    pub fn to_owned(&self) -> ParsedBom<'static> {
1185        ParsedBom {
1186            data: Cow::Owned(self.data.clone().into_owned()),
1187            header: self.header,
1188            blocks: self.blocks.clone(),
1189            vars: self.vars.clone(),
1190        }
1191    }
1192
1193    /// Attempt to locate a named variable.
1194    pub fn find_variable(&self, name: &str) -> Result<&BomVar, Error> {
1195        self.vars
1196            .vars
1197            .iter()
1198            .find(|v| v.name == name)
1199            .ok_or_else(|| Error::NoVar(name.to_string()))
1200    }
1201
1202    /// Attempt to resolve the [BomBlockBomInfo] for this instance.
1203    pub fn bom_info(&self) -> Result<BomBlockBomInfo, Error> {
1204        let var = self.find_variable("BomInfo")?;
1205
1206        self.block_as_bom_info(var.block_index as _)
1207    }
1208
1209    pub fn hl_index(&self) -> Result<Vec<BomPath>, Error> {
1210        let var = self.find_variable("HLIndex")?;
1211        let tree = self.block_as_tree(var.block_index as _)?;
1212
1213        tree.bom_paths(self)
1214    }
1215
1216    pub fn paths(&self) -> Result<Vec<BomPath>, Error> {
1217        let index = self.find_variable("Paths")?;
1218        let tree = self.block_as_tree(index.block_index as _)?;
1219
1220        tree.bom_paths(self)
1221    }
1222
1223    /// Resolve the Size64 tree.
1224    pub fn size64(&self) -> Result<Vec<BomPath>, Error> {
1225        let var = self.find_variable("Size64")?;
1226        let tree = self.block_as_tree(var.block_index as _)?;
1227
1228        tree.bom_paths(self)
1229    }
1230
1231    /// Resolve the V Index.
1232    pub fn vindex(&self) -> Result<Vec<BomPath>, Error> {
1233        let var = self.find_variable("VIndex")?;
1234        let index = self.block_as_vindex(var.block_index as _)?;
1235        let tree = index.tree(self)?;
1236
1237        tree.bom_paths(self)
1238    }
1239
1240    /// Resolve the raw data backing a block given a block index.
1241    pub fn block_data(&self, index: usize) -> Result<&[u8], Error> {
1242        let entry = self.blocks.blocks.get(index).ok_or(Error::BadIndex)?;
1243
1244        Ok(&self.data[entry.file_offset as usize..(entry.file_offset + entry.length) as usize])
1245    }
1246
1247    /// Attempt to resolve a block at an index as a [BomBlockBomInfo].
1248    pub fn block_as_bom_info(&self, index: usize) -> Result<BomBlockBomInfo, Error> {
1249        self.block_data(index)?.pread_with(0, scroll::BE)
1250    }
1251
1252    /// Attempt to resolve a block at an index as a [BomBlockFile].
1253    pub fn block_as_file(&self, index: usize) -> Result<BomBlockFile<'_>, Error> {
1254        self.block_data(index)?.pread_with(0, scroll::BE)
1255    }
1256
1257    /// Attempt to resolve a block at an index as a [BomBlockPathInfoIndex].
1258    pub fn block_as_path_info_index(&self, index: usize) -> Result<BomBlockPathInfoIndex, Error> {
1259        Ok(self.block_data(index)?.pread_with(0, scroll::BE)?)
1260    }
1261
1262    /// Attempt to resolve a block at an index as a [BomBlockPathRecord].
1263    pub fn block_as_path_record(&self, index: usize) -> Result<BomBlockPathRecord, Error> {
1264        self.block_data(index)?.pread_with(0, scroll::BE)
1265    }
1266
1267    /// Attempt to resolve a block at an index as a [BomBlockPathRecordPointer].
1268    pub fn block_as_path_record_pointer(
1269        &self,
1270        index: usize,
1271    ) -> Result<BomBlockPathRecordPointer, Error> {
1272        Ok(self.block_data(index)?.pread_with(0, scroll::BE)?)
1273    }
1274
1275    /// Attempt to resolve a block at an index as a [BomBlockPaths].
1276    pub fn block_as_paths(&self, index: usize) -> Result<BomBlockPaths, Error> {
1277        let data = self.block_data(index)?;
1278        Ok(data.pread_with(0, scroll::BE)?)
1279    }
1280
1281    /// Attempt to resolve a black at an index as a [BomBlockTree].
1282    pub fn block_as_tree(&self, index: usize) -> Result<BomBlockTree, Error> {
1283        let data = self.block_data(index)?;
1284        data.pread_with(0, scroll::BE)
1285    }
1286
1287    /// Attempt to resolve a block at an index as a [BomBlockTreePointer].
1288    pub fn block_as_tree_pointer(&self, index: usize) -> Result<BomBlockTreePointer, Error> {
1289        Ok(self.block_data(index)?.pread_with(0, scroll::BE)?)
1290    }
1291
1292    /// Attempt to resolve a block at an index as a [BomBlockVIndex].
1293    pub fn block_as_vindex(&self, index: usize) -> Result<BomBlockVIndex, Error> {
1294        Ok(self.block_data(index)?.pread_with(0, scroll::BE)?)
1295    }
1296}
1297
#[cfg(test)]
mod tests {
    use super::*;

    /// BOM fixture used by the tests in this module.
    const PYTHON_DATA: &[u8] = include_bytes!("testdata/python-applications.bom");

    /// Parses the fixture, resolves every known index and spot-checks two paths.
    #[test]
    fn parse_python() -> Result<(), Error> {
        let bom = ParsedBom::parse(PYTHON_DATA)?;

        bom.bom_info()?;

        // Forces recursive parsing
        for _ in bom.hl_index()? {}
        for _ in bom.paths()? {}
        for _ in bom.size64()? {}
        for _ in bom.vindex()? {}

        // Resolve the path list once and look both entries up in it.
        let all_paths = bom.paths()?;
        let mode_of = |wanted: &str| {
            all_paths
                .iter()
                .find(|p| p.path() == wanted)
                .unwrap()
                .symbolic_mode()
        };

        assert_eq!(mode_of("."), "drwxr-xr-x");
        assert_eq!(mode_of("./Python 3.9/ReadMe.rtf"), "-rw-r--r--");

        Ok(())
    }
}