llvm_bitstream/
lib.rs

1//! `llvm-bitstream` is a library for interpreting files in LLVM's
2//! [bitstream format](https://llvm.org/docs/BitCodeFormat.html).
3
4#![deny(rustdoc::broken_intra_doc_links)]
5#![deny(missing_docs)]
6#![allow(clippy::redundant_field_names)]
7#![forbid(unsafe_code)]
8
9pub mod abbrev;
10pub mod error;
11pub mod parser;
12pub mod record;
13
14use std::io::{Seek, SeekFrom};
15
16use llvm_bitcursor::BitCursor;
17use llvm_support::BITCODE_WRAPPER_MAGIC;
18
19use crate::error::Error;
20use crate::parser::StreamEntry;
21
/// A representation of the wrapper structure for a bitstream.
///
/// The fields mirror the five consecutive 32-bit words of the
/// [bitcode wrapper](https://llvm.org/docs/BitCodeFormat.html#bitcode-wrapper-format)
/// header, in the order in which they are read from the input.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct BitcodeWrapper {
    /// The magic for this wrapper.
    pub magic: u32,
    /// The version for this wrapper.
    pub version: u32,
    /// The offset to the actual bitstream, measured from the start of the input.
    pub offset: u32,
    /// The size of the wrapped bitstream.
    pub size: u32,
    /// A target-specific value that encodes the CPU type.
    pub cpu_type: u32,
}
36
37/// Represents an overarching bitstream container.
38///
39/// This struct is responsible for managing two pieces of state:
40/// 1. The application-specific magic that identifies the input
41/// 2. An underlying [`StreamParser`](crate::parser::StreamParser) that can
42///    be advanced to produce individual blocks and records within the bitstream.
43#[derive(Debug)]
44pub struct Bitstream<T: AsRef<[u8]>> {
45    /// The application-specific magic associated with this bitstream.
46    pub magic: u32,
47    parser: parser::StreamParser<T>,
48}
49
50impl<T: AsRef<[u8]>> Bitstream<T> {
51    fn from_cursor(mut cur: BitCursor<T>) -> Result<Self, Error> {
52        // This isn't documented anywhere, but LLVM's BitcodeReader requires
53        // all inputs to be 4-byte aligned.
54        // See: `llvm::initStream` in `Bitcode/Reader/BitcodeReader.cpp`.
55        if cur.byte_len() % 4 != 0 {
56            return Err(Error::BadContainer("input is not 4-byte aligned".into()));
57        }
58
59        // Every bitstream starts with an aligned, 32-bit magic field.
60        // There's absolutely no point in continuing the parse if we fail here.
61        Ok(Self {
62            magic: cur.read_exact::<u32>().map_err(|e| {
63                Error::BadContainer(format!(
64                    "bitstream should have begun with magic, but errored: {:?}",
65                    e
66                ))
67            })?,
68            parser: parser::StreamParser::new(cur),
69        })
70    }
71
72    /// Intelligently create a new `Bitstream` from the given source, parsing
73    /// the bitcode wrapper if necessary.
74    pub fn from(inner: T) -> Result<(Option<BitcodeWrapper>, Self), Error> {
75        log::debug!("beginning intelligent parse");
76        let mut cur = BitCursor::new(&inner);
77
78        // Read the magic to determine which parse strategy to use.
79        let magic = cur.read_exact::<u32>()?;
80
81        // The only wrapper we currently know is the bitcode wrapper.
82        // If our magic doesn't match that, then we try the raw parser.
83        if magic == BITCODE_WRAPPER_MAGIC {
84            log::debug!("input looks like a bitcode wrapper!");
85            let (wrapper, parser) = Self::from_wrapped(inner)?;
86            Ok((Some(wrapper), parser))
87        } else {
88            log::debug!("input is probably a raw bitstream!");
89            Ok((None, Self::from_raw(inner)?))
90        }
91    }
92
93    /// Create a new `Bitstream` from the given source.
94    ///
95    /// **NOTE**: This function assumes that it's being given a "raw" bitstream,
96    /// i.e. not one that's been wrapped with another container (such as the
97    /// bitcode wrapper format). To parse a wrapped bitstream, use the
98    /// [`from_wrapped`](Bitstream::from_wrapped) API.
99    pub fn from_raw(inner: T) -> Result<Self, Error> {
100        let cur = BitCursor::new(inner);
101        Self::from_cursor(cur)
102    }
103
104    /// Create a new `Bitstream` from the given wrapped source.
105    ///
106    /// The source is parsed as if it begins with a
107    /// [bitcode wrapper](https://llvm.org/docs/BitCodeFormat.html#bitcode-wrapper-format).
108    /// "Raw" inputs should be parsed with [`from_raw`](Bitstream::from_raw) instead.
109    pub fn from_wrapped(inner: T) -> Result<(BitcodeWrapper, Self), Error> {
110        let mut cur = BitCursor::new(&inner);
111
112        let wrapper = BitcodeWrapper {
113            magic: cur.read_exact::<u32>()?,
114            version: cur.read_exact::<u32>()?,
115            offset: cur.read_exact::<u32>()?,
116            size: cur.read_exact::<u32>()?,
117            cpu_type: cur.read_exact::<u32>()?,
118        };
119
120        // NOTE(ww): The `new_with_len` API is a little bit silly -- ideally we'd just
121        // take a slice of `inner` and create a new `BitCursor` with it, but we can't do
122        // that while preserving the generic `T` bound.
123        // The manual fixup (+ 20) is another artifact of this -- we keep the wrapper header
124        // in the new cursor to make the offsets more intelligible, which means that we
125        // also need to extend the end of our cursor's buffer.
126        let actual_length = (wrapper.size as usize) + 20;
127        let mut cur = BitCursor::new_with_len(inner, actual_length)?;
128
129        cur.seek(SeekFrom::Start(wrapper.offset.into()))
130            .map_err(|e| {
131                Error::StreamParse(format!("couldn't seek past bitcode wrapper: {:?}", e))
132            })?;
133        Ok((wrapper, Self::from_cursor(cur)?))
134    }
135
136    /// Advance the underlying bitstream parser by one entry.
137    ///
138    /// NOTE: Most users should prefer the iterator implementation.
139    pub fn advance(&mut self) -> Result<StreamEntry, Error> {
140        self.parser.advance()
141    }
142}
143
144impl<T: AsRef<[u8]>> Iterator for Bitstream<T> {
145    type Item = Result<StreamEntry, Error>;
146
147    fn next(&mut self) -> Option<Self::Item> {
148        match self.advance() {
149            Ok(entry) => Some(Ok(entry)),
150            Err(Error::Exhausted) => None,
151            Err(e) => Some(Err(e)),
152        }
153    }
154}
155
// Placeholder: this module currently contains no unit tests.
#[cfg(test)]
mod tests {}