llvm_bitstream/lib.rs
1//! `llvm-bitstream` is a library for interpreting files in LLVM's
2//! [bitstream format](https://llvm.org/docs/BitCodeFormat.html).
3
4#![deny(rustdoc::broken_intra_doc_links)]
5#![deny(missing_docs)]
6#![allow(clippy::redundant_field_names)]
7#![forbid(unsafe_code)]
8
9pub mod abbrev;
10pub mod error;
11pub mod parser;
12pub mod record;
13
14use std::io::{Seek, SeekFrom};
15
16use llvm_bitcursor::BitCursor;
17use llvm_support::BITCODE_WRAPPER_MAGIC;
18
19use crate::error::Error;
20use crate::parser::StreamEntry;
21
/// The fixed-size header that precedes a wrapped bitstream.
///
/// Every field is a 32-bit value; [`Bitstream::from_wrapped`] reads them
/// back-to-back from the start of the input, in the order declared here.
#[derive(Debug)]
pub struct BitcodeWrapper {
    /// Magic value identifying the wrapper itself.
    pub magic: u32,
    /// Version of the wrapper.
    pub version: u32,
    /// Offset at which the actual bitstream begins.
    pub offset: u32,
    /// Size of the bitstream held inside the wrapper.
    pub size: u32,
    /// Target-specific encoding of the CPU type.
    pub cpu_type: u32,
}
36
37/// Represents an overarching bitstream container.
38///
39/// This struct is responsible for managing two pieces of state:
40/// 1. The application-specific magic that identifies the input
41/// 2. An underlying [`StreamParser`](crate::parser::StreamParser) that can
42/// be advanced to produce individual blocks and records within the bitstream.
43#[derive(Debug)]
44pub struct Bitstream<T: AsRef<[u8]>> {
45 /// The application-specific magic associated with this bitstream.
46 pub magic: u32,
47 parser: parser::StreamParser<T>,
48}
49
50impl<T: AsRef<[u8]>> Bitstream<T> {
51 fn from_cursor(mut cur: BitCursor<T>) -> Result<Self, Error> {
52 // This isn't documented anywhere, but LLVM's BitcodeReader requires
53 // all inputs to be 4-byte aligned.
54 // See: `llvm::initStream` in `Bitcode/Reader/BitcodeReader.cpp`.
55 if cur.byte_len() % 4 != 0 {
56 return Err(Error::BadContainer("input is not 4-byte aligned".into()));
57 }
58
59 // Every bitstream starts with an aligned, 32-bit magic field.
60 // There's absolutely no point in continuing the parse if we fail here.
61 Ok(Self {
62 magic: cur.read_exact::<u32>().map_err(|e| {
63 Error::BadContainer(format!(
64 "bitstream should have begun with magic, but errored: {:?}",
65 e
66 ))
67 })?,
68 parser: parser::StreamParser::new(cur),
69 })
70 }
71
72 /// Intelligently create a new `Bitstream` from the given source, parsing
73 /// the bitcode wrapper if necessary.
74 pub fn from(inner: T) -> Result<(Option<BitcodeWrapper>, Self), Error> {
75 log::debug!("beginning intelligent parse");
76 let mut cur = BitCursor::new(&inner);
77
78 // Read the magic to determine which parse strategy to use.
79 let magic = cur.read_exact::<u32>()?;
80
81 // The only wrapper we currently know is the bitcode wrapper.
82 // If our magic doesn't match that, then we try the raw parser.
83 if magic == BITCODE_WRAPPER_MAGIC {
84 log::debug!("input looks like a bitcode wrapper!");
85 let (wrapper, parser) = Self::from_wrapped(inner)?;
86 Ok((Some(wrapper), parser))
87 } else {
88 log::debug!("input is probably a raw bitstream!");
89 Ok((None, Self::from_raw(inner)?))
90 }
91 }
92
93 /// Create a new `Bitstream` from the given source.
94 ///
95 /// **NOTE**: This function assumes that it's being given a "raw" bitstream,
96 /// i.e. not one that's been wrapped with another container (such as the
97 /// bitcode wrapper format). To parse a wrapped bitstream, use the
98 /// [`from_wrapped`](Bitstream::from_wrapped) API.
99 pub fn from_raw(inner: T) -> Result<Self, Error> {
100 let cur = BitCursor::new(inner);
101 Self::from_cursor(cur)
102 }
103
104 /// Create a new `Bitstream` from the given wrapped source.
105 ///
106 /// The source is parsed as if it begins with a
107 /// [bitcode wrapper](https://llvm.org/docs/BitCodeFormat.html#bitcode-wrapper-format).
108 /// "Raw" inputs should be parsed with [`from_raw`](Bitstream::from_raw) instead.
109 pub fn from_wrapped(inner: T) -> Result<(BitcodeWrapper, Self), Error> {
110 let mut cur = BitCursor::new(&inner);
111
112 let wrapper = BitcodeWrapper {
113 magic: cur.read_exact::<u32>()?,
114 version: cur.read_exact::<u32>()?,
115 offset: cur.read_exact::<u32>()?,
116 size: cur.read_exact::<u32>()?,
117 cpu_type: cur.read_exact::<u32>()?,
118 };
119
120 // NOTE(ww): The `new_with_len` API is a little bit silly -- ideally we'd just
121 // take a slice of `inner` and create a new `BitCursor` with it, but we can't do
122 // that while preserving the generic `T` bound.
123 // The manual fixup (+ 20) is another artifact of this -- we keep the wrapper header
124 // in the new cursor to make the offsets more intelligible, which means that we
125 // also need to extend the end of our cursor's buffer.
126 let actual_length = (wrapper.size as usize) + 20;
127 let mut cur = BitCursor::new_with_len(inner, actual_length)?;
128
129 cur.seek(SeekFrom::Start(wrapper.offset.into()))
130 .map_err(|e| {
131 Error::StreamParse(format!("couldn't seek past bitcode wrapper: {:?}", e))
132 })?;
133 Ok((wrapper, Self::from_cursor(cur)?))
134 }
135
136 /// Advance the underlying bitstream parser by one entry.
137 ///
138 /// NOTE: Most users should prefer the iterator implementation.
139 pub fn advance(&mut self) -> Result<StreamEntry, Error> {
140 self.parser.advance()
141 }
142}
143
144impl<T: AsRef<[u8]>> Iterator for Bitstream<T> {
145 type Item = Result<StreamEntry, Error>;
146
147 fn next(&mut self) -> Option<Self::Item> {
148 match self.advance() {
149 Ok(entry) => Some(Ok(entry)),
150 Err(Error::Exhausted) => None,
151 Err(e) => Some(Err(e)),
152 }
153 }
154}
155
// Placeholder: this module currently contains no unit tests.
#[cfg(test)]
mod tests {}