1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
//! `llvm-bitstream` is a library for interpreting files in LLVM's
//! [bitstream format](https://llvm.org/docs/BitCodeFormat.html).
#![deny(broken_intra_doc_links)]
#![deny(missing_docs)]
#![allow(clippy::redundant_field_names)]
#![forbid(unsafe_code)]
pub mod abbrev;
pub mod error;
pub mod parser;
pub mod record;
use std::io::{Seek, SeekFrom};
use llvm_bitcursor::BitCursor;
use llvm_constants::BITCODE_WRAPPER_MAGIC;
use crate::error::Error;
use crate::parser::StreamEntry;
/// A representation of the wrapper structure for a bitstream.
///
/// This mirrors the header of the
/// [bitcode wrapper format](https://llvm.org/docs/BitCodeFormat.html#bitcode-wrapper-format):
/// five consecutive 32-bit fields, stored here in the same order in which
/// `Bitstream::from_wrapped` reads them from the start of the input.
#[derive(Debug)]
pub struct BitcodeWrapper {
/// The magic for this wrapper.
///
/// `Bitstream::from` compares the input's leading word against
/// `BITCODE_WRAPPER_MAGIC` to decide whether a wrapper is present.
pub magic: u32,
/// The version for this wrapper.
pub version: u32,
/// The offset to the actual bitstream.
///
/// Used as a byte offset from the start of the input when seeking
/// to the wrapped stream.
pub offset: u32,
/// The size of the wrapped bitstream, in bytes.
pub size: u32,
/// A target-specific value that encodes the CPU type.
pub cpu_type: u32,
}
/// Represents an overarching bitstream container.
///
/// This struct is responsible for managing two pieces of state:
/// 1. The application-specific magic that identifies the input
/// 2. An underlying [`StreamParser`](crate::parser::StreamParser) that can
/// be advanced to produce individual blocks and records within the bitstream.
///
/// The type parameter `T` is the backing storage for the stream's bytes;
/// anything that can be viewed as a `[u8]` slice is accepted.
#[derive(Debug)]
pub struct Bitstream<T: AsRef<[u8]>> {
/// The application-specific magic associated with this bitstream.
///
/// This is the first 32-bit value read from the (unwrapped) stream.
pub magic: u32,
// The parser that yields the stream's entries. It is constructed from the
// cursor immediately after the magic has been consumed, and is driven by
// `advance` (and, through it, the `Iterator` implementation).
parser: parser::StreamParser<T>,
}
impl<T: AsRef<[u8]>> Bitstream<T> {
fn from_cursor(mut cur: BitCursor<T>) -> Result<Self, Error> {
// This isn't documented anywhere, but LLVM's BitcodeReader requires
// all inputs to be 4-byte aligned.
// See: `llvm::initStream` in `Bitcode/Reader/BitcodeReader.cpp`.
if cur.byte_len() % 4 != 0 {
return Err(Error::BadContainer("input is not 4-byte aligned".into()));
}
// Every bitstream starts with an aligned, 32-bit magic field.
// There's absolutely no point in continuing the parse if we fail here.
Ok(Self {
magic: cur.read_exact::<u32>().map_err(|e| {
Error::BadContainer(format!(
"bitstream should have begun with magic, but errored: {:?}",
e
))
})?,
parser: parser::StreamParser::new(cur),
})
}
/// Intelligently create a new `Bitstream` from the given source, parsing
/// the bitcode wrapper if necessary.
pub fn from(inner: T) -> Result<(Option<BitcodeWrapper>, Self), Error> {
log::debug!("beginning intelligent parse");
let mut cur = BitCursor::new(&inner);
// Read the magic to determine which parse strategy to use.
let magic = cur.read_exact::<u32>()?;
// The only wrapper we currently know is the bitcode wrapper.
// If our magic doesn't match that, then we try the raw parser.
if magic == BITCODE_WRAPPER_MAGIC {
log::debug!("input looks like a bitcode wrapper!");
let (wrapper, parser) = Self::from_wrapped(inner)?;
Ok((Some(wrapper), parser))
} else {
log::debug!("input is probably a raw bitstream!");
Ok((None, Self::from_raw(inner)?))
}
}
/// Create a new `Bitstream` from the given source.
///
/// **NOTE**: This function assumes that it's being given a "raw" bitstream,
/// i.e. not one that's been wrapped with another container (such as the
/// bitcode wrapper format). To parse a wrapped bitstream, use the
/// [`from_wrapped`](Bitstream::from_wrapped) API.
pub fn from_raw(inner: T) -> Result<Self, Error> {
let cur = BitCursor::new(inner);
Self::from_cursor(cur)
}
/// Create a new `Bitstream` from the given wrapped source.
///
/// The source is parsed as if it begins with a
/// [bitcode wrapper](https://llvm.org/docs/BitCodeFormat.html#bitcode-wrapper-format).
/// "Raw" inputs should be parsed with [`from_raw`](Bitstream::from_raw) instead.
pub fn from_wrapped(inner: T) -> Result<(BitcodeWrapper, Self), Error> {
let mut cur = BitCursor::new(&inner);
let wrapper = BitcodeWrapper {
magic: cur.read_exact::<u32>()?,
version: cur.read_exact::<u32>()?,
offset: cur.read_exact::<u32>()?,
size: cur.read_exact::<u32>()?,
cpu_type: cur.read_exact::<u32>()?,
};
// NOTE(ww): The `new_with_len` API is a little bit silly -- ideally we'd just
// take a slice of `inner` and create a new `BitCursor` with it, but we can't do
// that while preserving the generic `T` bound.
// The manual fixup (+ 20) is another artifact of this -- we keep the wrapper header
// in the new cursor to make the offsets more intelligible, which means that we
// also need to extend the end of our cursor's buffer.
let actual_length = (wrapper.size as usize) + 20;
let mut cur = BitCursor::new_with_len(inner, actual_length)?;
cur.seek(SeekFrom::Start(wrapper.offset.into()))
.map_err(|e| {
Error::StreamParse(format!("couldn't seek past bitcode wrapper: {:?}", e))
})?;
Ok((wrapper, Self::from_cursor(cur)?))
}
/// Advance the underlying bitstream parser by one entry.
///
/// NOTE: Most users should prefer the iterator implementation.
pub fn advance(&mut self) -> Result<StreamEntry, Error> {
self.parser.advance()
}
}
impl<T: AsRef<[u8]>> Iterator for Bitstream<T> {
type Item = Result<StreamEntry, Error>;
fn next(&mut self) -> Option<Self::Item> {
match self.advance() {
Ok(entry) => Some(Ok(entry)),
Err(Error::Exhausted) => None,
Err(e) => Some(Err(e)),
}
}
}
// Placeholder for this module's unit tests; it is only compiled for test
// builds and currently contains no tests.
#[cfg(test)]
mod tests {}