llvm-bitstream 0.0.3

A content-agnostic parser for LLVM's bitstream container format
Documentation
//! `llvm-bitstream` is a library for interpreting files in LLVM's
//! [bitstream format](https://llvm.org/docs/BitCodeFormat.html).

#![deny(rustdoc::broken_intra_doc_links)]
#![deny(missing_docs)]
#![allow(clippy::redundant_field_names)]
#![forbid(unsafe_code)]

pub mod abbrev;
pub mod error;
pub mod parser;
pub mod record;

use std::io::{Seek, SeekFrom};

use llvm_bitcursor::BitCursor;
use llvm_support::BITCODE_WRAPPER_MAGIC;

use crate::error::Error;
use crate::parser::StreamEntry;

/// The header fields of LLVM's bitcode wrapper, as read from the front of a
/// wrapped bitstream.
///
/// The wrapper is five consecutive 32-bit fields, stored here in the order in
/// which they are read. The field meanings follow LLVM's description of the
/// [bitcode wrapper format](https://llvm.org/docs/BitCodeFormat.html#bitcode-wrapper-format).
///
/// The type is plain data, so it can be freely copied and compared.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BitcodeWrapper {
    /// The magic value that introduces the wrapper.
    pub magic: u32,
    /// The version of the wrapper format.
    pub version: u32,
    /// The offset, in bytes from the start of the input, to the wrapped bitstream.
    pub offset: u32,
    /// The size, in bytes, of the wrapped bitstream.
    pub size: u32,
    /// A target-specific value that encodes the CPU type.
    pub cpu_type: u32,
}

/// Represents an overarching bitstream container.
///
/// This struct is responsible for managing two pieces of state:
/// 1. The application-specific magic that identifies the input, read from the
///    first 32 bits of the stream when the `Bitstream` is constructed.
/// 2. An underlying [`StreamParser`](crate::parser::StreamParser) that can
///    be advanced to produce individual blocks and records within the bitstream.
///
/// The type parameter `T` is the backing storage for the input; any type that
/// can be viewed as a byte slice is accepted.
#[derive(Debug)]
pub struct Bitstream<T: AsRef<[u8]>> {
    /// The application-specific magic associated with this bitstream.
    pub magic: u32,
    // The parser that produces the stream's entries; it is constructed from the
    // cursor after the magic has been read, and is driven through `advance`.
    parser: parser::StreamParser<T>,
}

impl<T: AsRef<[u8]>> Bitstream<T> {
    /// Build a `Bitstream` from a cursor positioned at the stream's magic.
    ///
    /// Reads the 32-bit magic and hands the cursor over to a new
    /// [`StreamParser`](crate::parser::StreamParser).
    ///
    /// # Errors
    ///
    /// Returns `Error::BadContainer` if the cursor's byte length is not a
    /// multiple of 4, or if the magic cannot be read.
    fn from_cursor(mut cur: BitCursor<T>) -> Result<Self, Error> {
        // This isn't documented anywhere, but LLVM's BitcodeReader requires
        // all inputs to be 4-byte aligned.
        // See: `llvm::initStream` in `Bitcode/Reader/BitcodeReader.cpp`.
        if cur.byte_len() % 4 != 0 {
            return Err(Error::BadContainer("input is not 4-byte aligned".into()));
        }

        // Every bitstream starts with an aligned, 32-bit magic field.
        // There's absolutely no point in continuing the parse if we fail here.
        let magic = cur.read_exact::<u32>().map_err(|e| {
            Error::BadContainer(format!(
                "bitstream should have begun with magic, but errored: {:?}",
                e
            ))
        })?;

        Ok(Self {
            magic,
            parser: parser::StreamParser::new(cur),
        })
    }

    /// Intelligently create a new `Bitstream` from the given source, parsing
    /// the bitcode wrapper if necessary.
    ///
    /// The first 32 bits of the input are inspected: if they equal the bitcode
    /// wrapper magic the input is parsed with
    /// [`from_wrapped`](Bitstream::from_wrapped) and the wrapper is returned as
    /// `Some(..)`; otherwise the input is parsed with
    /// [`from_raw`](Bitstream::from_raw) and `None` is returned in its place.
    ///
    /// # Errors
    ///
    /// Returns an error if the leading 32 bits cannot be read, or if the
    /// selected parse strategy fails.
    pub fn from(inner: T) -> Result<(Option<BitcodeWrapper>, Self), Error> {
        log::debug!("beginning intelligent parse");
        // Peek through a borrow, so that `inner` can still be handed (by value)
        // to whichever constructor ends up being used.
        let mut cur = BitCursor::new(&inner);

        // Read the magic to determine which parse strategy to use.
        let magic = cur.read_exact::<u32>()?;

        // The only wrapper we currently know is the bitcode wrapper.
        // If our magic doesn't match that, then we try the raw parser.
        if magic == BITCODE_WRAPPER_MAGIC {
            log::debug!("input looks like a bitcode wrapper!");
            let (wrapper, parser) = Self::from_wrapped(inner)?;
            Ok((Some(wrapper), parser))
        } else {
            log::debug!("input is probably a raw bitstream!");
            Ok((None, Self::from_raw(inner)?))
        }
    }

    /// Create a new `Bitstream` from the given source.
    ///
    /// **NOTE**: This function assumes that it's being given a "raw" bitstream,
    /// i.e. not one that's been wrapped with another container (such as the
    /// bitcode wrapper format). To parse a wrapped bitstream, use the
    /// [`from_wrapped`](Bitstream::from_wrapped) API.
    ///
    /// # Errors
    ///
    /// Returns `Error::BadContainer` if the input's length is not a multiple
    /// of 4 bytes, or if the leading magic cannot be read.
    pub fn from_raw(inner: T) -> Result<Self, Error> {
        let cur = BitCursor::new(inner);
        Self::from_cursor(cur)
    }

    /// Create a new `Bitstream` from the given wrapped source.
    ///
    /// The source is parsed as if it begins with a
    /// [bitcode wrapper](https://llvm.org/docs/BitCodeFormat.html#bitcode-wrapper-format).
    /// "Raw" inputs should be parsed with [`from_raw`](Bitstream::from_raw) instead.
    ///
    /// The wrapper's magic is returned as read; it is not validated here.
    ///
    /// # Errors
    ///
    /// Returns an error if any of the five wrapper fields cannot be read, if
    /// the wrapper's `offset + size` does not fit in `usize` or cannot be used
    /// as the length of the input, if seeking to `offset` fails
    /// (`Error::StreamParse`), or if the wrapped stream itself is rejected
    /// (`Error::BadContainer`).
    pub fn from_wrapped(inner: T) -> Result<(BitcodeWrapper, Self), Error> {
        let mut cur = BitCursor::new(&inner);

        // The fields are read in the order in which they are laid out.
        let wrapper = BitcodeWrapper {
            magic: cur.read_exact::<u32>()?,
            version: cur.read_exact::<u32>()?,
            offset: cur.read_exact::<u32>()?,
            size: cur.read_exact::<u32>()?,
            cpu_type: cur.read_exact::<u32>()?,
        };

        // NOTE(ww): The `new_with_len` API is a little bit silly -- ideally we'd just
        // take a slice of `inner` and create a new `BitCursor` with it, but we can't do
        // that while preserving the generic `T` bound.
        // Instead, we keep everything before the stream (the wrapper header and any
        // padding) in the new cursor, which keeps the offsets intelligible. The wrapped
        // stream begins at `offset` and is `size` bytes long, so the cursor has to end
        // at `offset + size`. Assuming a fixed 20-byte prefix would cut the stream short
        // whenever the wrapper places it further into the input.
        let stream_end = (wrapper.offset as usize)
            .checked_add(wrapper.size as usize)
            .ok_or_else(|| {
                Error::BadContainer(format!(
                    "bitcode wrapper offset ({}) plus size ({}) overflows",
                    wrapper.offset, wrapper.size
                ))
            })?;
        let mut cur = BitCursor::new_with_len(inner, stream_end)?;

        cur.seek(SeekFrom::Start(wrapper.offset.into()))
            .map_err(|e| {
                Error::StreamParse(format!("couldn't seek past bitcode wrapper: {:?}", e))
            })?;
        Ok((wrapper, Self::from_cursor(cur)?))
    }

    /// Advance the underlying bitstream parser by one entry.
    ///
    /// NOTE: Most users should prefer the iterator implementation.
    ///
    /// # Errors
    ///
    /// Returns whatever error the underlying parser reports, including
    /// `Error::Exhausted`, which the iterator implementation treats as the
    /// end of the stream.
    pub fn advance(&mut self) -> Result<StreamEntry, Error> {
        self.parser.advance()
    }
}

impl<T: AsRef<[u8]>> Iterator for Bitstream<T> {
    type Item = Result<StreamEntry, Error>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.advance() {
            Ok(entry) => Some(Ok(entry)),
            Err(Error::Exhausted) => None,
            Err(e) => Some(Err(e)),
        }
    }
}

// Placeholder for this module's unit tests: compiled only for test builds,
// and currently empty.
#[cfg(test)]
mod tests {}