//! Core parsing functionality for `llvm-bitstream`.
use std::collections::HashMap;
use std::convert::TryInto;
use std::iter;
use llvm_bitcursor::BitCursor;
use llvm_support::bitcodes::{BlockInfoCode, ReservedAbbrevId, ReservedBlockId};
use llvm_support::{FIRST_APPLICATION_ABBREV_ID, INITIAL_ABBREV_ID_WIDTH};
use crate::abbrev::{self, AbbrevId};
use crate::error::Error;
use crate::record::{Block, Fields, Record};
/// The kinds of entries we can see while advancing through the bitstream.
///
/// Abbreviation definitions and the contents of `BLOCKINFO` blocks are handled
/// transparently by the parser, and thus are never surfaced as `StreamEntry` values.
#[derive(Debug)]
pub enum StreamEntry {
/// The end of a block scope.
EndBlock,
/// The beginning of a new block scope. The contained
/// [`Block`](crate::record::Block) carries the block's ID and its length.
SubBlock(Block),
/// A record within the current scope. The contained
/// [`Record`](crate::record::Record) carries the record's code, its fields and,
/// for abbreviated records, the abbreviation ID that was used to parse it.
Record(Record),
}
impl StreamEntry {
    /// Consumes this `StreamEntry` and returns its inner
    /// [`Block`](crate::record::Block), if it is in fact a block.
    ///
    /// Any entry other than [`StreamEntry::SubBlock`] yields `None`.
    pub fn as_block(self) -> Option<Block> {
        if let StreamEntry::SubBlock(inner) = self {
            Some(inner)
        } else {
            None
        }
    }
}
/// Represents the necessary parse state for a particular scope in the bitstream.
///
/// Note that a scope does not *necessarily* correspond to a block: every
/// parser begins with an initial non-block scope before the first block is encountered.
#[derive(Debug)]
enum Scope {
/// The synthetic top-level scope that is active before any block has been entered.
/// It holds no abbreviation definitions.
Initial,
/// The scope of a block that has been entered.
Block {
/// The width used when reading each abbreviation ID inside this block.
abbrev_id_width: u64,
/// The ID of the block this scope belongs to.
block_id: u64,
/// The block ID most recently recorded via `SETBID`. Only meaningful when this
/// scope is a `BLOCKINFO` block; `None` until a `SETBID` has been seen.
blockinfo_block_id: Option<u64>,
/// The abbreviation definitions available in this scope, in the order they
/// were added.
abbrevs: Vec<abbrev::Abbrev>,
},
}
impl Default for Scope {
fn default() -> Self {
Self::Initial
}
}
impl Scope {
/// Returns a new (block) scope.
pub(self) fn new(abbrev_id_width: u64, block_id: u64) -> Self {
Self::Block {
abbrev_id_width: abbrev_id_width,
block_id: block_id,
blockinfo_block_id: None,
abbrevs: vec![],
}
}
/// Returns the current width used for abbreviation IDs.
pub(self) fn abbrev_id_width(&self) -> u64 {
match self {
Scope::Initial => INITIAL_ABBREV_ID_WIDTH,
Scope::Block {
abbrev_id_width, ..
} => *abbrev_id_width,
}
}
/// Extend the current (block) scope's abbreviation definition list with the given
/// iterator.
///
/// Returns an error if used on a non-block scope.
pub(self) fn extend_abbrevs(
&mut self,
new_abbrevs: impl iter::IntoIterator<Item = abbrev::Abbrev>,
) -> Result<(), Error> {
match self {
Scope::Initial => Err(Error::BadScope(
"non-block scope cannot reference abbreviations".into(),
)),
Scope::Block { abbrevs, .. } => {
abbrevs.extend(new_abbrevs);
Ok(())
}
}
}
/// Return a reference to the abbreviation definition with the given `abbrev_id`.
///
/// Returns an error if the scope cannot contain abbreviation definitions or does
/// not have one for the given ID.
pub(self) fn get_abbrev(&self, abbrev_id: u64) -> Result<&abbrev::Abbrev, Error> {
match self {
Scope::Initial => Err(Error::BadScope(
"non-block scope cannot contain records".into(),
)),
Scope::Block { abbrevs, .. } => {
let idx = (abbrev_id as usize) - FIRST_APPLICATION_ABBREV_ID;
abbrevs.get(idx).ok_or(Error::BadAbbrev(abbrev_id))
}
}
}
/// Returns `true` if this scope corresponds to a `BLOCKINFO` block.
///
/// This keeps the [`StreamParser`](StreamParser) honest when determining
/// which blocks and/or records to emit entries for.
pub(self) fn is_blockinfo(&self) -> bool {
match self {
Scope::Initial => false,
Scope::Block { block_id, .. } => *block_id == ReservedBlockId::BlockInfo as u64,
}
}
/// Returns the last block ID recorded with `SETBID` in the `BLOCKINFO` block.
///
/// This function's return is only sensible in the context of a scope corresponding
/// to `BLOCKINFO`. Use on any other scope constitutes API misuse.
pub(self) fn blockinfo_block_id(&self) -> Option<u64> {
match self {
Scope::Initial => None,
Scope::Block {
blockinfo_block_id, ..
} => *blockinfo_block_id,
}
}
/// Sets the current block ID for the `BLOCKINFO` block's state machine.
///
/// Returns an error if requested in a nonsense context, such as on any
/// non-`BLOCKINFO` scope.
pub(self) fn set_blockinfo_block_id(&mut self, new_bid: u64) -> Result<(), Error> {
if let Scope::Block {
blockinfo_block_id, ..
} = self
{
*blockinfo_block_id = Some(new_bid);
return Ok(());
}
Err(Error::BadScope(
"can't set BLOCKINFO block ID for non-BLOCKINFO scope".into(),
))
}
}
/// A parser for individual bitstream entries.
///
/// This structure is **not** a general-purpose parser for bitstream inputs:
/// it expects to be given a prepared [`BitCursor`](BitCursor) whose internal
/// state is correct (i.e., has been advanced past the initial input magic).
///
/// For a general-purpose parser with the correct state management, see
/// [`Bitstream`](crate::Bitstream).
#[derive(Debug)]
pub struct StreamParser<T: AsRef<[u8]>> {
/// The cursor that all reads are performed against.
cursor: BitCursor<T>,
/// The stack of active scopes; the last element is the current scope.
/// It is created with one (initial, non-block) scope and is never emptied.
scopes: Vec<Scope>,
/// Abbreviation definitions collected from `BLOCKINFO`, keyed by the ID of the
/// block they apply to. They are copied into a block's scope when it is entered.
blockinfo: HashMap<u64, Vec<abbrev::Abbrev>>,
}
impl<T: AsRef<[u8]>> StreamParser<T> {
/// Create a new `StreamParser` from the given `BitCursor`.
///
/// See the struct-level documentation for caveats.
pub(crate) fn new(cur: BitCursor<T>) -> Self {
Self {
cursor: cur,
scopes: vec![Scope::default()],
blockinfo: Default::default(),
}
}
/// Returns the current scope.
fn scope(&self) -> &Scope {
// Unwrap safety: `scopes` is always created with at least one scope, so
// `last()` cannot fail.
#[allow(clippy::unwrap_used)]
self.scopes.last().unwrap()
}
/// Returns the current scope as a mutable reference.
fn scope_mut(&mut self) -> &mut Scope {
// Unwrap safety: `scopes` is always created with at least one scope, so
// `last()` cannot fail.
#[allow(clippy::unwrap_used)]
self.scopes.last_mut().unwrap()
}
/// Enter a block, creating the appropriate scope state for interpreting
/// records within the block.
///
/// If this block is a "metadata" one (e.g., `BLOCKINFO`), returns `None`.
fn enter_block(&mut self) -> Result<Option<StreamEntry>, Error> {
let block_id = self.cursor.read_vbr(8)?;
let new_width = self.cursor.read_vbr(4)?;
self.cursor.align32();
if new_width < 1 {
return Err(Error::BadScope(format!(
"can't enter block: invalid code side: {}",
new_width
)));
}
// The encoded block length is measured in 32-bit words, so our
// actual block length in bytes is the word count times the bytesize
// of each word.
let block_len = self.cursor.read(32)? * 4;
log::debug!(
"entered block: ID={}, new abbrev width={}, block_len={} @ bit position {}",
block_id,
new_width,
block_len,
self.cursor.tell_bit()
);
// Create a new scope for the block we've just entered.
self.scopes.push(Scope::new(new_width, block_id));
// If our blockinfo map contains any abbrevs for the current block ID, add them here.
if let Some(abbrevs) = self.blockinfo.get(&block_id).map(|a| a.to_vec()) {
self.scope_mut().extend_abbrevs(abbrevs)?;
}
// If we've just entered a BLOCKINFO block, return `None` to avoid
// surfacing parse details to the `advance()` API.
if self.scope().is_blockinfo() {
return Ok(None);
}
// Otherwise, return an appropriate entry.
Ok(Some(StreamEntry::SubBlock(Block {
block_id: block_id,
len: block_len,
})))
}
/// Exit a block, returning the scope to the appropriate state for the parent block.
fn exit_block(&mut self) -> Result<Option<StreamEntry>, Error> {
// An END_BLOCK record just aligns the stream.
self.cursor.align32();
// NOTE(ww): We never allow an END_BLOCK to pop the last scope,
// since the last scope is synthetic and does not correspond to a real block.
if self.scopes.len() <= 1 {
return Err(Error::BadScope(
"malformed stream: cannot perform END_BLOCK because scope stack is empty".into(),
));
}
// Unwrap safety: we check for at least one scope above, so this cannot fail.
#[allow(clippy::unwrap_used)]
let scope = self.scopes.pop().unwrap();
log::debug!("exit_block: new active scope is {:?}", self.scope());
// If we're exiting a BLOCKINFO, we have nothing to return.
if scope.is_blockinfo() {
return Ok(None);
}
Ok(Some(StreamEntry::EndBlock))
}
/// Interpret a `DEFINE_ABBREV` record.
fn define_abbrev(&mut self) -> Result<(), Error> {
let abbrev = abbrev::Abbrev::new(&mut self.cursor)?;
log::debug!("new abbrev: {:?}", abbrev);
// `DEFINE_ABBREV` occurs in two contexts: either in a `BLOCKINFO`
// block (where it affects all blocks with block ID defined by the current `SETBID`),
// or in any other block, where it affects only the current scope.
// For the latter case we assume that any `BLOCKINFO`-defined abbrevs have
// already been loaded into the current scope.
if self.scope().is_blockinfo() {
let block_id = self.scope().blockinfo_block_id().ok_or_else(|| {
Error::StreamParse("DEFINE_ABBREV in BLOCKINFO but no preceding SETBID".into())
})?;
self.blockinfo
.entry(block_id)
.or_insert_with(Vec::new)
.push(abbrev);
} else {
self.scope_mut().extend_abbrevs(iter::once(abbrev))?;
}
Ok(())
}
/// Interpret an `UNABBREV_RECORD` record.
fn parse_unabbrev(&mut self) -> Result<Option<StreamEntry>, Error> {
// Sanity check: `UNABBREV_RECORD` can only occur inside a block,
// so the current scope must be a block.
if matches!(self.scope(), Scope::Initial) {
return Err(Error::StreamParse(
"UNABBREV_RECORD outside of any block scope".into(),
));
}
// An unabbrev record looks like this:
// [code:VBR6, numops:VBR6, op0:VBR6, op1:VBR6, ...]
// This isn't worth generalizing, so do it all in the body here.
let code: u64 = self.cursor.read_vbr(6)?;
let num_opnds = self.cursor.read_vbr(6)?;
log::debug!("unabbrev record code={}, num_opnds={}", code, num_opnds);
let mut fields: Fields = Vec::with_capacity(num_opnds as usize);
for _ in 0..num_opnds {
fields.push(self.cursor.read_vbr(6)?);
}
let record = Record::from_unabbrev(code, fields);
if self.scope().is_blockinfo() {
let code: BlockInfoCode = record.code.try_into()?;
match code {
BlockInfoCode::SetBid => {
let block_id: u64 = record.fields[0];
log::debug!("SETBID: BLOCKINFO block ID is now {}", block_id);
self.scope_mut().set_blockinfo_block_id(block_id)?;
}
BlockInfoCode::BlockName => log::debug!("skipping BLOCKNAME code in BLOCKINFO"),
BlockInfoCode::SetRecordName => {
log::debug!("skipping SETRECORDNAME code in BLOCKINFO")
}
o => log::debug!("skipping unsupported record {:?} in BLOCKINFO", o),
};
return Ok(None);
}
Ok(Some(StreamEntry::Record(record)))
}
/// Interpret a record using its corresponding abbreviation definition.
fn parse_with_abbrev(&mut self, abbrev_id: u64) -> Result<Option<StreamEntry>, Error> {
// To parse a record according to an abbreviation definition, we
// fetch the corresponding abbreviation (failing if we don't have one),
// then use the abbreviation for the parse.
// TODO(ww): The clone at the end here is a little annoying, but we
// need it to avoid mixing mutable and immutable borrows here.
// There is absolutely a better way to do that.
let abbrev = self.scope().get_abbrev(abbrev_id)?.clone();
let mut fields = abbrev.parse(&mut self.cursor)?;
log::debug!("parsed fields: {:?}", fields);
// Panic safety: every abbrev contains at least one operand, so this cannot panic.
// We also expect the first operand to always be a u64, indicating the record code.
let code: u64 = fields.remove(0);
if self.scope().is_blockinfo() {
return Ok(None);
}
Ok(Some(StreamEntry::Record(Record {
abbrev_id: Some(abbrev_id),
code: code,
fields: fields,
})))
}
/// Return the next [`StreamEntry`](StreamEntry) in this bitstream.
///
/// Returns an error on any parsing error, *or* the special
/// [`Error::Exhausted`](Error::Exhausted) if the bitstream has
/// been fully consumed.
pub fn advance(&mut self) -> Result<StreamEntry, Error> {
if self.cursor.exhausted() {
return Err(Error::Exhausted);
}
log::debug!(
"advancing, current scope: {:?} @ bit position {}",
self.scope(),
self.cursor.tell_bit()
);
// To return the next stream entry, we read the next abbreviation ID using
// our current width. The abbreviation ID we read determines our subsequent
// parse strategy and the kind of entry we return.
let id: abbrev::AbbrevId = self
.cursor
.read(self.scope().abbrev_id_width() as usize)?
.into();
log::debug!("next entry ID: {:?}", id);
// NOTE(ww): The strange `map` + `unwrap_or_else` pattern below is to keep the parser
// generalized without having to return `StreamEntries` that correspond to
// parse details that a stream consumer shouldn't have to be aware of
// (such as abbrev definitions and the BLOCKINFO block).
match id {
AbbrevId::Reserved(ReservedAbbrevId::EndBlock) => {
self.exit_block()?.map(Ok).unwrap_or_else(|| self.advance())
}
AbbrevId::Reserved(ReservedAbbrevId::EnterSubBlock) => self
.enter_block()?
.map(Ok)
.unwrap_or_else(|| self.advance()),
AbbrevId::Reserved(ReservedAbbrevId::DefineAbbrev) => {
// DEFINE_ABBREV is always a parse detail, so we don't even bother
// trying to return a StreamEntry for it.
self.define_abbrev()?;
self.advance()
}
AbbrevId::Reserved(ReservedAbbrevId::UnabbrevRecord) => self
.parse_unabbrev()?
.map(Ok)
.unwrap_or_else(|| self.advance()),
AbbrevId::Defined(abbrev_id) => self
.parse_with_abbrev(abbrev_id)?
.map(Ok)
.unwrap_or_else(|| self.advance()),
}
}
}