Skip to main content

llvm_bitcode/
read.rs

1use crate::bitstream::{PayloadOperand, ScalarOperand};
2use std::num::TryFromIntError;
3use std::string::FromUtf8Error;
4use std::sync::Arc;
5use std::{collections::HashMap, convert::TryFrom, error, fmt};
6
7use crate::bitcode::{BlockInfo, RecordIter};
8use crate::bits::{self, Cursor};
9use crate::bitstream::{Abbreviation, BlockInfoCode, BuiltinAbbreviationId, Operand};
10use crate::visitor::BitStreamVisitor;
11
12/// Bitstream reader errors
13#[derive(Debug, Clone)]
14pub enum Error {
15    EndOfRecord,
16    ValueOverflow,
17    UnexpectedOperand(Option<Operand>),
18    InvalidSignature(u32),
19    InvalidAbbrev,
20    NestedBlockInBlockInfo,
21    MissingSetBid,
22    InvalidBlockInfoRecord(u64),
23    NoSuchAbbrev { block_id: u32, abbrev_id: u32 },
24    UnexpectedBlock(u32),
25    MissingEndBlock(u32),
26    AbbrevWidthTooSmall(u8),
27    ReadBits(bits::Error),
28    Encoding(FromUtf8Error),
29    Other(&'static str),
30}
31
32impl fmt::Display for Error {
33    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
34        match self {
35            Self::EndOfRecord => write!(f, "read past end of record"),
36            Self::ValueOverflow => write!(
37                f,
38                "integer out of range (likely due to misparsing the format)"
39            ),
40            Self::UnexpectedOperand(op) => write!(f, "Unexpected operand {op:?}"),
41            Self::InvalidSignature(sig) => {
42                write!(f, "invalid signature (magic number): 0x{sig:x}")
43            }
44            Self::InvalidAbbrev => write!(f, "invalid abbreviation"),
45            Self::NestedBlockInBlockInfo => {
46                write!(f, "nested block in block info")
47            }
48            Self::UnexpectedBlock(id) => write!(f, "nested block {id}"),
49            Self::MissingSetBid => write!(f, "missing SETBID"),
50            Self::InvalidBlockInfoRecord(record_id) => {
51                write!(f, "invalid block info record `{record_id}`")
52            }
53            Self::AbbrevWidthTooSmall(width) => {
54                write!(f, "abbreviation width `{width}` is too small")
55            }
56            Self::NoSuchAbbrev {
57                block_id,
58                abbrev_id,
59            } => write!(
60                f,
61                "no such abbreviation `{abbrev_id}` in block `{block_id}`"
62            ),
63            Self::MissingEndBlock(block_id) => {
64                write!(f, "missing end block for `{block_id}`")
65            }
66            Self::ReadBits(err) => err.fmt(f),
67            Self::Encoding(err) => err.fmt(f),
68            Self::Other(err) => err.fmt(f),
69        }
70    }
71}
72
73impl error::Error for Error {}
74
75impl From<bits::Error> for Error {
76    fn from(err: bits::Error) -> Self {
77        Self::ReadBits(err)
78    }
79}
80
81impl From<TryFromIntError> for Error {
82    fn from(_: TryFromIntError) -> Self {
83        Self::ValueOverflow
84    }
85}
86
87/// A block can contain either nested blocks or records.
88/// LLVM writes blocks first, but the format allows them to be mixed freely.
89#[derive(Debug)]
90pub enum BlockItem<'cursor, 'input> {
91    /// Recurse
92    Block(BlockIter<'cursor, 'input>),
93    /// Read a record from the current block
94    Record(RecordIter<'cursor, 'input>),
95}
96
97/// Iterator content directly in a block
98#[derive(Debug)]
99pub struct BlockIter<'global_state, 'input> {
100    /// ID of the block being iterated
101    pub id: u32,
102    cursor: Cursor<'input>,
103    abbrev_width: u8,
104    /// Abbreviations defined in this block
105    block_local_abbrevs: Vec<Arc<Abbreviation>>,
106    /// Global abbreviations and names
107    reader: &'global_state mut BitStreamReader,
108}
109
110/// Bitstream reader
111#[derive(Debug, Clone)]
112pub struct BitStreamReader {
113    /// Block information
114    pub(crate) block_info: HashMap<u32, BlockInfo>,
115    global_abbrevs: HashMap<u32, Vec<Arc<Abbreviation>>>,
116}
117
118impl BitStreamReader {
119    /// Top level fake block ID
120    pub const TOP_LEVEL_BLOCK_ID: u32 = u32::MAX;
121
122    #[must_use]
123    pub fn new() -> Self {
124        Self {
125            block_info: HashMap::new(),
126            global_abbrevs: HashMap::new(),
127        }
128    }
129
130    /// Skip `Signature` first
131    pub fn iter_bitcode<'input>(&mut self, bitcode_data: &'input [u8]) -> BlockIter<'_, 'input> {
132        BlockIter::new(self, Cursor::new(bitcode_data), Self::TOP_LEVEL_BLOCK_ID, 2)
133    }
134
135    fn visit_block<V: BitStreamVisitor>(
136        mut block: BlockIter<'_, '_>,
137        visitor: &mut V,
138    ) -> Result<(), Error> {
139        let block_id = block.id;
140        while let Some(item) = block.try_next()? {
141            match item {
142                BlockItem::Block(new_block) => {
143                    let new_id = new_block.id;
144                    if visitor.should_enter_block(new_id) {
145                        Self::visit_block(new_block, visitor)?;
146                        visitor.did_exit_block(new_id);
147                    }
148                }
149                BlockItem::Record(record) => {
150                    visitor.visit(block_id, record.into_record()?);
151                }
152            }
153        }
154        Ok(())
155    }
156
157    /// Read abbreviated operand
158    #[inline(never)]
159    fn read_abbrev_op(cursor: &mut Cursor<'_>, num_ops_left: &mut usize) -> Result<Operand, Error> {
160        if *num_ops_left == 0 {
161            return Err(Error::InvalidAbbrev);
162        }
163        *num_ops_left -= 1;
164
165        let is_literal = cursor.read(1)?;
166        if is_literal == 1 {
167            return Ok(Operand::Scalar(ScalarOperand::Literal(
168                cursor.read_vbr_fixed::<8>()?,
169            )));
170        }
171        let op_type = cursor.read(3)?;
172        Ok(match op_type {
173            1 => {
174                let width = cursor.read_vbr_fixed::<5>()?;
175                if width < 1 || width > 32 {
176                    return Err(Error::AbbrevWidthTooSmall(width as u8));
177                }
178                Operand::Scalar(ScalarOperand::Fixed(width as u8))
179            }
180            2 => {
181                let width = cursor.read_vbr_fixed::<5>()?;
182                if width < 1 || width > 32 {
183                    return Err(Error::AbbrevWidthTooSmall(width as u8));
184                }
185                Operand::Scalar(ScalarOperand::Vbr(width as u8))
186            }
187            3 if *num_ops_left == 1 => {
188                let op = Self::read_abbrev_op(cursor, num_ops_left)?;
189                if let Operand::Scalar(op) = op {
190                    Operand::Payload(PayloadOperand::Array(op))
191                } else {
192                    return Err(Error::UnexpectedOperand(Some(op)));
193                }
194            }
195            4 => Operand::Scalar(ScalarOperand::Char6),
196            5 if *num_ops_left == 0 => Operand::Payload(PayloadOperand::Blob),
197            _ => return Err(Error::InvalidAbbrev),
198        })
199    }
200
201    /// Read abbreviation
202    fn define_abbrev(
203        cursor: &mut Cursor<'_>,
204        abbrevs: &mut Vec<Arc<Abbreviation>>,
205    ) -> Result<(), Error> {
206        let mut num_ops = cursor.read_vbr_fixed::<5>()? as usize;
207
208        let mut fields = Vec::with_capacity(num_ops);
209        let mut payload = None;
210        while num_ops > 0 && fields.len() != fields.capacity() {
211            match Self::read_abbrev_op(cursor, &mut num_ops)? {
212                Operand::Scalar(op) => {
213                    fields.push(op);
214                }
215                Operand::Payload(op) if num_ops == 0 => {
216                    payload = Some(op);
217                }
218                op => return Err(Error::UnexpectedOperand(Some(op))),
219            }
220        }
221        let id = abbrevs.len() as u32;
222        let abbrev = Arc::new(Abbreviation {
223            fields,
224            payload,
225            id,
226        });
227        abbrevs.push(abbrev);
228        Ok(())
229    }
230
231    /// Read block info block
232    fn read_block_info_block(
233        &mut self,
234        cursor: &mut Cursor<'_>,
235        abbrev_width: u8,
236    ) -> Result<(), Error> {
237        use BuiltinAbbreviationId::*;
238
239        let mut current_block_id: Option<u32> = None;
240        loop {
241            let abbrev_id = cursor.read(abbrev_width)? as u32;
242            match BuiltinAbbreviationId::try_from(abbrev_id).map_err(|_| Error::NoSuchAbbrev {
243                block_id: 0,
244                abbrev_id,
245            })? {
246                EndBlock => {
247                    cursor.align32()?;
248                    return Ok(());
249                }
250                EnterSubBlock => {
251                    return Err(Error::NestedBlockInBlockInfo);
252                }
253                DefineAbbreviation => {
254                    let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
255                    Self::define_abbrev(cursor, self.global_abbrevs.entry(block_id).or_default())?;
256                }
257                UnabbreviatedRecord => {
258                    let mut record = RecordIter::from_cursor(cursor)?;
259                    let block = u8::try_from(record.id)
260                        .ok()
261                        .and_then(|c| BlockInfoCode::try_from(c).ok())
262                        .ok_or(Error::InvalidBlockInfoRecord(record.id))?;
263                    match block {
264                        BlockInfoCode::SetBid => {
265                            let id = record
266                                .u32()
267                                .ok()
268                                .filter(|_| record.is_empty())
269                                .ok_or(Error::InvalidBlockInfoRecord(record.id))?;
270                            current_block_id = Some(id);
271                        }
272                        BlockInfoCode::BlockName => {
273                            let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
274                            let block_info = self.block_info.entry(block_id).or_default();
275                            if let Ok(name) = String::from_utf8(record.string()?) {
276                                block_info.name = name;
277                            }
278                        }
279                        BlockInfoCode::SetRecordName => {
280                            let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
281                            let record_id = record
282                                .u64()
283                                .map_err(|_| Error::InvalidBlockInfoRecord(record.id))?;
284                            let block_info = self.block_info.entry(block_id).or_default();
285                            if let Ok(name) = String::from_utf8(record.string()?) {
286                                block_info.record_names.insert(record_id, name);
287                            }
288                        }
289                    }
290                }
291            }
292        }
293    }
294
295    /// Read block with visitor
296    pub fn read_block<V: BitStreamVisitor>(
297        &mut self,
298        cursor: Cursor<'_>,
299        block_id: u32,
300        abbrev_width: u8,
301        visitor: &mut V,
302    ) -> Result<(), Error> {
303        Self::visit_block(
304            BlockIter::new(self, cursor, block_id, abbrev_width),
305            visitor,
306        )
307    }
308}
309
310impl<'global_state, 'input> BlockIter<'global_state, 'input> {
311    pub fn next_record<'parent>(
312        &'parent mut self,
313    ) -> Result<Option<RecordIter<'parent, 'input>>, Error> {
314        match self.try_next()? {
315            None => Ok(None),
316            Some(BlockItem::Record(rec)) => Ok(Some(rec)),
317            Some(BlockItem::Block(block)) => Err(Error::UnexpectedBlock(block.id)),
318        }
319    }
320
321    #[doc(hidden)]
322    #[deprecated(note = "renamed to `try_next`")]
323    pub fn next<'parent>(&'parent mut self) -> Result<Option<BlockItem<'parent, 'input>>, Error> {
324        self.try_next()
325    }
326
327    /// Returns the next item (block or record) in this block
328    pub fn try_next<'parent>(
329        &'parent mut self,
330    ) -> Result<Option<BlockItem<'parent, 'input>>, Error> {
331        if self.cursor.is_at_end() {
332            return if self.id == BitStreamReader::TOP_LEVEL_BLOCK_ID {
333                Ok(None)
334            } else {
335                Err(Error::MissingEndBlock(self.id))
336            };
337        }
338
339        let abbrev_id = self.cursor.read(self.abbrev_width)? as u32;
340
341        if let Ok(builtin_abbrev) = BuiltinAbbreviationId::try_from(abbrev_id) {
342            use BuiltinAbbreviationId::*;
343            match builtin_abbrev {
344                EndBlock => {
345                    self.cursor.align32()?;
346                    Ok(None)
347                }
348                EnterSubBlock => {
349                    let block_id = self.cursor.read_vbr_fixed::<8>()? as u32;
350                    let new_abbrev_width = self.cursor.read_vbr_fixed::<4>()? as u8;
351                    self.cursor.align32()?;
352                    let block_length = self.cursor.read(32)? as usize * 4;
353                    let mut cursor = self.cursor.take_slice(block_length)?;
354
355                    if block_id == 0 {
356                        self.reader
357                            .read_block_info_block(&mut cursor, new_abbrev_width)?;
358                        return self.try_next();
359                    }
360
361                    // Create new block iterator
362                    let block_iter =
363                        BlockIter::new(self.reader, cursor, block_id, new_abbrev_width);
364                    Ok(Some(BlockItem::Block(block_iter)))
365                }
366                DefineAbbreviation => {
367                    BitStreamReader::define_abbrev(
368                        &mut self.cursor,
369                        &mut self.block_local_abbrevs,
370                    )?;
371                    self.try_next()
372                }
373                UnabbreviatedRecord => {
374                    let record_iter = RecordIter::from_cursor(&mut self.cursor)?;
375                    Ok(Some(BlockItem::Record(record_iter)))
376                }
377            }
378        } else {
379            let abbrev_index = abbrev_id as usize - 4;
380            let global_abbrevs = self
381                .reader
382                .global_abbrevs
383                .get(&self.id)
384                .map(|v| v.as_slice())
385                .unwrap_or_default();
386
387            // > Any abbreviations defined in a BLOCKINFO record for the particular block type receive IDs first,
388            // > followed by any abbreviations defined within the block itself.
389            let abbrev = if let Some(local_index) = abbrev_index.checked_sub(global_abbrevs.len()) {
390                self.block_local_abbrevs.get(local_index).cloned()
391            } else {
392                global_abbrevs.get(abbrev_index).cloned()
393            };
394
395            let abbrev = abbrev.ok_or(Error::NoSuchAbbrev {
396                block_id: self.id,
397                abbrev_id,
398            })?;
399
400            Ok(Some(BlockItem::Record(RecordIter::from_cursor_abbrev(
401                &mut self.cursor,
402                abbrev,
403            )?)))
404        }
405    }
406
407    /// Bit width of abbreviation IDs in this block.
408    ///
409    /// This is an implementation detail,
410    /// intended only for debugging or data dumps.
411    #[must_use]
412    pub fn debug_abbrev_width(&self) -> u8 {
413        self.abbrev_width
414    }
415
416    /// Valid only before any record or subblock has been read. This is the block size in bytes.
417    ///
418    /// This is an implementation detail,
419    /// intended only for debugging or data dumps.
420    #[must_use]
421    pub fn debug_data_len(&self) -> Option<usize> {
422        let bits = self.cursor.unconsumed_bit_len();
423        (bits & 31 != 0).then_some(bits >> 3)
424    }
425
426    fn new(
427        reader: &'global_state mut BitStreamReader,
428        cursor: Cursor<'input>,
429        block_id: u32,
430        abbrev_width: u8,
431    ) -> Self {
432        Self {
433            id: block_id,
434            cursor,
435            abbrev_width,
436            block_local_abbrevs: Vec::new(),
437            reader,
438        }
439    }
440}