Skip to main content

llvm_bitcode/
read.rs

1use crate::bitstream::{PayloadOperand, ScalarOperand};
2use std::string::FromUtf8Error;
3use std::sync::Arc;
4use std::{collections::HashMap, convert::TryFrom, error, fmt};
5
6use crate::bitcode::{BlockInfo, RecordIter};
7use crate::bits::{self, Cursor};
8use crate::bitstream::{Abbreviation, BlockInfoCode, BuiltinAbbreviationId, Operand};
9use crate::visitor::BitStreamVisitor;
10
/// Bitstream reader errors
///
/// The user-facing text for every variant comes from the `Display` implementation.
#[derive(Debug, Clone)]
pub enum Error {
    /// A read went past the end of a record.
    EndOfRecord,
    /// An integer was out of range (likely due to misparsing the format).
    ValueOverflow,
    /// An abbreviation operand appeared where it is not allowed, e.g. an array whose
    /// element type is not a scalar, or a payload operand that is not the last operand.
    UnexpectedOperand(Option<Operand>),
    /// The signature (magic number) was not the expected one; carries the value found.
    InvalidSignature(u32),
    /// An abbreviation definition was malformed: more operands were read than declared,
    /// or an operand encoding was unknown or in a position where it is not permitted.
    InvalidAbbrev,
    /// A sub-block was opened inside a block info block.
    NestedBlockInBlockInfo,
    /// A block info entry that needs a target block appeared before any `SETBID` record.
    MissingSetBid,
    /// A block info record had an unknown code or invalid contents; carries the record code.
    InvalidBlockInfoRecord(u64),
    /// A record referenced an abbreviation ID that is not defined for the block.
    NoSuchAbbrev { block_id: u32, abbrev_id: u32 },
    /// A nested block was found where only a record was expected; carries the block ID.
    UnexpectedBlock(u32),
    /// The data of a (non top-level) block ran out before its end-block marker.
    MissingEndBlock(u32),
    /// The bit width of a fixed/VBR abbreviation operand was outside `1..=32`.
    /// The payload is the offending width cast to `u8` (so large values are truncated).
    AbbrevWidthTooSmall(u8),
    /// Error reported by the underlying bit cursor.
    ReadBits(bits::Error),
    /// A byte string was not valid UTF-8.
    Encoding(FromUtf8Error),
    /// Any other error, described by a static message.
    Other(&'static str),
}
30
31impl fmt::Display for Error {
32    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33        match self {
34            Self::EndOfRecord => write!(f, "read past end of record"),
35            Self::ValueOverflow => write!(
36                f,
37                "integer out of range (likely due to misparsing the format)"
38            ),
39            Self::UnexpectedOperand(op) => write!(f, "Unexpected operand {op:?}"),
40            Self::InvalidSignature(sig) => {
41                write!(f, "invalid signature (magic number): 0x{sig:x}")
42            }
43            Self::InvalidAbbrev => write!(f, "invalid abbreviation"),
44            Self::NestedBlockInBlockInfo => {
45                write!(f, "nested block in block info")
46            }
47            Self::UnexpectedBlock(id) => write!(f, "nested block {id}"),
48            Self::MissingSetBid => write!(f, "missing SETBID"),
49            Self::InvalidBlockInfoRecord(record_id) => {
50                write!(f, "invalid block info record `{record_id}`")
51            }
52            Self::AbbrevWidthTooSmall(width) => {
53                write!(f, "abbreviation width `{width}` is too small")
54            }
55            Self::NoSuchAbbrev {
56                block_id,
57                abbrev_id,
58            } => write!(
59                f,
60                "no such abbreviation `{abbrev_id}` in block `{block_id}`"
61            ),
62            Self::MissingEndBlock(block_id) => {
63                write!(f, "missing end block for `{block_id}`")
64            }
65            Self::ReadBits(err) => err.fmt(f),
66            Self::Encoding(err) => err.fmt(f),
67            Self::Other(err) => err.fmt(f),
68        }
69    }
70}
71
// Makes `Error` usable as a standard error type; every trait method keeps its default.
impl error::Error for Error {}
73
74impl From<bits::Error> for Error {
75    fn from(err: bits::Error) -> Self {
76        Self::ReadBits(err)
77    }
78}
79
/// A block can contain either nested blocks or records.
/// LLVM writes blocks first, but the format allows them to be mixed freely.
///
/// This is the item type produced by `BlockIter::try_next`; both variants borrow
/// from the iterator that yielded them (`'cursor`) and from the input data (`'input`).
#[derive(Debug)]
pub enum BlockItem<'cursor, 'input> {
    /// Recurse: an iterator over the contents of a nested block
    Block(BlockIter<'cursor, 'input>),
    /// Read a record from the current block
    Record(RecordIter<'cursor, 'input>),
}
89
/// Iterator over the content directly in a block
///
/// Items are pulled with `try_next` (blocks and records) or `next_record` (records only).
#[derive(Debug)]
pub struct BlockIter<'global_state, 'input> {
    /// ID of the block being iterated
    pub id: u32,
    /// Cursor over this block's data
    cursor: Cursor<'input>,
    /// Number of bits used to read each abbreviation ID in this block
    abbrev_width: u8,
    /// Abbreviations defined in this block
    block_local_abbrevs: Vec<Arc<Abbreviation>>,
    /// Global abbreviations and names
    reader: &'global_state mut BitStreamReader,
}
102
/// Bitstream reader
///
/// Holds the state shared by all blocks of a stream: the information collected
/// from block info blocks while reading.
#[derive(Debug, Clone)]
pub struct BitStreamReader {
    /// Block information (block and record names), keyed by block ID
    pub(crate) block_info: HashMap<u32, BlockInfo>,
    /// Abbreviations defined in block info blocks, keyed by the ID of the block they apply to
    global_abbrevs: HashMap<u32, Vec<Arc<Abbreviation>>>,
}
110
111impl BitStreamReader {
    /// Top level fake block ID
    ///
    /// Used as the ID of the outermost block iterator. Unlike real blocks, the
    /// top level may run out of data without an end-block marker.
    pub const TOP_LEVEL_BLOCK_ID: u32 = u32::MAX;
114
115    #[must_use]
116    pub fn new() -> Self {
117        Self {
118            block_info: HashMap::new(),
119            global_abbrevs: HashMap::new(),
120        }
121    }
122
123    /// Skip `Signature` first
124    pub fn iter_bitcode<'input>(&mut self, bitcode_data: &'input [u8]) -> BlockIter<'_, 'input> {
125        BlockIter::new(self, Cursor::new(bitcode_data), Self::TOP_LEVEL_BLOCK_ID, 2)
126    }
127
128    fn visit_block<V: BitStreamVisitor>(
129        mut block: BlockIter<'_, '_>,
130        visitor: &mut V,
131    ) -> Result<(), Error> {
132        let block_id = block.id;
133        while let Some(item) = block.try_next()? {
134            match item {
135                BlockItem::Block(new_block) => {
136                    let new_id = new_block.id;
137                    if visitor.should_enter_block(new_id) {
138                        Self::visit_block(new_block, visitor)?;
139                        visitor.did_exit_block(new_id);
140                    }
141                }
142                BlockItem::Record(record) => {
143                    visitor.visit(block_id, record.into_record()?);
144                }
145            }
146        }
147        Ok(())
148    }
149
150    /// Read abbreviated operand
151    #[inline(never)]
152    fn read_abbrev_op(cursor: &mut Cursor<'_>, num_ops_left: &mut usize) -> Result<Operand, Error> {
153        if *num_ops_left == 0 {
154            return Err(Error::InvalidAbbrev);
155        }
156        *num_ops_left -= 1;
157
158        let is_literal = cursor.read(1)?;
159        if is_literal == 1 {
160            return Ok(Operand::Scalar(ScalarOperand::Literal(cursor.read_vbr(8)?)));
161        }
162        let op_type = cursor.read(3)?;
163        Ok(match op_type {
164            1 => {
165                let width = cursor.read_vbr(5)?;
166                if width < 1 || width > 32 {
167                    return Err(Error::AbbrevWidthTooSmall(width as u8));
168                }
169                Operand::Scalar(ScalarOperand::Fixed(width as u8))
170            }
171            2 => {
172                let width = cursor.read_vbr(5)?;
173                if width < 1 || width > 32 {
174                    return Err(Error::AbbrevWidthTooSmall(width as u8));
175                }
176                Operand::Scalar(ScalarOperand::Vbr(width as u8))
177            }
178            3 if *num_ops_left == 1 => {
179                let op = Self::read_abbrev_op(cursor, num_ops_left)?;
180                if let Operand::Scalar(op) = op {
181                    Operand::Payload(PayloadOperand::Array(op))
182                } else {
183                    return Err(Error::UnexpectedOperand(Some(op)));
184                }
185            }
186            4 => Operand::Scalar(ScalarOperand::Char6),
187            5 if *num_ops_left == 0 => Operand::Payload(PayloadOperand::Blob),
188            _ => return Err(Error::InvalidAbbrev),
189        })
190    }
191
192    /// Read abbreviation
193    fn define_abbrev(
194        cursor: &mut Cursor<'_>,
195        abbrevs: &mut Vec<Arc<Abbreviation>>,
196    ) -> Result<(), Error> {
197        let mut num_ops = cursor.read_vbr(5)? as usize;
198
199        let mut fields = Vec::with_capacity(num_ops);
200        let mut payload = None;
201        while num_ops > 0 && fields.len() != fields.capacity() {
202            match Self::read_abbrev_op(cursor, &mut num_ops)? {
203                Operand::Scalar(op) => {
204                    fields.push(op);
205                }
206                Operand::Payload(op) if num_ops == 0 => {
207                    payload = Some(op);
208                }
209                op => return Err(Error::UnexpectedOperand(Some(op))),
210            }
211        }
212        let id = abbrevs.len() as u32;
213        let abbrev = Arc::new(Abbreviation {
214            fields,
215            payload,
216            id,
217        });
218        abbrevs.push(abbrev);
219        Ok(())
220    }
221
222    /// Read block info block
223    fn read_block_info_block(
224        &mut self,
225        cursor: &mut Cursor<'_>,
226        abbrev_width: u8,
227    ) -> Result<(), Error> {
228        use BuiltinAbbreviationId::*;
229
230        let mut current_block_id: Option<u32> = None;
231        loop {
232            let abbrev_id = cursor.read(abbrev_width)? as u32;
233            match BuiltinAbbreviationId::try_from(abbrev_id).map_err(|_| Error::NoSuchAbbrev {
234                block_id: 0,
235                abbrev_id,
236            })? {
237                EndBlock => {
238                    cursor.align32()?;
239                    return Ok(());
240                }
241                EnterSubBlock => {
242                    return Err(Error::NestedBlockInBlockInfo);
243                }
244                DefineAbbreviation => {
245                    let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
246                    Self::define_abbrev(cursor, self.global_abbrevs.entry(block_id).or_default())?;
247                }
248                UnabbreviatedRecord => {
249                    let mut record = RecordIter::from_cursor(cursor)?;
250                    let block = u8::try_from(record.id)
251                        .ok()
252                        .and_then(|c| BlockInfoCode::try_from(c).ok())
253                        .ok_or(Error::InvalidBlockInfoRecord(record.id))?;
254                    match block {
255                        BlockInfoCode::SetBid => {
256                            let id = record
257                                .u32()
258                                .ok()
259                                .filter(|_| record.is_empty())
260                                .ok_or(Error::InvalidBlockInfoRecord(record.id))?;
261                            current_block_id = Some(id);
262                        }
263                        BlockInfoCode::BlockName => {
264                            let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
265                            let block_info = self.block_info.entry(block_id).or_default();
266                            if let Ok(name) = String::from_utf8(record.string()?) {
267                                block_info.name = name;
268                            }
269                        }
270                        BlockInfoCode::SetRecordName => {
271                            let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
272                            let record_id = record
273                                .u64()
274                                .map_err(|_| Error::InvalidBlockInfoRecord(record.id))?;
275                            let block_info = self.block_info.entry(block_id).or_default();
276                            if let Ok(name) = String::from_utf8(record.string()?) {
277                                block_info.record_names.insert(record_id, name);
278                            }
279                        }
280                    }
281                }
282            }
283        }
284    }
285
286    /// Read block with visitor
287    pub fn read_block<V: BitStreamVisitor>(
288        &mut self,
289        cursor: Cursor<'_>,
290        block_id: u32,
291        abbrev_width: u8,
292        visitor: &mut V,
293    ) -> Result<(), Error> {
294        Self::visit_block(
295            BlockIter::new(self, cursor, block_id, abbrev_width),
296            visitor,
297        )
298    }
299}
300
301impl<'global_state, 'input> BlockIter<'global_state, 'input> {
302    pub fn next_record<'parent>(
303        &'parent mut self,
304    ) -> Result<Option<RecordIter<'parent, 'input>>, Error> {
305        match self.try_next()? {
306            None => Ok(None),
307            Some(BlockItem::Record(rec)) => Ok(Some(rec)),
308            Some(BlockItem::Block(block)) => Err(Error::UnexpectedBlock(block.id)),
309        }
310    }
311
312    #[doc(hidden)]
313    #[deprecated(note = "renamed to `try_next`")]
314    pub fn next<'parent>(&'parent mut self) -> Result<Option<BlockItem<'parent, 'input>>, Error> {
315        self.try_next()
316    }
317
318    /// Returns the next item (block or record) in this block
319    pub fn try_next<'parent>(
320        &'parent mut self,
321    ) -> Result<Option<BlockItem<'parent, 'input>>, Error> {
322        if self.cursor.is_at_end() {
323            return if self.id == BitStreamReader::TOP_LEVEL_BLOCK_ID {
324                Ok(None)
325            } else {
326                Err(Error::MissingEndBlock(self.id))
327            };
328        }
329
330        let abbrev_id = self.cursor.read(self.abbrev_width)? as u32;
331
332        if let Ok(builtin_abbrev) = BuiltinAbbreviationId::try_from(abbrev_id) {
333            use BuiltinAbbreviationId::*;
334            match builtin_abbrev {
335                EndBlock => {
336                    self.cursor.align32()?;
337                    Ok(None)
338                }
339                EnterSubBlock => {
340                    let block_id = self.cursor.read_vbr(8)? as u32;
341                    let new_abbrev_width = self.cursor.read_vbr(4)? as u8;
342                    self.cursor.align32()?;
343                    let block_length = self.cursor.read(32)? as usize * 4;
344                    let mut cursor = self.cursor.take_slice(block_length)?;
345
346                    if block_id == 0 {
347                        self.reader
348                            .read_block_info_block(&mut cursor, new_abbrev_width)?;
349                        return self.try_next();
350                    }
351
352                    // Create new block iterator
353                    let block_iter =
354                        BlockIter::new(self.reader, cursor, block_id, new_abbrev_width);
355                    Ok(Some(BlockItem::Block(block_iter)))
356                }
357                DefineAbbreviation => {
358                    BitStreamReader::define_abbrev(
359                        &mut self.cursor,
360                        &mut self.block_local_abbrevs,
361                    )?;
362                    self.try_next()
363                }
364                UnabbreviatedRecord => {
365                    let record_iter = RecordIter::from_cursor(&mut self.cursor)?;
366                    Ok(Some(BlockItem::Record(record_iter)))
367                }
368            }
369        } else {
370            let abbrev_index = abbrev_id as usize - 4;
371            let global_abbrevs = self
372                .reader
373                .global_abbrevs
374                .get(&self.id)
375                .map(|v| v.as_slice())
376                .unwrap_or_default();
377
378            // > Any abbreviations defined in a BLOCKINFO record for the particular block type receive IDs first,
379            // > followed by any abbreviations defined within the block itself.
380            let abbrev = if let Some(local_index) = abbrev_index.checked_sub(global_abbrevs.len()) {
381                self.block_local_abbrevs.get(local_index).cloned()
382            } else {
383                global_abbrevs.get(abbrev_index).cloned()
384            };
385
386            let abbrev = abbrev.ok_or(Error::NoSuchAbbrev {
387                block_id: self.id,
388                abbrev_id,
389            })?;
390
391            Ok(Some(BlockItem::Record(RecordIter::from_cursor_abbrev(
392                &mut self.cursor,
393                abbrev,
394            )?)))
395        }
396    }
397
398    /// Bit width of abbreviation IDs in this block.
399    ///
400    /// This is an implementation detail,
401    /// intended only for debugging or data dumps.
402    #[must_use]
403    pub fn debug_abbrev_width(&self) -> u8 {
404        self.abbrev_width
405    }
406
407    /// Valid only before any record or subblock has been read. This is the block size in bytes.
408    ///
409    /// This is an implementation detail,
410    /// intended only for debugging or data dumps.
411    #[must_use]
412    pub fn debug_data_len(&self) -> Option<usize> {
413        let bits = self.cursor.unconsumed_bit_len();
414        (bits & 31 != 0).then_some(bits >> 3)
415    }
416
417    fn new(
418        reader: &'global_state mut BitStreamReader,
419        cursor: Cursor<'input>,
420        block_id: u32,
421        abbrev_width: u8,
422    ) -> Self {
423        Self {
424            id: block_id,
425            cursor,
426            abbrev_width,
427            block_local_abbrevs: Vec::new(),
428            reader,
429        }
430    }
431}