llvm_bitcode/
read.rs

1use crate::bitstream::{PayloadOperand, ScalarOperand};
2use std::string::FromUtf8Error;
3use std::sync::Arc;
4use std::{collections::HashMap, convert::TryFrom, error, fmt};
5
6use crate::bitcode::{BlockInfo, RecordIter};
7use crate::bits::{self, Cursor};
8use crate::bitstream::{Abbreviation, BlockInfoCode, BuiltinAbbreviationId, Operand};
9use crate::visitor::BitStreamVisitor;
10
/// Errors produced while reading a bitstream.
#[derive(Debug, Clone)]
pub enum Error {
    /// A read went past the end of the current record.
    EndOfRecord,
    /// An integer that was read is too big for its destination.
    ValueOverflow,
    /// An abbreviation operand appeared where it is not allowed
    /// (e.g. an array whose element is not a scalar, or a payload operand that is not last).
    UnexpectedOperand(Option<Operand>),
    /// The stream's signature (magic number) is not valid; carries the value found.
    InvalidSignature(u32),
    /// An abbreviation definition is malformed: it ran out of operands,
    /// used an unknown encoding, or placed an array/blob in the wrong position.
    InvalidAbbrev,
    /// A sub-block was found inside a BLOCKINFO block.
    NestedBlockInBlockInfo,
    /// A BLOCKINFO entry that needs a target block appeared before any SETBID record.
    MissingSetBid,
    /// A BLOCKINFO record has an unknown code or malformed contents; carries the record ID.
    InvalidBlockInfoRecord(u64),
    /// A record referenced an abbreviation ID that is not defined for its block.
    NoSuchAbbrev { block_id: u32, abbrev_id: u32 },
    /// A nested block was found where only a record was expected; carries the block ID.
    UnexpectedBlock(u32),
    /// The data of a nested block ended without an END_BLOCK; carries the block ID.
    MissingEndBlock(u32),
    /// A fixed/VBR operand width in an abbreviation is outside the accepted `1..=32` range.
    /// Despite the name, the abbreviation parser also uses it for widths that are too large.
    AbbrevWidthTooSmall(u8),
    /// Low-level failure reported by the bit cursor.
    ReadBits(bits::Error),
    /// Bytes could not be decoded as UTF-8.
    // NOTE(review): not constructed in this part of the file — confirm where it is produced.
    Encoding(FromUtf8Error),
    /// Any other failure, described by a static message.
    Other(&'static str),
}
30
31impl fmt::Display for Error {
32    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33        match self {
34            Self::EndOfRecord => write!(f, "read past end of record"),
35            Self::ValueOverflow => write!(f, "read integer too big"),
36            Self::UnexpectedOperand(op) => write!(f, "Unexpected operand {op:?}"),
37            Self::InvalidSignature(sig) => {
38                write!(f, "invalid signature (magic number): 0x{sig:x}")
39            }
40            Self::InvalidAbbrev => write!(f, "invalid abbreviation"),
41            Self::NestedBlockInBlockInfo => {
42                write!(f, "nested block in block info")
43            }
44            Self::UnexpectedBlock(id) => write!(f, "nested block {id}"),
45            Self::MissingSetBid => write!(f, "missing SETBID"),
46            Self::InvalidBlockInfoRecord(record_id) => {
47                write!(f, "invalid block info record `{record_id}`")
48            }
49            Self::AbbrevWidthTooSmall(width) => {
50                write!(f, "abbreviation width `{width}` is too small")
51            }
52            Self::NoSuchAbbrev {
53                block_id,
54                abbrev_id,
55            } => write!(
56                f,
57                "no such abbreviation `{abbrev_id}` in block `{block_id}`"
58            ),
59            Self::MissingEndBlock(block_id) => {
60                write!(f, "missing end block for `{block_id}`")
61            }
62            Self::ReadBits(err) => err.fmt(f),
63            Self::Encoding(err) => err.fmt(f),
64            Self::Other(err) => err.fmt(f),
65        }
66    }
67}
68
// Marker impl: relies on the `Debug` and `Display` impls above and keeps the default `source()`.
impl error::Error for Error {}
70
71impl From<bits::Error> for Error {
72    fn from(err: bits::Error) -> Self {
73        Self::ReadBits(err)
74    }
75}
76
/// One item found while iterating a block: either a nested block or a record.
///
/// A block can contain either nested blocks or records.
/// LLVM writes blocks first, but the format allows them to be mixed freely.
#[derive(Debug)]
pub enum BlockItem<'cursor, 'input> {
    /// A nested block; iterate the contained [`BlockIter`] to recurse into it.
    Block(BlockIter<'cursor, 'input>),
    /// A record that belongs to the block currently being iterated.
    Record(RecordIter<'cursor, 'input>),
}
86
/// Iterates over the items (records and nested blocks) directly inside one block.
#[derive(Debug)]
pub struct BlockIter<'global_state, 'input> {
    /// ID of the block being iterated
    pub id: u32,
    /// Read position within this block's data
    cursor: Cursor<'input>,
    /// Number of bits used to read each abbreviation ID in this block
    abbrev_width: u8,
    /// Abbreviations defined in this block
    block_local_abbrevs: Vec<Arc<Abbreviation>>,
    /// Global abbreviations and names
    reader: &'global_state mut BitStreamReader,
}
99
/// Bitstream reader.
///
/// Holds the state shared by all blocks of a stream: what BLOCKINFO blocks declared.
#[derive(Debug, Clone)]
pub struct BitStreamReader {
    /// Block information (block and record names), keyed by block ID
    pub(crate) block_info: HashMap<u32, BlockInfo>,
    /// Abbreviations defined through BLOCKINFO, keyed by the block ID they apply to
    global_abbrevs: HashMap<u32, Vec<Arc<Abbreviation>>>,
}
107
108impl BitStreamReader {
    /// Fake block ID used for the top level of the stream, which is not a real block.
    ///
    /// `BlockIter::next` treats running out of data as a normal end only for this ID.
    pub const TOP_LEVEL_BLOCK_ID: u32 = u32::MAX;
111
112    #[must_use]
113    pub fn new() -> Self {
114        Self {
115            block_info: HashMap::new(),
116            global_abbrevs: HashMap::new(),
117        }
118    }
119
120    /// Skip `Signature` first
121    pub fn iter_bitcode<'input>(&mut self, bitcode_data: &'input [u8]) -> BlockIter<'_, 'input> {
122        BlockIter::new(self, Cursor::new(bitcode_data), Self::TOP_LEVEL_BLOCK_ID, 2)
123    }
124
125    fn visit_block<V: BitStreamVisitor>(
126        mut block: BlockIter<'_, '_>,
127        visitor: &mut V,
128    ) -> Result<(), Error> {
129        let block_id = block.id;
130        while let Some(item) = block.next()? {
131            match item {
132                BlockItem::Block(new_block) => {
133                    let new_id = new_block.id;
134                    if visitor.should_enter_block(new_id) {
135                        Self::visit_block(new_block, visitor)?;
136                        visitor.did_exit_block(new_id);
137                    }
138                }
139                BlockItem::Record(record) => {
140                    visitor.visit(block_id, record.into_record()?);
141                }
142            }
143        }
144        Ok(())
145    }
146
147    /// Read abbreviated operand
148    #[inline(never)]
149    fn read_abbrev_op(cursor: &mut Cursor<'_>, num_ops_left: &mut usize) -> Result<Operand, Error> {
150        if *num_ops_left == 0 {
151            return Err(Error::InvalidAbbrev);
152        }
153        *num_ops_left -= 1;
154
155        let is_literal = cursor.read(1)?;
156        if is_literal == 1 {
157            return Ok(Operand::Scalar(ScalarOperand::Literal(cursor.read_vbr(8)?)));
158        }
159        let op_type = cursor.read(3)?;
160        Ok(match op_type {
161            1 => {
162                let width = cursor.read_vbr(5)?;
163                if width < 1 || width > 32 {
164                    return Err(Error::AbbrevWidthTooSmall(width as u8));
165                }
166                Operand::Scalar(ScalarOperand::Fixed(width as u8))
167            }
168            2 => {
169                let width = cursor.read_vbr(5)?;
170                if width < 1 || width > 32 {
171                    return Err(Error::AbbrevWidthTooSmall(width as u8));
172                }
173                Operand::Scalar(ScalarOperand::Vbr(width as u8))
174            }
175            3 if *num_ops_left == 1 => {
176                let op = Self::read_abbrev_op(cursor, num_ops_left)?;
177                if let Operand::Scalar(op) = op {
178                    Operand::Payload(PayloadOperand::Array(op))
179                } else {
180                    return Err(Error::UnexpectedOperand(Some(op)));
181                }
182            }
183            4 => Operand::Scalar(ScalarOperand::Char6),
184            5 if *num_ops_left == 0 => Operand::Payload(PayloadOperand::Blob),
185            _ => return Err(Error::InvalidAbbrev),
186        })
187    }
188
189    /// Read abbreviation
190    fn define_abbrev(
191        cursor: &mut Cursor<'_>,
192        abbrevs: &mut Vec<Arc<Abbreviation>>,
193    ) -> Result<(), Error> {
194        let mut num_ops = cursor.read_vbr(5)? as usize;
195
196        let mut fields = Vec::with_capacity(num_ops);
197        let mut payload = None;
198        while num_ops > 0 && fields.len() != fields.capacity() {
199            match Self::read_abbrev_op(cursor, &mut num_ops)? {
200                Operand::Scalar(op) => {
201                    fields.push(op);
202                }
203                Operand::Payload(op) if num_ops == 0 => {
204                    payload = Some(op);
205                }
206                op => return Err(Error::UnexpectedOperand(Some(op))),
207            }
208        }
209        let id = abbrevs.len() as u32;
210        let abbrev = Arc::new(Abbreviation {
211            fields,
212            payload,
213            id,
214        });
215        abbrevs.push(abbrev);
216        Ok(())
217    }
218
219    /// Read block info block
220    fn read_block_info_block(
221        &mut self,
222        cursor: &mut Cursor<'_>,
223        abbrev_width: u8,
224    ) -> Result<(), Error> {
225        use BuiltinAbbreviationId::*;
226
227        let mut current_block_id: Option<u32> = None;
228        loop {
229            let abbrev_id = cursor.read(abbrev_width)? as u32;
230            match BuiltinAbbreviationId::try_from(abbrev_id).map_err(|_| Error::NoSuchAbbrev {
231                block_id: 0,
232                abbrev_id,
233            })? {
234                EndBlock => {
235                    cursor.align32()?;
236                    return Ok(());
237                }
238                EnterSubBlock => {
239                    return Err(Error::NestedBlockInBlockInfo);
240                }
241                DefineAbbreviation => {
242                    let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
243                    Self::define_abbrev(cursor, self.global_abbrevs.entry(block_id).or_default())?;
244                }
245                UnabbreviatedRecord => {
246                    let mut record = RecordIter::from_cursor(cursor)?;
247                    let block = u8::try_from(record.id)
248                        .ok()
249                        .and_then(|c| BlockInfoCode::try_from(c).ok())
250                        .ok_or(Error::InvalidBlockInfoRecord(record.id))?;
251                    match block {
252                        BlockInfoCode::SetBid => {
253                            let id = record
254                                .u32()
255                                .ok()
256                                .filter(|_| record.is_empty())
257                                .ok_or(Error::InvalidBlockInfoRecord(record.id))?;
258                            current_block_id = Some(id);
259                        }
260                        BlockInfoCode::BlockName => {
261                            let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
262                            let block_info = self.block_info.entry(block_id).or_default();
263                            if let Ok(name) = String::from_utf8(record.string()?) {
264                                block_info.name = name;
265                            }
266                        }
267                        BlockInfoCode::SetRecordName => {
268                            let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
269                            let record_id = record
270                                .u64()
271                                .map_err(|_| Error::InvalidBlockInfoRecord(record.id))?;
272                            let block_info = self.block_info.entry(block_id).or_default();
273                            if let Ok(name) = String::from_utf8(record.string()?) {
274                                block_info.record_names.insert(record_id, name);
275                            }
276                        }
277                    }
278                }
279            }
280        }
281    }
282
283    /// Read block with visitor
284    pub fn read_block<V: BitStreamVisitor>(
285        &mut self,
286        cursor: Cursor<'_>,
287        block_id: u32,
288        abbrev_width: u8,
289        visitor: &mut V,
290    ) -> Result<(), Error> {
291        Self::visit_block(
292            BlockIter::new(self, cursor, block_id, abbrev_width),
293            visitor,
294        )
295    }
296}
297
298impl<'global_state, 'input> BlockIter<'global_state, 'input> {
299    pub fn next_record<'parent>(
300        &'parent mut self,
301    ) -> Result<Option<RecordIter<'parent, 'input>>, Error> {
302        match self.next()? {
303            None => Ok(None),
304            Some(BlockItem::Record(rec)) => Ok(Some(rec)),
305            Some(BlockItem::Block(block)) => Err(Error::UnexpectedBlock(block.id)),
306        }
307    }
308
309    /// Returns the next item (block or record) in this block
310    pub fn next<'parent>(&'parent mut self) -> Result<Option<BlockItem<'parent, 'input>>, Error> {
311        if self.cursor.is_at_end() {
312            return if self.id == BitStreamReader::TOP_LEVEL_BLOCK_ID {
313                Ok(None)
314            } else {
315                Err(Error::MissingEndBlock(self.id))
316            };
317        }
318
319        let abbrev_id = self.cursor.read(self.abbrev_width)? as u32;
320
321        if let Ok(builtin_abbrev) = BuiltinAbbreviationId::try_from(abbrev_id) {
322            use BuiltinAbbreviationId::*;
323            match builtin_abbrev {
324                EndBlock => {
325                    self.cursor.align32()?;
326                    Ok(None)
327                }
328                EnterSubBlock => {
329                    let block_id = self.cursor.read_vbr(8)? as u32;
330                    let new_abbrev_width = self.cursor.read_vbr(4)? as u8;
331                    self.cursor.align32()?;
332                    let block_length = self.cursor.read(32)? as usize * 4;
333                    let mut cursor = self.cursor.take_slice(block_length)?;
334
335                    if block_id == 0 {
336                        self.reader
337                            .read_block_info_block(&mut cursor, new_abbrev_width)?;
338                        return self.next();
339                    }
340
341                    // Create new block iterator
342                    let block_iter =
343                        BlockIter::new(self.reader, cursor, block_id, new_abbrev_width);
344                    Ok(Some(BlockItem::Block(block_iter)))
345                }
346                DefineAbbreviation => {
347                    BitStreamReader::define_abbrev(
348                        &mut self.cursor,
349                        &mut self.block_local_abbrevs,
350                    )?;
351                    self.next()
352                }
353                UnabbreviatedRecord => {
354                    let record_iter = RecordIter::from_cursor(&mut self.cursor)?;
355                    Ok(Some(BlockItem::Record(record_iter)))
356                }
357            }
358        } else {
359            let abbrev_index = abbrev_id as usize - 4;
360            let global_abbrevs = self
361                .reader
362                .global_abbrevs
363                .get(&self.id)
364                .map(|v| v.as_slice())
365                .unwrap_or_default();
366
367            // > Any abbreviations defined in a BLOCKINFO record for the particular block type receive IDs first,
368            // > followed by any abbreviations defined within the block itself.
369            let abbrev = if let Some(local_index) = abbrev_index.checked_sub(global_abbrevs.len()) {
370                self.block_local_abbrevs.get(local_index).cloned()
371            } else {
372                global_abbrevs.get(abbrev_index).cloned()
373            };
374
375            let abbrev = abbrev.ok_or(Error::NoSuchAbbrev {
376                block_id: self.id,
377                abbrev_id,
378            })?;
379
380            Ok(Some(BlockItem::Record(RecordIter::from_cursor_abbrev(
381                &mut self.cursor,
382                abbrev,
383            )?)))
384        }
385    }
386
387    /// Bit width of abbreviation IDs in this block.
388    ///
389    /// This is an implementation detail,
390    /// intended only for debugging or data dumps.
391    #[must_use]
392    pub fn debug_abbrev_width(&self) -> u8 {
393        self.abbrev_width
394    }
395
396    /// Valid only before any record or subblock has been read. This is the block size in bytes.
397    ///
398    /// This is an implementation detail,
399    /// intended only for debugging or data dumps.
400    #[must_use]
401    pub fn debug_data_len(&self) -> Option<usize> {
402        let bits = self.cursor.unconsumed_bit_len();
403        (bits & 31 != 0).then_some(bits >> 3)
404    }
405
406    fn new(
407        reader: &'global_state mut BitStreamReader,
408        cursor: Cursor<'input>,
409        block_id: u32,
410        abbrev_width: u8,
411    ) -> Self {
412        Self {
413            id: block_id,
414            cursor,
415            abbrev_width,
416            block_local_abbrevs: Vec::new(),
417            reader,
418        }
419    }
420}