llvm_bitcode/
bitcode.rs

1use crate::bits::Cursor;
2use crate::bitstream::{Abbreviation, Operand};
3use crate::bitstream::{PayloadOperand, ScalarOperand};
4use std::cell::RefCell;
5use std::collections::HashMap;
6use std::fmt;
7use std::num::NonZero;
8use std::ops::Range;
9use std::sync::Arc;
10
11use crate::read::{BitStreamReader, Error};
12use crate::visitor::{BitStreamVisitor, CollectingVisitor};
13
/// Value of the first little-endian 32-bit word of an input that starts with a
/// bitcode wrapper header; [`Signature::parse`] compares against it to decide
/// whether a wrapper header has to be stripped.
const LLVM_BITCODE_WRAPPER_MAGIC: u32 = 0x0B17C0DE;
15
/// Represents the contents of a file encoded using the
/// [LLVM bitstream container format](https://llvm.org/docs/BitCodeFormat.html#bitstream-container-format)
///
/// Built by [`Bitcode::new`], which reads the whole stream eagerly.
#[derive(Debug, Clone)]
pub struct Bitcode {
    /// Header information produced by [`Signature::parse`]
    pub signature: Signature,
    /// Top-level elements collected while reading the stream
    pub elements: Vec<BitcodeElement>,
    /// Block metadata gathered by the reader
    /// (keys are presumably block IDs; confirm against `BitStreamReader`)
    pub block_info: HashMap<u32, BlockInfo>,
}
24
/// Blocks in a bitstream denote nested regions of the stream,
/// and are identified by a content-specific ID number.
///
/// Block IDs 0-7 are reserved for [standard blocks](https://llvm.org/docs/BitCodeFormat.html#standard-blocks)
/// whose meaning is defined by Bitcode;
/// block IDs 8 and greater are application specific.
#[derive(Debug, Clone)]
pub struct Block {
    /// Block ID
    pub id: u32,
    /// Elements contained in the block: records and nested blocks
    pub elements: Vec<BitcodeElement>,
}
38
/// Variable-length data attached to an abbreviated record, in addition to its scalar fields.
#[derive(Debug, Clone)]
pub enum Payload {
    /// Elements of an array operand whose element encoding is not Char6
    Array(Vec<u64>),
    /// Array operand with Char6 elements, decoded into text
    Char6String(String),
    /// Owned copy of the bytes of a blob operand
    Blob(Vec<u8>),
}
45
/// Data records consist of a record code and a number of (up to) 64-bit integer values
///
/// The interpretation of the code and values is application specific and may vary between different block types.
#[derive(Debug, Clone)]
pub struct Record {
    /// Record code
    pub id: u64,
    /// Scalar operand values that follow the record code, in stream order
    fields: Vec<u64>,
    /// Array or blob data of an abbreviated record; `None` when the record
    /// has no such operand or after [`Record::take_payload`] moved it out
    payload: Option<Payload>,
}
58
59impl Record {
60    #[must_use]
61    pub fn fields(&self) -> &[u64] {
62        &self.fields
63    }
64
65    pub fn take_payload(&mut self) -> Option<Payload> {
66        self.payload.take()
67    }
68}
69
/// Tracks which operands of the record being read are still unread.
#[derive(Debug, Clone)]
enum Ops {
    /// Record whose layout is given by an abbreviation definition
    Abbrev {
        /// Index into `abbrev.fields` (index 0 is the record code, so reading starts at 1).
        ///
        /// If under `abbrev.fields.len()`, then it's the next op to read
        /// If equals `abbrev.fields.len()`, then payload is next
        /// If greater than `abbrev.fields.len()`, then payload has been read
        state: usize,
        /// Abbreviation supplying the scalar operands and the optional payload operand
        abbrev: Arc<Abbreviation>,
    },
    /// Unabbreviated record: number of operands left to read
    Full(usize),
}
82
/// Lazy reader over the operands of a single data record
///
/// The record code is read when the iterator is created; the remaining operands are
/// decoded from the cursor on demand by [`RecordIter::next`] and the typed helpers.
/// Dropping the iterator reads whatever is left so the cursor ends up after the record.
pub struct RecordIter<'cursor, 'input> {
    /// Record code
    pub id: u64,
    /// Stream position, shared with whoever created this iterator
    cursor: &'cursor mut Cursor<'input>,
    /// Which operands are still unread
    ops: Ops,
}
92
93impl<'cursor, 'input> RecordIter<'cursor, 'input> {
94    pub(crate) fn into_record(mut self) -> Result<Record, Error> {
95        let mut fields = Vec::with_capacity(self.len());
96        while let Some(f) = self.next()? {
97            fields.push(f);
98        }
99        Ok(Record {
100            id: self.id,
101            fields,
102            payload: self.payload().ok().flatten(),
103        })
104    }
105
106    fn read_scalar_operand(cursor: &mut Cursor<'_>, operand: ScalarOperand) -> Result<u64, Error> {
107        match operand {
108            ScalarOperand::Char6 => {
109                let value = cursor.read(6)? as u8;
110                Ok(u64::from(match value {
111                    0..=25 => value + b'a',
112                    26..=51 => value + (b'A' - 26),
113                    52..=61 => value - (52 - b'0'),
114                    62 => b'.',
115                    63 => b'_',
116                    _ => return Err(Error::InvalidAbbrev),
117                }))
118            }
119            ScalarOperand::Literal(value) => Ok(value),
120            ScalarOperand::Fixed(width) => Ok(cursor.read(width)?),
121            ScalarOperand::Vbr(width) => Ok(cursor.read_vbr(width)?),
122        }
123    }
124
125    pub(crate) fn from_cursor_abbrev(
126        cursor: &'cursor mut Cursor<'input>,
127        abbrev: Arc<Abbreviation>,
128    ) -> Result<Self, Error> {
129        let id =
130            Self::read_scalar_operand(cursor, *abbrev.fields.first().ok_or(Error::InvalidAbbrev)?)?;
131        Ok(Self {
132            id,
133            cursor,
134            ops: Ops::Abbrev { state: 1, abbrev },
135        })
136    }
137
138    pub(crate) fn from_cursor(cursor: &'cursor mut Cursor<'input>) -> Result<Self, Error> {
139        let id = cursor.read_vbr(6)?;
140        let num_ops = cursor.read_vbr(6)? as usize;
141        Ok(Self {
142            id,
143            cursor,
144            ops: Ops::Full(num_ops),
145        })
146    }
147
148    pub fn payload(&mut self) -> Result<Option<Payload>, Error> {
149        match &mut self.ops {
150            Ops::Abbrev { state, abbrev } => {
151                if *state > abbrev.fields.len() {
152                    return Ok(None);
153                }
154                Ok(match abbrev.payload {
155                    Some(PayloadOperand::Blob) => Some(Payload::Blob(self.blob()?.to_vec())),
156                    Some(PayloadOperand::Array(ScalarOperand::Char6)) => {
157                        Some(Payload::Char6String(
158                            String::from_utf8(self.string()?).map_err(|_| Error::InvalidAbbrev)?,
159                        ))
160                    }
161                    Some(PayloadOperand::Array(_)) => Some(Payload::Array(self.array()?)),
162                    None => None,
163                })
164            }
165            Ops::Full(_) => Ok(None),
166        }
167    }
168
169    /// Number of unread fields, excludes string/array/blob payload
170    #[must_use]
171    pub fn len(&self) -> usize {
172        match &self.ops {
173            Ops::Abbrev { state, abbrev } => abbrev.fields.len().saturating_sub(*state),
174            Ops::Full(num_ops) => *num_ops,
175        }
176    }
177
178    /// Matches len, excludes string/array/blob payload
179    #[must_use]
180    pub fn is_empty(&self) -> bool {
181        self.len() == 0
182    }
183
184    pub fn next(&mut self) -> Result<Option<u64>, Error> {
185        match &mut self.ops {
186            Ops::Abbrev { state, abbrev } => {
187                let Some(&op) = abbrev.fields.get(*state) else {
188                    return Ok(None);
189                };
190                *state += 1;
191                Ok(Some(Self::read_scalar_operand(self.cursor, op)?))
192            }
193            Ops::Full(num_ops) => {
194                if *num_ops == 0 {
195                    return Ok(None);
196                }
197                *num_ops -= 1;
198                Ok(Some(self.cursor.read_vbr(6)?))
199            }
200        }
201    }
202
203    pub fn u64(&mut self) -> Result<u64, Error> {
204        self.next()?.ok_or(Error::EndOfRecord)
205    }
206
207    pub fn nzu64(&mut self) -> Result<Option<NonZero<u64>>, Error> {
208        self.u64().map(NonZero::new)
209    }
210
211    pub fn i64(&mut self) -> Result<i64, Error> {
212        let v = self.u64()?;
213        let shifted = (v >> 1) as i64;
214        Ok(if (v & 1) == 0 {
215            shifted
216        } else if v != 1 {
217            -shifted
218        } else {
219            1 << 63
220        })
221    }
222
223    pub fn u32(&mut self) -> Result<u32, Error> {
224        self.u64()?.try_into().map_err(|_| Error::ValueOverflow)
225    }
226
227    pub fn nzu32(&mut self) -> Result<Option<NonZero<u32>>, Error> {
228        self.u32().map(NonZero::new)
229    }
230
231    pub fn u8(&mut self) -> Result<u8, Error> {
232        self.u64()?.try_into().map_err(|_| Error::ValueOverflow)
233    }
234
235    pub fn try_from<U: TryFrom<u64>, T: TryFrom<U>>(&mut self) -> Result<T, Error> {
236        T::try_from(self.u64()?.try_into().map_err(|_| Error::ValueOverflow)?)
237            .map_err(|_| Error::ValueOverflow)
238    }
239
240    pub fn nzu8(&mut self) -> Result<Option<NonZero<u8>>, Error> {
241        self.u8().map(NonZero::new)
242    }
243
244    pub fn bool(&mut self) -> Result<bool, Error> {
245        match self.u64()? {
246            0 => Ok(false),
247            1 => Ok(true),
248            _ => Err(Error::ValueOverflow),
249        }
250    }
251
252    pub fn range(&mut self) -> Result<Range<usize>, Error> {
253        let start = self.u64()? as usize;
254        Ok(Range {
255            start,
256            end: start + self.u64()? as usize,
257        })
258    }
259
260    pub fn blob(&mut self) -> Result<&'input [u8], Error> {
261        match &mut self.ops {
262            Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
263                Some(PayloadOperand::Blob) => {
264                    let length = self.cursor.read_vbr(6)? as usize;
265                    self.cursor.align32()?;
266                    let data = self.cursor.read_bytes(length)?;
267                    self.cursor.align32()?;
268                    Ok(data)
269                }
270                other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
271            },
272            Ops::Full(_) => Err(Error::UnexpectedOperand(None)),
273        }
274    }
275
276    pub fn array(&mut self) -> Result<Vec<u64>, Error> {
277        match &mut self.ops {
278            Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
279                Some(PayloadOperand::Array(op)) => {
280                    let len = self.cursor.read_vbr(6)? as usize;
281                    let mut out = Vec::with_capacity(len);
282                    for _ in 0..len {
283                        if out.len() == out.capacity() {
284                            debug_assert!(false);
285                            break;
286                        }
287                        out.push(Self::read_scalar_operand(self.cursor, op)?);
288                    }
289                    Ok(out)
290                }
291                other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
292            },
293            // Not a proper array payload, but this fallback pattern is used by LLVM
294            Ops::Full(num_ops) => {
295                let len = *num_ops;
296                *num_ops = 0;
297                let mut out = Vec::with_capacity(len);
298                for _ in 0..len {
299                    if out.len() == out.capacity() {
300                        debug_assert!(false);
301                        break;
302                    }
303                    out.push(self.cursor.read_vbr(6)?);
304                }
305                Ok(out)
306            }
307        }
308    }
309
310    /// Mark payload as read, if there is one
311    fn take_payload_operand(
312        state: &mut usize,
313        abbrev: &Abbreviation,
314    ) -> Result<Option<PayloadOperand>, Error> {
315        if *state == abbrev.fields.len() {
316            if abbrev.payload.is_some() {
317                *state += 1;
318            }
319            Ok(abbrev.payload)
320        } else {
321            Err(Error::UnexpectedOperand(
322                abbrev.fields.get(*state).copied().map(Operand::Scalar),
323            ))
324        }
325    }
326
327    /// Read remainder of the fields as string chars.
328    ///
329    /// Interpret data as UTF-8.
330    /// The string may contain NUL terminator, depending on context.
331    pub fn string_utf8(&mut self) -> Result<String, Error> {
332        String::from_utf8(self.string()?).map_err(Error::Encoding)
333    }
334
335    /// Read remainder of the fields as string chars
336    ///
337    /// The strings are just binary blobs. LLVM doesn't guarantee any encoding.
338    /// The string may contain NUL terminator, depending on context.
339    pub fn string(&mut self) -> Result<Vec<u8>, Error> {
340        match &mut self.ops {
341            Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
342                Some(PayloadOperand::Array(el)) => {
343                    *state += 1;
344                    let len = self.cursor.read_vbr(6)? as usize;
345                    let mut out = Vec::with_capacity(len);
346
347                    match el {
348                        ScalarOperand::Char6 => {
349                            for _ in 0..len {
350                                if out.len() == out.capacity() {
351                                    debug_assert!(false);
352                                    break;
353                                }
354                                let ch = match self.cursor.read(6)? as u8 {
355                                    value @ 0..=25 => value + b'a',
356                                    value @ 26..=51 => value + (b'A' - 26),
357                                    value @ 52..=61 => value - (52 - b'0'),
358                                    62 => b'.',
359                                    63 => b'_',
360                                    _ => return Err(Error::InvalidAbbrev),
361                                };
362                                out.push(ch);
363                            }
364                        }
365                        ScalarOperand::Fixed(width @ 6..=8) => {
366                            for _ in 0..len {
367                                if out.len() == out.capacity() {
368                                    debug_assert!(false);
369                                    break;
370                                }
371                                out.push(self.cursor.read(width)? as u8);
372                            }
373                        }
374                        other => {
375                            return Err(Error::UnexpectedOperand(Some(Operand::Scalar(other))));
376                        }
377                    }
378                    Ok(out)
379                }
380                other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
381            },
382            Ops::Full(num_ops) => {
383                let len = std::mem::replace(num_ops, 0);
384                let mut out = Vec::with_capacity(len);
385                for _ in 0..len {
386                    let ch = self.cursor.read_vbr(6)?;
387                    out.push(u8::try_from(ch).map_err(|_| Error::ValueOverflow)?);
388                }
389                Ok(out)
390            }
391        }
392    }
393
394    /// Zero-terminated string, assumes latin1 encoding
395    pub fn zstring(&mut self) -> Result<String, Error> {
396        let mut s = String::new();
397        while let Some(b) = self.nzu8()? {
398            s.push(b.get() as char);
399        }
400        Ok(s)
401    }
402
403    /// Internal ID of this record's abbreviation, if any.
404    ///
405    /// This is intended only for debugging and data dumps.
406    /// This isn't a stable identifier, and may be block-specific.
407    #[must_use]
408    pub fn debug_abbrev_id(&self) -> Option<u32> {
409        match &self.ops {
410            Ops::Abbrev { abbrev, .. } => Some(abbrev.id),
411            Ops::Full(_) => None,
412        }
413    }
414
415    /// For debug printing
416    fn from_cloned_cursor<'new_cursor>(
417        &self,
418        cursor: &'new_cursor mut Cursor<'input>,
419    ) -> RecordIter<'new_cursor, 'input> {
420        RecordIter {
421            id: self.id,
422            ops: self.ops.clone(),
423            cursor,
424        }
425    }
426}
427
428impl Iterator for RecordIter<'_, '_> {
429    type Item = Result<u64, Error>;
430    fn next(&mut self) -> Option<Self::Item> {
431        self.next().transpose()
432    }
433}
434
435impl Drop for RecordIter<'_, '_> {
436    /// Must drain the remaining records to advance the cursor to the next record
437    fn drop(&mut self) {
438        while let Ok(Some(_)) = self.next() {}
439        if let Ops::Abbrev { abbrev, .. } = &self.ops
440            && abbrev.payload.is_some()
441        {
442            let _ = self.payload();
443        }
444    }
445}
446
/// Debug-printing helper: formats the unread fields (and payload) of the wrapped iterator as a list.
/// The `RefCell` is needed because reading mutates the iterator while `Debug::fmt` only gets `&self`.
struct RecordIterDebugFields<'c, 'i>(RefCell<RecordIter<'c, 'i>>);
/// Debug-printing helper: formats the `Ok` or `Err` content directly, without the `Ok(..)`/`Err(..)` wrapper.
struct RecordIterDebugResult<T, E>(Result<T, E>);
449
450impl fmt::Debug for RecordIter<'_, '_> {
451    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
452        let mut c = self.cursor.clone();
453        let fields = RecordIterDebugFields(RefCell::new(self.from_cloned_cursor(&mut c)));
454
455        f.debug_struct("RecordIter")
456            .field("id", &self.id)
457            .field("fields", &fields)
458            .field("ops", &self.ops)
459            .field("cursor", &self.cursor)
460            .finish()
461    }
462}
463
464impl fmt::Debug for RecordIterDebugFields<'_, '_> {
465    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
466        let mut iter = self.0.borrow_mut();
467        let mut d = f.debug_list();
468        d.entries(iter.by_ref().map(RecordIterDebugResult));
469        if let Some(p) = iter.payload().transpose() {
470            d.entries([RecordIterDebugResult(p)]);
471        }
472        d.finish()
473    }
474}
475
476impl<T: fmt::Debug, E: fmt::Debug> fmt::Debug for RecordIterDebugResult<T, E> {
477    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
478        match &self.0 {
479            Ok(t) => t.fmt(f),
480            Err(e) => e.fmt(f),
481        }
482    }
483}
484
/// Bitcode element: one entry in the element list of a [`Bitcode`] or a [`Block`]
#[derive(Debug, Clone)]
pub enum BitcodeElement {
    /// Nested block
    Block(Block),
    /// Data record
    Record(Record),
}
493
494impl BitcodeElement {
495    /// Returns true if it is a `Block`
496    #[must_use]
497    pub fn is_block(&self) -> bool {
498        matches!(self, Self::Block(_))
499    }
500
501    /// If it is a `Block`, returns the associated block. Returns `None` otherwise.
502    #[must_use]
503    pub fn as_block(&self) -> Option<&Block> {
504        match self {
505            Self::Block(block) => Some(block),
506            Self::Record(_) => None,
507        }
508    }
509
510    /// If it is a `Block`, returns the associated mutable block. Returns `None` otherwise.
511    pub fn as_block_mut(&mut self) -> Option<&mut Block> {
512        match self {
513            Self::Block(block) => Some(block),
514            Self::Record(_) => None,
515        }
516    }
517
518    /// Returns true if it is a `Record`
519    #[must_use]
520    pub fn is_record(&self) -> bool {
521        matches!(self, Self::Record(_))
522    }
523
524    /// If it is a `Record`, returns the associated record. Returns `None` otherwise.
525    #[must_use]
526    pub fn as_record(&self) -> Option<&Record> {
527        match self {
528            Self::Block(_) => None,
529            Self::Record(record) => Some(record),
530        }
531    }
532
533    /// If it is a `Record`, returns the associated mutable record. Returns `None` otherwise.
534    pub fn as_record_mut(&mut self) -> Option<&mut Record> {
535        match self {
536            Self::Block(_) => None,
537            Self::Record(record) => Some(record),
538        }
539    }
540}
541
/// Block information: names attached to a block, as stored in [`Bitcode::block_info`]
#[derive(Debug, Clone, Default)]
pub struct BlockInfo {
    /// Block name
    pub name: String,
    /// Data record names (keys are presumably record codes; confirm against the reader)
    pub record_names: HashMap<u64, String>,
}
550
551/// aka. Magic number
552#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq)]
553pub struct Signature {
554    pub magic: u32,
555    pub magic2: u32,
556    pub version: u32,
557    pub offset: u32,
558    pub size: u32,
559    pub cpu_type: u32,
560}
561
562impl Signature {
563    #[must_use]
564    pub fn parse(data: &[u8]) -> Option<(Self, &[u8])> {
565        let (signature, remaining_data) = data.split_first_chunk::<4>()?;
566        let magic = u32::from_le_bytes(*signature);
567        if magic != LLVM_BITCODE_WRAPPER_MAGIC {
568            Some((
569                Self {
570                    version: 0,
571                    magic,
572                    magic2: 0,
573                    offset: 4,
574                    size: remaining_data.len() as _,
575                    cpu_type: 0,
576                },
577                remaining_data,
578            ))
579        } else {
580            // It is a LLVM Bitcode wrapper, remove wrapper header
581            if data.len() < 20 {
582                return None;
583            }
584            let mut words = data
585                .chunks_exact(4)
586                .skip(1)
587                .map(|w| u32::from_le_bytes(w.try_into().unwrap()));
588            let version = words.next()?;
589            let offset = words.next()?;
590            let size = words.next()?;
591            let cpu_id = words.next()?;
592            let data = data.get(offset as usize..offset as usize + size as usize)?;
593            let (magic2, remaining_data) = data.split_first_chunk::<4>()?;
594            let magic2 = u32::from_le_bytes(*magic2);
595            Some((
596                Self {
597                    version,
598                    magic,
599                    magic2,
600                    offset,
601                    size,
602                    cpu_type: cpu_id,
603                },
604                remaining_data,
605            ))
606        }
607    }
608}
609
610impl Bitcode {
611    /// Parse bitcode from bytes
612    ///
613    /// Accepts both LLVM bitcode and bitcode wrapper formats
614    pub fn new(data: &[u8]) -> Result<Self, Error> {
615        let (signature, stream) = Signature::parse(data).ok_or(Error::InvalidSignature(0))?;
616        let mut reader = BitStreamReader::new();
617        let mut visitor = CollectingVisitor::new();
618        reader.read_block(
619            Cursor::new(stream),
620            BitStreamReader::TOP_LEVEL_BLOCK_ID,
621            2,
622            &mut visitor,
623        )?;
624        Ok(Self {
625            signature,
626            elements: visitor.finalize_top_level_elements(),
627            block_info: reader.block_info,
628        })
629    }
630
631    /// Read bitcode from bytes with a visitor
632    ///
633    /// Accepts both LLVM bitcode and bitcode wrapper formats
634    pub fn read<V>(data: &[u8], visitor: &mut V) -> Result<(), Error>
635    where
636        V: BitStreamVisitor,
637    {
638        let (header, stream) = Signature::parse(data).ok_or(Error::InvalidSignature(0))?;
639        if !visitor.validate(header) {
640            return Err(Error::InvalidSignature(header.magic));
641        }
642        let mut reader = BitStreamReader::new();
643        reader.read_block(
644            Cursor::new(stream),
645            BitStreamReader::TOP_LEVEL_BLOCK_ID,
646            2,
647            visitor,
648        )
649    }
650}