llvm_bitcode/
bitcode.rs

1use std::collections::HashMap;
2
3use crate::bits::Bits;
4use crate::read::{BitStreamReader, Error};
5use crate::visitor::{BitStreamVisitor, CollectingVisitor};
6
7const LLVM_BITCODE_WRAPPER_MAGIC: u32 = 0x0B17C0DE;
8
9/// Represents the contents of a file encoded using the
10/// [LLVM bitstream container format](https://llvm.org/docs/BitCodeFormat.html#bitstream-container-format)
11#[derive(Debug, Clone)]
12pub struct Bitcode {
13    pub signature: Signature,
14    pub elements: Vec<BitcodeElement>,
15    pub block_info: HashMap<u64, BlockInfo>,
16}
17
18/// Blocks in a bitstream denote nested regions of the stream,
19/// and are identified by a content-specific id number
20///
21/// Block IDs 0-7 are reserved for [standard blocks](https://llvm.org/docs/BitCodeFormat.html#standard-blocks)
22/// whose meaning is defined by Bitcode;
23/// block IDs 8 and greater are application specific.
24#[derive(Debug, Clone)]
25pub struct Block {
26    /// Block ID
27    pub id: u64,
28    /// Block elements
29    pub elements: Vec<BitcodeElement>,
30}
31
/// Extra data attached to a [`Record`] in addition to its integer fields
///
/// Payloads can be compared with `==`, which makes it possible to assert on
/// parsed records in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Payload {
    /// A sequence of integer values
    Array(Vec<u64>),
    /// Text held as a `String` (going by the variant name, decoded from a
    /// char6-encoded array -- confirm against the reader)
    Char6String(String),
    /// Raw bytes
    Blob(Vec<u8>),
}
38
39/// Data records consist of a record code and a number of (up to) 64-bit integer values
40///
41/// The interpretation of the code and values is application specific and may vary between different block types.
42#[derive(Debug, Clone)]
43pub struct Record {
44    /// Record code
45    pub id: u64,
46    /// An abbreviated record has a abbreviation id followed by a set of fields
47    pub fields: Vec<u64>,
48    /// Array and Blob encoding has payload
49    pub payload: Option<Payload>,
50}
51
52/// Bitcode element
53#[derive(Debug, Clone)]
54pub enum BitcodeElement {
55    /// Block
56    Block(Block),
57    /// Data record
58    Record(Record),
59}
60
61impl BitcodeElement {
62    /// Returns true if it is a `Block`
63    pub fn is_block(&self) -> bool {
64        matches!(self, BitcodeElement::Block(_))
65    }
66
67    /// If it is a `Block`, returns the associated block. Returns `None` otherwise.
68    pub fn as_block(&self) -> Option<&Block> {
69        match self {
70            BitcodeElement::Block(block) => Some(block),
71            BitcodeElement::Record(_) => None,
72        }
73    }
74
75    /// If it is a `Block`, returns the associated mutable block. Returns `None` otherwise.
76    pub fn as_block_mut(&mut self) -> Option<&mut Block> {
77        match self {
78            BitcodeElement::Block(block) => Some(block),
79            BitcodeElement::Record(_) => None,
80        }
81    }
82
83    /// Returns true if it is a `Record`
84    pub fn is_record(&self) -> bool {
85        matches!(self, BitcodeElement::Record(_))
86    }
87
88    /// If it is a `Record`, returns the associated record. Returns `None` otherwise.
89    pub fn as_record(&self) -> Option<&Record> {
90        match self {
91            BitcodeElement::Block(_) => None,
92            BitcodeElement::Record(record) => Some(record),
93        }
94    }
95
96    /// If it is a `Record`, returns the associated mutable record. Returns `None` otherwise.
97    pub fn as_record_mut(&mut self) -> Option<&mut Record> {
98        match self {
99            BitcodeElement::Block(_) => None,
100            BitcodeElement::Record(record) => Some(record),
101        }
102    }
103}
104
/// Naming information about a block
#[derive(Clone, Debug, Default)]
pub struct BlockInfo {
    /// Name of the block
    pub name: String,
    /// Names of data records, keyed by a `u64`
    /// (presumably the record code -- confirm against the reader)
    pub record_names: HashMap<u64, String>,
}
113
/// Signature of a bitstream, also known as its magic number
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Signature(u32);

impl Signature {
    /// Wraps a raw 32-bit value as a signature
    pub fn new(val: u32) -> Self {
        Signature(val)
    }

    /// Unwraps the signature, yielding the raw 32-bit value
    pub fn into_inner(self) -> u32 {
        let Signature(raw) = self;
        raw
    }
}
127
128impl Bitcode {
129    fn clean(data: &[u8]) -> (Signature, &[u8]) {
130        assert!(data.len() > 4);
131        let signature = Bits::new(data).read_bits(0, 32) as u32;
132        if signature == LLVM_BITCODE_WRAPPER_MAGIC {
133            // It is a LLVM Bitcode wrapper, remove wrapper header
134            assert!(data.len() > 20);
135            let offset = u32::from_le_bytes([data[8], data[9], data[10], data[11]]) as usize;
136            let size = u32::from_le_bytes([data[12], data[13], data[14], data[15]]) as usize;
137            let data = &data[offset..offset + size];
138            let signature = Bits::new(data).read_bits(0, 32) as u32;
139            (Signature(signature), &data[4..])
140        } else {
141            (Signature(signature), &data[4..])
142        }
143    }
144
145    /// Parse bitcode from bytes
146    ///
147    /// Accepts both LLVM bitcode and bitcode wrapper formats
148    pub fn new(data: &[u8]) -> Result<Self, Error> {
149        let (signature, stream) = Self::clean(data);
150        let mut reader = BitStreamReader::new(stream);
151        let mut visitor = CollectingVisitor::new();
152        reader.read_block(BitStreamReader::TOP_LEVEL_BLOCK_ID, 2, &mut visitor)?;
153        Ok(Self {
154            signature,
155            elements: visitor.finalize_top_level_elements(),
156            block_info: reader.block_info,
157        })
158    }
159
160    /// Read bitcode from bytes with a visitor
161    ///
162    /// Accepts both LLVM bitcode and bitcode wrapper formats
163    pub fn read<V>(data: &[u8], visitor: &mut V) -> Result<(), Error>
164    where
165        V: BitStreamVisitor,
166    {
167        let (signature, stream) = Self::clean(data);
168        if !visitor.validate(signature) {
169            return Err(Error::InvalidSignature(signature.into_inner()));
170        }
171        let mut reader = BitStreamReader::new(stream);
172        reader.read_block(BitStreamReader::TOP_LEVEL_BLOCK_ID, 2, visitor)
173    }
174}