1use crate::bitstream::{PayloadOperand, ScalarOperand};
2use std::string::FromUtf8Error;
3use std::sync::Arc;
4use std::{collections::HashMap, convert::TryFrom, error, fmt};
5
6use crate::bitcode::{BlockInfo, RecordIter};
7use crate::bits::{self, Cursor};
8use crate::bitstream::{Abbreviation, BlockInfoCode, BuiltinAbbreviationId, Operand};
9use crate::visitor::BitStreamVisitor;
10
/// Errors that can be reported while decoding a bitstream.
#[derive(Debug, Clone)]
pub enum Error {
    /// A read went past the end of a record.
    EndOfRecord,
    /// An integer read from the stream was too big.
    ValueOverflow,
    /// An abbreviation operand appeared where it is not allowed, e.g. an array
    /// whose element is not a scalar, or a payload operand that is not the
    /// last operand of the abbreviation.
    UnexpectedOperand(Option<Operand>),
    /// Invalid signature (magic number); carries the offending value.
    InvalidSignature(u32),
    /// An abbreviation definition was malformed.
    InvalidAbbrev,
    /// A sub-block was found inside a block info block.
    NestedBlockInBlockInfo,
    /// A block info entry appeared before any `SETBID` record selected a block.
    MissingSetBid,
    /// A block info record was unknown or malformed; carries the record id.
    InvalidBlockInfoRecord(u64),
    /// A record used abbreviation `abbrev_id`, which is not defined for block
    /// `block_id`.
    NoSuchAbbrev { block_id: u32, abbrev_id: u32 },
    /// A nested block (with this id) was found where only records were expected.
    UnexpectedBlock(u32),
    /// The data of the block with this id ran out before its end-block marker.
    MissingEndBlock(u32),
    /// A fixed/VBR operand width outside `1..=32` was declared.
    ///
    /// NOTE: despite the name, this is also returned for widths that are too
    /// large, and the carried value is the width truncated to `u8`.
    AbbrevWidthTooSmall(u8),
    /// An error reported by the underlying bit reader.
    ReadBits(bits::Error),
    /// Text was not valid UTF-8.
    Encoding(FromUtf8Error),
    /// Any other failure, described by a static message.
    Other(&'static str),
}
30
31impl fmt::Display for Error {
32 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33 match self {
34 Self::EndOfRecord => write!(f, "read past end of record"),
35 Self::ValueOverflow => write!(f, "read integer too big"),
36 Self::UnexpectedOperand(op) => write!(f, "Unexpected operand {op:?}"),
37 Self::InvalidSignature(sig) => {
38 write!(f, "invalid signature (magic number): 0x{sig:x}")
39 }
40 Self::InvalidAbbrev => write!(f, "invalid abbreviation"),
41 Self::NestedBlockInBlockInfo => {
42 write!(f, "nested block in block info")
43 }
44 Self::UnexpectedBlock(id) => write!(f, "nested block {id}"),
45 Self::MissingSetBid => write!(f, "missing SETBID"),
46 Self::InvalidBlockInfoRecord(record_id) => {
47 write!(f, "invalid block info record `{record_id}`")
48 }
49 Self::AbbrevWidthTooSmall(width) => {
50 write!(f, "abbreviation width `{width}` is too small")
51 }
52 Self::NoSuchAbbrev {
53 block_id,
54 abbrev_id,
55 } => write!(
56 f,
57 "no such abbreviation `{abbrev_id}` in block `{block_id}`"
58 ),
59 Self::MissingEndBlock(block_id) => {
60 write!(f, "missing end block for `{block_id}`")
61 }
62 Self::ReadBits(err) => err.fmt(f),
63 Self::Encoding(err) => err.fmt(f),
64 Self::Other(err) => err.fmt(f),
65 }
66 }
67}
68
// Marker impl: `Error` relies on the default `std::error::Error` methods, so
// no `source()` is exposed; wrapped errors are surfaced through `Display`.
impl error::Error for Error {}
70
71impl From<bits::Error> for Error {
72 fn from(err: bits::Error) -> Self {
73 Self::ReadBits(err)
74 }
75}
76
/// One item produced by [`BlockIter::next`] while walking a block.
#[derive(Debug)]
pub enum BlockItem<'cursor, 'input> {
    /// A nested block, to be walked with its own [`BlockIter`].
    Block(BlockIter<'cursor, 'input>),
    /// A record, either unabbreviated or described by an abbreviation.
    Record(RecordIter<'cursor, 'input>),
}
86
/// Reader over the items (records and nested blocks) of a single block.
#[derive(Debug)]
pub struct BlockIter<'global_state, 'input> {
    /// Id of this block; [`BitStreamReader::TOP_LEVEL_BLOCK_ID`] for the
    /// iterator returned by [`BitStreamReader::iter_bitcode`].
    pub id: u32,
    /// Read position within this block's data.
    cursor: Cursor<'input>,
    /// Number of bits used to read each abbreviation id in this block.
    abbrev_width: u8,
    /// Abbreviations defined inside this block; they are numbered after the
    /// reader's global abbreviations for the same block id.
    block_local_abbrevs: Vec<Arc<Abbreviation>>,
    /// Shared reader state (block info and global abbreviations).
    reader: &'global_state mut BitStreamReader,
}
99
/// State shared by every block of a bitstream while it is being read.
#[derive(Debug, Clone)]
pub struct BitStreamReader {
    /// Block names and record names collected from block info blocks, keyed
    /// by block id.
    pub(crate) block_info: HashMap<u32, BlockInfo>,
    /// Abbreviations defined in block info blocks, keyed by the id of the
    /// block they apply to.
    global_abbrevs: HashMap<u32, Vec<Arc<Abbreviation>>>,
}
107
108impl BitStreamReader {
    /// Pseudo block id given to the outermost stream by [`Self::iter_bitcode`].
    ///
    /// A block with this id may end simply by running out of data; any other
    /// block that runs out of data reports [`Error::MissingEndBlock`].
    pub const TOP_LEVEL_BLOCK_ID: u32 = u32::MAX;
111
112 #[must_use]
113 pub fn new() -> Self {
114 Self {
115 block_info: HashMap::new(),
116 global_abbrevs: HashMap::new(),
117 }
118 }
119
120 pub fn iter_bitcode<'input>(&mut self, bitcode_data: &'input [u8]) -> BlockIter<'_, 'input> {
122 BlockIter::new(self, Cursor::new(bitcode_data), Self::TOP_LEVEL_BLOCK_ID, 2)
123 }
124
125 fn visit_block<V: BitStreamVisitor>(
126 mut block: BlockIter<'_, '_>,
127 visitor: &mut V,
128 ) -> Result<(), Error> {
129 let block_id = block.id;
130 while let Some(item) = block.next()? {
131 match item {
132 BlockItem::Block(new_block) => {
133 let new_id = new_block.id;
134 if visitor.should_enter_block(new_id) {
135 Self::visit_block(new_block, visitor)?;
136 visitor.did_exit_block(new_id);
137 }
138 }
139 BlockItem::Record(record) => {
140 visitor.visit(block_id, record.into_record()?);
141 }
142 }
143 }
144 Ok(())
145 }
146
147 #[inline(never)]
149 fn read_abbrev_op(cursor: &mut Cursor<'_>, num_ops_left: &mut usize) -> Result<Operand, Error> {
150 if *num_ops_left == 0 {
151 return Err(Error::InvalidAbbrev);
152 }
153 *num_ops_left -= 1;
154
155 let is_literal = cursor.read(1)?;
156 if is_literal == 1 {
157 return Ok(Operand::Scalar(ScalarOperand::Literal(cursor.read_vbr(8)?)));
158 }
159 let op_type = cursor.read(3)?;
160 Ok(match op_type {
161 1 => {
162 let width = cursor.read_vbr(5)?;
163 if width < 1 || width > 32 {
164 return Err(Error::AbbrevWidthTooSmall(width as u8));
165 }
166 Operand::Scalar(ScalarOperand::Fixed(width as u8))
167 }
168 2 => {
169 let width = cursor.read_vbr(5)?;
170 if width < 1 || width > 32 {
171 return Err(Error::AbbrevWidthTooSmall(width as u8));
172 }
173 Operand::Scalar(ScalarOperand::Vbr(width as u8))
174 }
175 3 if *num_ops_left == 1 => {
176 let op = Self::read_abbrev_op(cursor, num_ops_left)?;
177 if let Operand::Scalar(op) = op {
178 Operand::Payload(PayloadOperand::Array(op))
179 } else {
180 return Err(Error::UnexpectedOperand(Some(op)));
181 }
182 }
183 4 => Operand::Scalar(ScalarOperand::Char6),
184 5 if *num_ops_left == 0 => Operand::Payload(PayloadOperand::Blob),
185 _ => return Err(Error::InvalidAbbrev),
186 })
187 }
188
189 fn define_abbrev(
191 cursor: &mut Cursor<'_>,
192 abbrevs: &mut Vec<Arc<Abbreviation>>,
193 ) -> Result<(), Error> {
194 let mut num_ops = cursor.read_vbr(5)? as usize;
195
196 let mut fields = Vec::with_capacity(num_ops);
197 let mut payload = None;
198 while num_ops > 0 && fields.len() != fields.capacity() {
199 match Self::read_abbrev_op(cursor, &mut num_ops)? {
200 Operand::Scalar(op) => {
201 fields.push(op);
202 }
203 Operand::Payload(op) if num_ops == 0 => {
204 payload = Some(op);
205 }
206 op => return Err(Error::UnexpectedOperand(Some(op))),
207 }
208 }
209 let id = abbrevs.len() as u32;
210 let abbrev = Arc::new(Abbreviation {
211 fields,
212 payload,
213 id,
214 });
215 abbrevs.push(abbrev);
216 Ok(())
217 }
218
219 fn read_block_info_block(
221 &mut self,
222 cursor: &mut Cursor<'_>,
223 abbrev_width: u8,
224 ) -> Result<(), Error> {
225 use BuiltinAbbreviationId::*;
226
227 let mut current_block_id: Option<u32> = None;
228 loop {
229 let abbrev_id = cursor.read(abbrev_width)? as u32;
230 match BuiltinAbbreviationId::try_from(abbrev_id).map_err(|_| Error::NoSuchAbbrev {
231 block_id: 0,
232 abbrev_id,
233 })? {
234 EndBlock => {
235 cursor.align32()?;
236 return Ok(());
237 }
238 EnterSubBlock => {
239 return Err(Error::NestedBlockInBlockInfo);
240 }
241 DefineAbbreviation => {
242 let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
243 Self::define_abbrev(cursor, self.global_abbrevs.entry(block_id).or_default())?;
244 }
245 UnabbreviatedRecord => {
246 let mut record = RecordIter::from_cursor(cursor)?;
247 let block = u8::try_from(record.id)
248 .ok()
249 .and_then(|c| BlockInfoCode::try_from(c).ok())
250 .ok_or(Error::InvalidBlockInfoRecord(record.id))?;
251 match block {
252 BlockInfoCode::SetBid => {
253 let id = record
254 .u32()
255 .ok()
256 .filter(|_| record.is_empty())
257 .ok_or(Error::InvalidBlockInfoRecord(record.id))?;
258 current_block_id = Some(id);
259 }
260 BlockInfoCode::BlockName => {
261 let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
262 let block_info = self.block_info.entry(block_id).or_default();
263 if let Ok(name) = String::from_utf8(record.string()?) {
264 block_info.name = name;
265 }
266 }
267 BlockInfoCode::SetRecordName => {
268 let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
269 let record_id = record
270 .u64()
271 .map_err(|_| Error::InvalidBlockInfoRecord(record.id))?;
272 let block_info = self.block_info.entry(block_id).or_default();
273 if let Ok(name) = String::from_utf8(record.string()?) {
274 block_info.record_names.insert(record_id, name);
275 }
276 }
277 }
278 }
279 }
280 }
281 }
282
283 pub fn read_block<V: BitStreamVisitor>(
285 &mut self,
286 cursor: Cursor<'_>,
287 block_id: u32,
288 abbrev_width: u8,
289 visitor: &mut V,
290 ) -> Result<(), Error> {
291 Self::visit_block(
292 BlockIter::new(self, cursor, block_id, abbrev_width),
293 visitor,
294 )
295 }
296}
297
298impl<'global_state, 'input> BlockIter<'global_state, 'input> {
299 pub fn next_record<'parent>(
300 &'parent mut self,
301 ) -> Result<Option<RecordIter<'parent, 'input>>, Error> {
302 match self.next()? {
303 None => Ok(None),
304 Some(BlockItem::Record(rec)) => Ok(Some(rec)),
305 Some(BlockItem::Block(block)) => Err(Error::UnexpectedBlock(block.id)),
306 }
307 }
308
309 pub fn next<'parent>(&'parent mut self) -> Result<Option<BlockItem<'parent, 'input>>, Error> {
311 if self.cursor.is_at_end() {
312 return if self.id == BitStreamReader::TOP_LEVEL_BLOCK_ID {
313 Ok(None)
314 } else {
315 Err(Error::MissingEndBlock(self.id))
316 };
317 }
318
319 let abbrev_id = self.cursor.read(self.abbrev_width)? as u32;
320
321 if let Ok(builtin_abbrev) = BuiltinAbbreviationId::try_from(abbrev_id) {
322 use BuiltinAbbreviationId::*;
323 match builtin_abbrev {
324 EndBlock => {
325 self.cursor.align32()?;
326 Ok(None)
327 }
328 EnterSubBlock => {
329 let block_id = self.cursor.read_vbr(8)? as u32;
330 let new_abbrev_width = self.cursor.read_vbr(4)? as u8;
331 self.cursor.align32()?;
332 let block_length = self.cursor.read(32)? as usize * 4;
333 let mut cursor = self.cursor.take_slice(block_length)?;
334
335 if block_id == 0 {
336 self.reader
337 .read_block_info_block(&mut cursor, new_abbrev_width)?;
338 return self.next();
339 }
340
341 let block_iter =
343 BlockIter::new(self.reader, cursor, block_id, new_abbrev_width);
344 Ok(Some(BlockItem::Block(block_iter)))
345 }
346 DefineAbbreviation => {
347 BitStreamReader::define_abbrev(
348 &mut self.cursor,
349 &mut self.block_local_abbrevs,
350 )?;
351 self.next()
352 }
353 UnabbreviatedRecord => {
354 let record_iter = RecordIter::from_cursor(&mut self.cursor)?;
355 Ok(Some(BlockItem::Record(record_iter)))
356 }
357 }
358 } else {
359 let abbrev_index = abbrev_id as usize - 4;
360 let global_abbrevs = self
361 .reader
362 .global_abbrevs
363 .get(&self.id)
364 .map(|v| v.as_slice())
365 .unwrap_or_default();
366
367 let abbrev = if let Some(local_index) = abbrev_index.checked_sub(global_abbrevs.len()) {
370 self.block_local_abbrevs.get(local_index).cloned()
371 } else {
372 global_abbrevs.get(abbrev_index).cloned()
373 };
374
375 let abbrev = abbrev.ok_or(Error::NoSuchAbbrev {
376 block_id: self.id,
377 abbrev_id,
378 })?;
379
380 Ok(Some(BlockItem::Record(RecordIter::from_cursor_abbrev(
381 &mut self.cursor,
382 abbrev,
383 )?)))
384 }
385 }
386
387 #[must_use]
392 pub fn debug_abbrev_width(&self) -> u8 {
393 self.abbrev_width
394 }
395
396 #[must_use]
401 pub fn debug_data_len(&self) -> Option<usize> {
402 let bits = self.cursor.unconsumed_bit_len();
403 (bits & 31 != 0).then_some(bits >> 3)
404 }
405
406 fn new(
407 reader: &'global_state mut BitStreamReader,
408 cursor: Cursor<'input>,
409 block_id: u32,
410 abbrev_width: u8,
411 ) -> Self {
412 Self {
413 id: block_id,
414 cursor,
415 abbrev_width,
416 block_local_abbrevs: Vec::new(),
417 reader,
418 }
419 }
420}