1use crate::bitstream::{PayloadOperand, ScalarOperand};
2use std::num::TryFromIntError;
3use std::string::FromUtf8Error;
4use std::sync::Arc;
5use std::{collections::HashMap, convert::TryFrom, error, fmt};
6
7use crate::bitcode::{BlockInfo, RecordIter};
8use crate::bits::{self, Cursor};
9use crate::bitstream::{Abbreviation, BlockInfoCode, BuiltinAbbreviationId, Operand};
10use crate::visitor::BitStreamVisitor;
11
/// Failures that can be reported while decoding a bitstream.
#[derive(Debug, Clone)]
pub enum Error {
    /// A read went past the end of the current record.
    EndOfRecord,
    /// An integer did not fit into the type it was converted to; also produced
    /// from any [`TryFromIntError`].
    ValueOverflow,
    /// An abbreviation operand appeared in a position where it is not allowed
    /// (for example a non-scalar array element type, or a payload operand that
    /// is not the final operand of its abbreviation).
    UnexpectedOperand(Option<Operand>),
    /// The signature (magic number) was not accepted; carries the offending value.
    InvalidSignature(u32),
    /// An abbreviation definition was malformed (unknown operand encoding, or an
    /// array/blob operand at the wrong position).
    InvalidAbbrev,
    /// A sub-block was opened inside a block-info block.
    NestedBlockInBlockInfo,
    /// A block-info entry that needs a current block id appeared before any
    /// `SETBID` record.
    MissingSetBid,
    /// A block-info record had an unknown code or malformed contents; carries the
    /// record code.
    InvalidBlockInfoRecord(u64),
    /// A record referenced an abbreviation id that is not defined for its block.
    NoSuchAbbrev { block_id: u32, abbrev_id: u32 },
    /// A sub-block (with this id) was found where only a record was expected.
    UnexpectedBlock(u32),
    /// The data of the block with this id ran out before its end-of-block marker.
    MissingEndBlock(u32),
    /// A fixed/VBR operand width was rejected by validation; the accepted range
    /// is 1 through 32 bits. Carries the rejected width.
    AbbrevWidthTooSmall(u8),
    /// A low-level bit read failed.
    ReadBits(bits::Error),
    /// Bytes that were expected to be text were not valid UTF-8.
    Encoding(FromUtf8Error),
    /// Any other failure, described by a static message.
    Other(&'static str),
}
31
32impl fmt::Display for Error {
33 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
34 match self {
35 Self::EndOfRecord => write!(f, "read past end of record"),
36 Self::ValueOverflow => write!(
37 f,
38 "integer out of range (likely due to misparsing the format)"
39 ),
40 Self::UnexpectedOperand(op) => write!(f, "Unexpected operand {op:?}"),
41 Self::InvalidSignature(sig) => {
42 write!(f, "invalid signature (magic number): 0x{sig:x}")
43 }
44 Self::InvalidAbbrev => write!(f, "invalid abbreviation"),
45 Self::NestedBlockInBlockInfo => {
46 write!(f, "nested block in block info")
47 }
48 Self::UnexpectedBlock(id) => write!(f, "nested block {id}"),
49 Self::MissingSetBid => write!(f, "missing SETBID"),
50 Self::InvalidBlockInfoRecord(record_id) => {
51 write!(f, "invalid block info record `{record_id}`")
52 }
53 Self::AbbrevWidthTooSmall(width) => {
54 write!(f, "abbreviation width `{width}` is too small")
55 }
56 Self::NoSuchAbbrev {
57 block_id,
58 abbrev_id,
59 } => write!(
60 f,
61 "no such abbreviation `{abbrev_id}` in block `{block_id}`"
62 ),
63 Self::MissingEndBlock(block_id) => {
64 write!(f, "missing end block for `{block_id}`")
65 }
66 Self::ReadBits(err) => err.fmt(f),
67 Self::Encoding(err) => err.fmt(f),
68 Self::Other(err) => err.fmt(f),
69 }
70 }
71}
72
// Marker impl only: `source()` is not overridden, wrapped errors are surfaced
// through the `Display` implementation instead.
impl error::Error for Error {}
74
impl From<bits::Error> for Error {
    /// Wraps a bit-reading failure in [`Error::ReadBits`], which lets `?` be
    /// used directly on cursor reads.
    fn from(err: bits::Error) -> Self {
        Self::ReadBits(err)
    }
}
80
impl From<TryFromIntError> for Error {
    /// Maps any failed integer conversion to [`Error::ValueOverflow`]; the
    /// original error carries no further detail and is discarded.
    fn from(_: TryFromIntError) -> Self {
        Self::ValueOverflow
    }
}
86
/// One item produced while iterating over the contents of a block.
#[derive(Debug)]
pub enum BlockItem<'cursor, 'input> {
    /// A nested sub-block, exposed as its own iterator.
    Block(BlockIter<'cursor, 'input>),
    /// A data record (abbreviated or unabbreviated).
    Record(RecordIter<'cursor, 'input>),
}
96
/// Iterator-like reader over the records and sub-blocks of a single block.
#[derive(Debug)]
pub struct BlockIter<'global_state, 'input> {
    /// Id of the block being read.
    pub id: u32,
    /// Cursor over this block's data.
    cursor: Cursor<'input>,
    /// Number of bits used to encode each abbreviation id in this block.
    abbrev_width: u8,
    /// Abbreviations defined inside this block, in definition order.
    block_local_abbrevs: Vec<Arc<Abbreviation>>,
    /// Reader holding the state shared between blocks (block-info data).
    reader: &'global_state mut BitStreamReader,
}
109
/// State shared across all blocks of a bitstream, filled in from block-info
/// blocks as they are encountered.
#[derive(Debug, Clone)]
pub struct BitStreamReader {
    /// Block and record names collected from block-info records, keyed by block id.
    pub(crate) block_info: HashMap<u32, BlockInfo>,
    /// Abbreviations defined in block-info blocks, keyed by the block id they apply to.
    global_abbrevs: HashMap<u32, Vec<Arc<Abbreviation>>>,
}
117
118impl BitStreamReader {
119 pub const TOP_LEVEL_BLOCK_ID: u32 = u32::MAX;
121
122 #[must_use]
123 pub fn new() -> Self {
124 Self {
125 block_info: HashMap::new(),
126 global_abbrevs: HashMap::new(),
127 }
128 }
129
130 pub fn iter_bitcode<'input>(&mut self, bitcode_data: &'input [u8]) -> BlockIter<'_, 'input> {
132 BlockIter::new(self, Cursor::new(bitcode_data), Self::TOP_LEVEL_BLOCK_ID, 2)
133 }
134
135 fn visit_block<V: BitStreamVisitor>(
136 mut block: BlockIter<'_, '_>,
137 visitor: &mut V,
138 ) -> Result<(), Error> {
139 let block_id = block.id;
140 while let Some(item) = block.try_next()? {
141 match item {
142 BlockItem::Block(new_block) => {
143 let new_id = new_block.id;
144 if visitor.should_enter_block(new_id) {
145 Self::visit_block(new_block, visitor)?;
146 visitor.did_exit_block(new_id);
147 }
148 }
149 BlockItem::Record(record) => {
150 visitor.visit(block_id, record.into_record()?);
151 }
152 }
153 }
154 Ok(())
155 }
156
157 #[inline(never)]
159 fn read_abbrev_op(cursor: &mut Cursor<'_>, num_ops_left: &mut usize) -> Result<Operand, Error> {
160 if *num_ops_left == 0 {
161 return Err(Error::InvalidAbbrev);
162 }
163 *num_ops_left -= 1;
164
165 let is_literal = cursor.read(1)?;
166 if is_literal == 1 {
167 return Ok(Operand::Scalar(ScalarOperand::Literal(
168 cursor.read_vbr_fixed::<8>()?,
169 )));
170 }
171 let op_type = cursor.read(3)?;
172 Ok(match op_type {
173 1 => {
174 let width = cursor.read_vbr_fixed::<5>()?;
175 if width < 1 || width > 32 {
176 return Err(Error::AbbrevWidthTooSmall(width as u8));
177 }
178 Operand::Scalar(ScalarOperand::Fixed(width as u8))
179 }
180 2 => {
181 let width = cursor.read_vbr_fixed::<5>()?;
182 if width < 1 || width > 32 {
183 return Err(Error::AbbrevWidthTooSmall(width as u8));
184 }
185 Operand::Scalar(ScalarOperand::Vbr(width as u8))
186 }
187 3 if *num_ops_left == 1 => {
188 let op = Self::read_abbrev_op(cursor, num_ops_left)?;
189 if let Operand::Scalar(op) = op {
190 Operand::Payload(PayloadOperand::Array(op))
191 } else {
192 return Err(Error::UnexpectedOperand(Some(op)));
193 }
194 }
195 4 => Operand::Scalar(ScalarOperand::Char6),
196 5 if *num_ops_left == 0 => Operand::Payload(PayloadOperand::Blob),
197 _ => return Err(Error::InvalidAbbrev),
198 })
199 }
200
201 fn define_abbrev(
203 cursor: &mut Cursor<'_>,
204 abbrevs: &mut Vec<Arc<Abbreviation>>,
205 ) -> Result<(), Error> {
206 let mut num_ops = cursor.read_vbr_fixed::<5>()? as usize;
207
208 let mut fields = Vec::with_capacity(num_ops);
209 let mut payload = None;
210 while num_ops > 0 && fields.len() != fields.capacity() {
211 match Self::read_abbrev_op(cursor, &mut num_ops)? {
212 Operand::Scalar(op) => {
213 fields.push(op);
214 }
215 Operand::Payload(op) if num_ops == 0 => {
216 payload = Some(op);
217 }
218 op => return Err(Error::UnexpectedOperand(Some(op))),
219 }
220 }
221 let id = abbrevs.len() as u32;
222 let abbrev = Arc::new(Abbreviation {
223 fields,
224 payload,
225 id,
226 });
227 abbrevs.push(abbrev);
228 Ok(())
229 }
230
231 fn read_block_info_block(
233 &mut self,
234 cursor: &mut Cursor<'_>,
235 abbrev_width: u8,
236 ) -> Result<(), Error> {
237 use BuiltinAbbreviationId::*;
238
239 let mut current_block_id: Option<u32> = None;
240 loop {
241 let abbrev_id = cursor.read(abbrev_width)? as u32;
242 match BuiltinAbbreviationId::try_from(abbrev_id).map_err(|_| Error::NoSuchAbbrev {
243 block_id: 0,
244 abbrev_id,
245 })? {
246 EndBlock => {
247 cursor.align32()?;
248 return Ok(());
249 }
250 EnterSubBlock => {
251 return Err(Error::NestedBlockInBlockInfo);
252 }
253 DefineAbbreviation => {
254 let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
255 Self::define_abbrev(cursor, self.global_abbrevs.entry(block_id).or_default())?;
256 }
257 UnabbreviatedRecord => {
258 let mut record = RecordIter::from_cursor(cursor)?;
259 let block = u8::try_from(record.id)
260 .ok()
261 .and_then(|c| BlockInfoCode::try_from(c).ok())
262 .ok_or(Error::InvalidBlockInfoRecord(record.id))?;
263 match block {
264 BlockInfoCode::SetBid => {
265 let id = record
266 .u32()
267 .ok()
268 .filter(|_| record.is_empty())
269 .ok_or(Error::InvalidBlockInfoRecord(record.id))?;
270 current_block_id = Some(id);
271 }
272 BlockInfoCode::BlockName => {
273 let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
274 let block_info = self.block_info.entry(block_id).or_default();
275 if let Ok(name) = String::from_utf8(record.string()?) {
276 block_info.name = name;
277 }
278 }
279 BlockInfoCode::SetRecordName => {
280 let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
281 let record_id = record
282 .u64()
283 .map_err(|_| Error::InvalidBlockInfoRecord(record.id))?;
284 let block_info = self.block_info.entry(block_id).or_default();
285 if let Ok(name) = String::from_utf8(record.string()?) {
286 block_info.record_names.insert(record_id, name);
287 }
288 }
289 }
290 }
291 }
292 }
293 }
294
295 pub fn read_block<V: BitStreamVisitor>(
297 &mut self,
298 cursor: Cursor<'_>,
299 block_id: u32,
300 abbrev_width: u8,
301 visitor: &mut V,
302 ) -> Result<(), Error> {
303 Self::visit_block(
304 BlockIter::new(self, cursor, block_id, abbrev_width),
305 visitor,
306 )
307 }
308}
309
310impl<'global_state, 'input> BlockIter<'global_state, 'input> {
311 pub fn next_record<'parent>(
312 &'parent mut self,
313 ) -> Result<Option<RecordIter<'parent, 'input>>, Error> {
314 match self.try_next()? {
315 None => Ok(None),
316 Some(BlockItem::Record(rec)) => Ok(Some(rec)),
317 Some(BlockItem::Block(block)) => Err(Error::UnexpectedBlock(block.id)),
318 }
319 }
320
321 #[doc(hidden)]
322 #[deprecated(note = "renamed to `try_next`")]
323 pub fn next<'parent>(&'parent mut self) -> Result<Option<BlockItem<'parent, 'input>>, Error> {
324 self.try_next()
325 }
326
327 pub fn try_next<'parent>(
329 &'parent mut self,
330 ) -> Result<Option<BlockItem<'parent, 'input>>, Error> {
331 if self.cursor.is_at_end() {
332 return if self.id == BitStreamReader::TOP_LEVEL_BLOCK_ID {
333 Ok(None)
334 } else {
335 Err(Error::MissingEndBlock(self.id))
336 };
337 }
338
339 let abbrev_id = self.cursor.read(self.abbrev_width)? as u32;
340
341 if let Ok(builtin_abbrev) = BuiltinAbbreviationId::try_from(abbrev_id) {
342 use BuiltinAbbreviationId::*;
343 match builtin_abbrev {
344 EndBlock => {
345 self.cursor.align32()?;
346 Ok(None)
347 }
348 EnterSubBlock => {
349 let block_id = self.cursor.read_vbr_fixed::<8>()? as u32;
350 let new_abbrev_width = self.cursor.read_vbr_fixed::<4>()? as u8;
351 self.cursor.align32()?;
352 let block_length = self.cursor.read(32)? as usize * 4;
353 let mut cursor = self.cursor.take_slice(block_length)?;
354
355 if block_id == 0 {
356 self.reader
357 .read_block_info_block(&mut cursor, new_abbrev_width)?;
358 return self.try_next();
359 }
360
361 let block_iter =
363 BlockIter::new(self.reader, cursor, block_id, new_abbrev_width);
364 Ok(Some(BlockItem::Block(block_iter)))
365 }
366 DefineAbbreviation => {
367 BitStreamReader::define_abbrev(
368 &mut self.cursor,
369 &mut self.block_local_abbrevs,
370 )?;
371 self.try_next()
372 }
373 UnabbreviatedRecord => {
374 let record_iter = RecordIter::from_cursor(&mut self.cursor)?;
375 Ok(Some(BlockItem::Record(record_iter)))
376 }
377 }
378 } else {
379 let abbrev_index = abbrev_id as usize - 4;
380 let global_abbrevs = self
381 .reader
382 .global_abbrevs
383 .get(&self.id)
384 .map(|v| v.as_slice())
385 .unwrap_or_default();
386
387 let abbrev = if let Some(local_index) = abbrev_index.checked_sub(global_abbrevs.len()) {
390 self.block_local_abbrevs.get(local_index).cloned()
391 } else {
392 global_abbrevs.get(abbrev_index).cloned()
393 };
394
395 let abbrev = abbrev.ok_or(Error::NoSuchAbbrev {
396 block_id: self.id,
397 abbrev_id,
398 })?;
399
400 Ok(Some(BlockItem::Record(RecordIter::from_cursor_abbrev(
401 &mut self.cursor,
402 abbrev,
403 )?)))
404 }
405 }
406
407 #[must_use]
412 pub fn debug_abbrev_width(&self) -> u8 {
413 self.abbrev_width
414 }
415
416 #[must_use]
421 pub fn debug_data_len(&self) -> Option<usize> {
422 let bits = self.cursor.unconsumed_bit_len();
423 (bits & 31 != 0).then_some(bits >> 3)
424 }
425
426 fn new(
427 reader: &'global_state mut BitStreamReader,
428 cursor: Cursor<'input>,
429 block_id: u32,
430 abbrev_width: u8,
431 ) -> Self {
432 Self {
433 id: block_id,
434 cursor,
435 abbrev_width,
436 block_local_abbrevs: Vec::new(),
437 reader,
438 }
439 }
440}