1use crate::bitstream::{PayloadOperand, ScalarOperand};
2use std::string::FromUtf8Error;
3use std::sync::Arc;
4use std::{collections::HashMap, convert::TryFrom, error, fmt};
5
6use crate::bitcode::{BlockInfo, RecordIter};
7use crate::bits::{self, Cursor};
8use crate::bitstream::{Abbreviation, BlockInfoCode, BuiltinAbbreviationId, Operand};
9use crate::visitor::BitStreamVisitor;
10
/// Errors produced while reading a bitstream.
///
/// The user-facing text for each variant is defined by the `Display`
/// implementation of this type.
#[derive(Debug, Clone)]
pub enum Error {
    /// A read went past the end of a record.
    EndOfRecord,
    /// An integer was out of range (likely due to misparsing the format).
    ValueOverflow,
    /// An operand appeared where it was not expected; carries the operand, if any.
    UnexpectedOperand(Option<Operand>),
    /// The signature (magic number) was not valid; carries the value.
    InvalidSignature(u32),
    /// An abbreviation definition was malformed.
    InvalidAbbrev,
    /// A sub-block was found inside a block info block.
    NestedBlockInBlockInfo,
    /// A block info entry needed a current block ID, but no SETBID record had set one.
    MissingSetBid,
    /// A block info record had an unknown code or malformed contents; carries the record ID.
    InvalidBlockInfoRecord(u64),
    /// A record referenced an abbreviation ID that is not defined for the block.
    NoSuchAbbrev { block_id: u32, abbrev_id: u32 },
    /// A nested block was found where only records were expected; carries the block ID.
    UnexpectedBlock(u32),
    /// The data of a block ended without an end-of-block marker; carries the block ID.
    MissingEndBlock(u32),
    /// An abbreviation operand declared an unusable bit width; carries the width.
    AbbrevWidthTooSmall(u8),
    /// An error reported by the underlying bit cursor.
    ReadBits(bits::Error),
    /// A UTF-8 decoding error.
    Encoding(FromUtf8Error),
    /// Any other error, described by a static message.
    Other(&'static str),
}
30
31impl fmt::Display for Error {
32 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33 match self {
34 Self::EndOfRecord => write!(f, "read past end of record"),
35 Self::ValueOverflow => write!(
36 f,
37 "integer out of range (likely due to misparsing the format)"
38 ),
39 Self::UnexpectedOperand(op) => write!(f, "Unexpected operand {op:?}"),
40 Self::InvalidSignature(sig) => {
41 write!(f, "invalid signature (magic number): 0x{sig:x}")
42 }
43 Self::InvalidAbbrev => write!(f, "invalid abbreviation"),
44 Self::NestedBlockInBlockInfo => {
45 write!(f, "nested block in block info")
46 }
47 Self::UnexpectedBlock(id) => write!(f, "nested block {id}"),
48 Self::MissingSetBid => write!(f, "missing SETBID"),
49 Self::InvalidBlockInfoRecord(record_id) => {
50 write!(f, "invalid block info record `{record_id}`")
51 }
52 Self::AbbrevWidthTooSmall(width) => {
53 write!(f, "abbreviation width `{width}` is too small")
54 }
55 Self::NoSuchAbbrev {
56 block_id,
57 abbrev_id,
58 } => write!(
59 f,
60 "no such abbreviation `{abbrev_id}` in block `{block_id}`"
61 ),
62 Self::MissingEndBlock(block_id) => {
63 write!(f, "missing end block for `{block_id}`")
64 }
65 Self::ReadBits(err) => err.fmt(f),
66 Self::Encoding(err) => err.fmt(f),
67 Self::Other(err) => err.fmt(f),
68 }
69 }
70}
71
// Marker implementation: no `source()` override, so the default (`None`) is
// used; wrapped errors are surfaced through `Display` instead.
impl error::Error for Error {}
73
74impl From<bits::Error> for Error {
75 fn from(err: bits::Error) -> Self {
76 Self::ReadBits(err)
77 }
78}
79
/// One item yielded while iterating the contents of a block.
#[derive(Debug)]
pub enum BlockItem<'cursor, 'input> {
    /// A nested block, exposed as an iterator over its own contents.
    Block(BlockIter<'cursor, 'input>),
    /// A data record, exposed as an iterator over its fields.
    Record(RecordIter<'cursor, 'input>),
}
89
/// Iterator-like reader over the records and sub-blocks of a single block.
///
/// `'global_state` is the borrow of the shared [`BitStreamReader`];
/// `'input` is the lifetime of the data the cursor reads from.
#[derive(Debug)]
pub struct BlockIter<'global_state, 'input> {
    /// ID of the block being read.
    pub id: u32,
    /// Cursor over this block's data.
    cursor: Cursor<'input>,
    /// Number of bits used to read each abbreviation ID in this block.
    abbrev_width: u8,
    /// Abbreviations defined inside this block (as opposed to via block info).
    block_local_abbrevs: Vec<Arc<Abbreviation>>,
    /// Shared reader state: block info and per-block-ID global abbreviations.
    reader: &'global_state mut BitStreamReader,
}
102
/// State shared across all blocks of one bitstream.
#[derive(Debug, Clone)]
pub struct BitStreamReader {
    /// Block metadata (block and record names) collected from block info
    /// blocks, keyed by block ID.
    pub(crate) block_info: HashMap<u32, BlockInfo>,
    /// Abbreviations defined in block info blocks, keyed by the block ID they
    /// apply to.
    global_abbrevs: HashMap<u32, Vec<Arc<Abbreviation>>>,
}
110
111impl BitStreamReader {
112 pub const TOP_LEVEL_BLOCK_ID: u32 = u32::MAX;
114
115 #[must_use]
116 pub fn new() -> Self {
117 Self {
118 block_info: HashMap::new(),
119 global_abbrevs: HashMap::new(),
120 }
121 }
122
123 pub fn iter_bitcode<'input>(&mut self, bitcode_data: &'input [u8]) -> BlockIter<'_, 'input> {
125 BlockIter::new(self, Cursor::new(bitcode_data), Self::TOP_LEVEL_BLOCK_ID, 2)
126 }
127
128 fn visit_block<V: BitStreamVisitor>(
129 mut block: BlockIter<'_, '_>,
130 visitor: &mut V,
131 ) -> Result<(), Error> {
132 let block_id = block.id;
133 while let Some(item) = block.try_next()? {
134 match item {
135 BlockItem::Block(new_block) => {
136 let new_id = new_block.id;
137 if visitor.should_enter_block(new_id) {
138 Self::visit_block(new_block, visitor)?;
139 visitor.did_exit_block(new_id);
140 }
141 }
142 BlockItem::Record(record) => {
143 visitor.visit(block_id, record.into_record()?);
144 }
145 }
146 }
147 Ok(())
148 }
149
150 #[inline(never)]
152 fn read_abbrev_op(cursor: &mut Cursor<'_>, num_ops_left: &mut usize) -> Result<Operand, Error> {
153 if *num_ops_left == 0 {
154 return Err(Error::InvalidAbbrev);
155 }
156 *num_ops_left -= 1;
157
158 let is_literal = cursor.read(1)?;
159 if is_literal == 1 {
160 return Ok(Operand::Scalar(ScalarOperand::Literal(cursor.read_vbr(8)?)));
161 }
162 let op_type = cursor.read(3)?;
163 Ok(match op_type {
164 1 => {
165 let width = cursor.read_vbr(5)?;
166 if width < 1 || width > 32 {
167 return Err(Error::AbbrevWidthTooSmall(width as u8));
168 }
169 Operand::Scalar(ScalarOperand::Fixed(width as u8))
170 }
171 2 => {
172 let width = cursor.read_vbr(5)?;
173 if width < 1 || width > 32 {
174 return Err(Error::AbbrevWidthTooSmall(width as u8));
175 }
176 Operand::Scalar(ScalarOperand::Vbr(width as u8))
177 }
178 3 if *num_ops_left == 1 => {
179 let op = Self::read_abbrev_op(cursor, num_ops_left)?;
180 if let Operand::Scalar(op) = op {
181 Operand::Payload(PayloadOperand::Array(op))
182 } else {
183 return Err(Error::UnexpectedOperand(Some(op)));
184 }
185 }
186 4 => Operand::Scalar(ScalarOperand::Char6),
187 5 if *num_ops_left == 0 => Operand::Payload(PayloadOperand::Blob),
188 _ => return Err(Error::InvalidAbbrev),
189 })
190 }
191
192 fn define_abbrev(
194 cursor: &mut Cursor<'_>,
195 abbrevs: &mut Vec<Arc<Abbreviation>>,
196 ) -> Result<(), Error> {
197 let mut num_ops = cursor.read_vbr(5)? as usize;
198
199 let mut fields = Vec::with_capacity(num_ops);
200 let mut payload = None;
201 while num_ops > 0 && fields.len() != fields.capacity() {
202 match Self::read_abbrev_op(cursor, &mut num_ops)? {
203 Operand::Scalar(op) => {
204 fields.push(op);
205 }
206 Operand::Payload(op) if num_ops == 0 => {
207 payload = Some(op);
208 }
209 op => return Err(Error::UnexpectedOperand(Some(op))),
210 }
211 }
212 let id = abbrevs.len() as u32;
213 let abbrev = Arc::new(Abbreviation {
214 fields,
215 payload,
216 id,
217 });
218 abbrevs.push(abbrev);
219 Ok(())
220 }
221
222 fn read_block_info_block(
224 &mut self,
225 cursor: &mut Cursor<'_>,
226 abbrev_width: u8,
227 ) -> Result<(), Error> {
228 use BuiltinAbbreviationId::*;
229
230 let mut current_block_id: Option<u32> = None;
231 loop {
232 let abbrev_id = cursor.read(abbrev_width)? as u32;
233 match BuiltinAbbreviationId::try_from(abbrev_id).map_err(|_| Error::NoSuchAbbrev {
234 block_id: 0,
235 abbrev_id,
236 })? {
237 EndBlock => {
238 cursor.align32()?;
239 return Ok(());
240 }
241 EnterSubBlock => {
242 return Err(Error::NestedBlockInBlockInfo);
243 }
244 DefineAbbreviation => {
245 let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
246 Self::define_abbrev(cursor, self.global_abbrevs.entry(block_id).or_default())?;
247 }
248 UnabbreviatedRecord => {
249 let mut record = RecordIter::from_cursor(cursor)?;
250 let block = u8::try_from(record.id)
251 .ok()
252 .and_then(|c| BlockInfoCode::try_from(c).ok())
253 .ok_or(Error::InvalidBlockInfoRecord(record.id))?;
254 match block {
255 BlockInfoCode::SetBid => {
256 let id = record
257 .u32()
258 .ok()
259 .filter(|_| record.is_empty())
260 .ok_or(Error::InvalidBlockInfoRecord(record.id))?;
261 current_block_id = Some(id);
262 }
263 BlockInfoCode::BlockName => {
264 let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
265 let block_info = self.block_info.entry(block_id).or_default();
266 if let Ok(name) = String::from_utf8(record.string()?) {
267 block_info.name = name;
268 }
269 }
270 BlockInfoCode::SetRecordName => {
271 let block_id = current_block_id.ok_or(Error::MissingSetBid)?;
272 let record_id = record
273 .u64()
274 .map_err(|_| Error::InvalidBlockInfoRecord(record.id))?;
275 let block_info = self.block_info.entry(block_id).or_default();
276 if let Ok(name) = String::from_utf8(record.string()?) {
277 block_info.record_names.insert(record_id, name);
278 }
279 }
280 }
281 }
282 }
283 }
284 }
285
286 pub fn read_block<V: BitStreamVisitor>(
288 &mut self,
289 cursor: Cursor<'_>,
290 block_id: u32,
291 abbrev_width: u8,
292 visitor: &mut V,
293 ) -> Result<(), Error> {
294 Self::visit_block(
295 BlockIter::new(self, cursor, block_id, abbrev_width),
296 visitor,
297 )
298 }
299}
300
301impl<'global_state, 'input> BlockIter<'global_state, 'input> {
302 pub fn next_record<'parent>(
303 &'parent mut self,
304 ) -> Result<Option<RecordIter<'parent, 'input>>, Error> {
305 match self.try_next()? {
306 None => Ok(None),
307 Some(BlockItem::Record(rec)) => Ok(Some(rec)),
308 Some(BlockItem::Block(block)) => Err(Error::UnexpectedBlock(block.id)),
309 }
310 }
311
312 #[doc(hidden)]
313 #[deprecated(note = "renamed to `try_next`")]
314 pub fn next<'parent>(&'parent mut self) -> Result<Option<BlockItem<'parent, 'input>>, Error> {
315 self.try_next()
316 }
317
318 pub fn try_next<'parent>(
320 &'parent mut self,
321 ) -> Result<Option<BlockItem<'parent, 'input>>, Error> {
322 if self.cursor.is_at_end() {
323 return if self.id == BitStreamReader::TOP_LEVEL_BLOCK_ID {
324 Ok(None)
325 } else {
326 Err(Error::MissingEndBlock(self.id))
327 };
328 }
329
330 let abbrev_id = self.cursor.read(self.abbrev_width)? as u32;
331
332 if let Ok(builtin_abbrev) = BuiltinAbbreviationId::try_from(abbrev_id) {
333 use BuiltinAbbreviationId::*;
334 match builtin_abbrev {
335 EndBlock => {
336 self.cursor.align32()?;
337 Ok(None)
338 }
339 EnterSubBlock => {
340 let block_id = self.cursor.read_vbr(8)? as u32;
341 let new_abbrev_width = self.cursor.read_vbr(4)? as u8;
342 self.cursor.align32()?;
343 let block_length = self.cursor.read(32)? as usize * 4;
344 let mut cursor = self.cursor.take_slice(block_length)?;
345
346 if block_id == 0 {
347 self.reader
348 .read_block_info_block(&mut cursor, new_abbrev_width)?;
349 return self.try_next();
350 }
351
352 let block_iter =
354 BlockIter::new(self.reader, cursor, block_id, new_abbrev_width);
355 Ok(Some(BlockItem::Block(block_iter)))
356 }
357 DefineAbbreviation => {
358 BitStreamReader::define_abbrev(
359 &mut self.cursor,
360 &mut self.block_local_abbrevs,
361 )?;
362 self.try_next()
363 }
364 UnabbreviatedRecord => {
365 let record_iter = RecordIter::from_cursor(&mut self.cursor)?;
366 Ok(Some(BlockItem::Record(record_iter)))
367 }
368 }
369 } else {
370 let abbrev_index = abbrev_id as usize - 4;
371 let global_abbrevs = self
372 .reader
373 .global_abbrevs
374 .get(&self.id)
375 .map(|v| v.as_slice())
376 .unwrap_or_default();
377
378 let abbrev = if let Some(local_index) = abbrev_index.checked_sub(global_abbrevs.len()) {
381 self.block_local_abbrevs.get(local_index).cloned()
382 } else {
383 global_abbrevs.get(abbrev_index).cloned()
384 };
385
386 let abbrev = abbrev.ok_or(Error::NoSuchAbbrev {
387 block_id: self.id,
388 abbrev_id,
389 })?;
390
391 Ok(Some(BlockItem::Record(RecordIter::from_cursor_abbrev(
392 &mut self.cursor,
393 abbrev,
394 )?)))
395 }
396 }
397
398 #[must_use]
403 pub fn debug_abbrev_width(&self) -> u8 {
404 self.abbrev_width
405 }
406
407 #[must_use]
412 pub fn debug_data_len(&self) -> Option<usize> {
413 let bits = self.cursor.unconsumed_bit_len();
414 (bits & 31 != 0).then_some(bits >> 3)
415 }
416
417 fn new(
418 reader: &'global_state mut BitStreamReader,
419 cursor: Cursor<'input>,
420 block_id: u32,
421 abbrev_width: u8,
422 ) -> Self {
423 Self {
424 id: block_id,
425 cursor,
426 abbrev_width,
427 block_local_abbrevs: Vec::new(),
428 reader,
429 }
430 }
431}