1use crate::bits::Cursor;
2use crate::bitstream::{Abbreviation, Operand};
3use crate::bitstream::{PayloadOperand, ScalarOperand};
4use std::cell::RefCell;
5use std::collections::HashMap;
6use std::fmt;
7use std::num::NonZero;
8use std::ops::Range;
9use std::sync::Arc;
10
11use crate::read::{BitStreamReader, Error};
12use crate::visitor::{BitStreamVisitor, CollectingVisitor};
13
/// Magic number (decoded as a little-endian `u32` from the first four input bytes) that
/// makes [`Signature::parse`] read the extended wrapper header fields.
const LLVM_BITCODE_WRAPPER_MAGIC: u32 = 0x0B17_C0DE;
15
16#[derive(Debug, Clone)]
19pub struct Bitcode {
20 pub signature: Signature,
21 pub elements: Vec<BitcodeElement>,
22 pub block_info: HashMap<u32, BlockInfo>,
23}
24
25#[derive(Debug, Clone)]
32pub struct Block {
33 pub id: u32,
35 pub elements: Vec<BitcodeElement>,
37}
38
/// Trailing payload of an abbreviated record, as decoded by `RecordIter::payload`.
#[derive(Clone, Debug)]
pub enum Payload {
    /// Array payload whose elements were decoded as plain integers.
    Array(Vec<u64>),
    /// Array payload whose elements use the Char6 encoding, decoded into text.
    Char6String(String),
    /// Raw bytes of a blob payload.
    Blob(Vec<u8>),
}
45
46#[derive(Debug, Clone)]
50pub struct Record {
51 pub id: u64,
53 fields: Vec<u64>,
55 payload: Option<Payload>,
57}
58
59impl Record {
60 #[must_use]
61 pub fn fields(&self) -> &[u64] {
62 &self.fields
63 }
64
65 pub fn take_payload(&mut self) -> Option<Payload> {
66 self.payload.take()
67 }
68}
69
70#[derive(Debug, Clone)]
71enum Ops {
72 Abbrev {
73 state: usize,
77 abbrev: Arc<Abbreviation>,
78 },
79 Full(usize),
81}
82
83pub struct RecordIter<'cursor, 'input> {
87 pub id: u64,
89 cursor: &'cursor mut Cursor<'input>,
90 ops: Ops,
91}
92
93impl<'cursor, 'input> RecordIter<'cursor, 'input> {
94 pub(crate) fn into_record(mut self) -> Result<Record, Error> {
95 let mut fields = Vec::with_capacity(self.len());
96 while let Some(f) = self.try_next()? {
97 fields.push(f);
98 }
99 Ok(Record {
100 id: self.id,
101 fields,
102 payload: self.payload().ok().flatten(),
103 })
104 }
105
106 fn read_scalar_operand(cursor: &mut Cursor<'_>, operand: ScalarOperand) -> Result<u64, Error> {
107 match operand {
108 ScalarOperand::Char6 => {
109 let value = cursor.read(6)? as u8;
110 Ok(u64::from(match value {
111 0..=25 => value + b'a',
112 26..=51 => value + (b'A' - 26),
113 52..=61 => value - (52 - b'0'),
114 62 => b'.',
115 63 => b'_',
116 _ => return Err(Error::InvalidAbbrev),
117 }))
118 }
119 ScalarOperand::Literal(value) => Ok(value),
120 ScalarOperand::Fixed(width) => Ok(cursor.read(width)?),
121 ScalarOperand::Vbr(width) => Ok(cursor.read_vbr(width)?),
122 }
123 }
124
125 pub(crate) fn from_cursor_abbrev(
126 cursor: &'cursor mut Cursor<'input>,
127 abbrev: Arc<Abbreviation>,
128 ) -> Result<Self, Error> {
129 let id =
130 Self::read_scalar_operand(cursor, *abbrev.fields.first().ok_or(Error::InvalidAbbrev)?)?;
131 Ok(Self {
132 id,
133 cursor,
134 ops: Ops::Abbrev { state: 1, abbrev },
135 })
136 }
137
138 pub(crate) fn from_cursor(cursor: &'cursor mut Cursor<'input>) -> Result<Self, Error> {
139 let id = cursor.read_vbr(6)?;
140 let num_ops = cursor.read_vbr(6)? as usize;
141 Ok(Self {
142 id,
143 cursor,
144 ops: Ops::Full(num_ops),
145 })
146 }
147
148 pub fn payload(&mut self) -> Result<Option<Payload>, Error> {
149 match &mut self.ops {
150 Ops::Abbrev { state, abbrev } => {
151 if *state > abbrev.fields.len() {
152 return Ok(None);
153 }
154 Ok(match abbrev.payload {
155 Some(PayloadOperand::Blob) => Some(Payload::Blob(self.blob()?.to_vec())),
156 Some(PayloadOperand::Array(ScalarOperand::Char6)) => {
157 Some(Payload::Char6String(
158 String::from_utf8(self.string()?).map_err(|_| Error::InvalidAbbrev)?,
159 ))
160 }
161 Some(PayloadOperand::Array(_)) => Some(Payload::Array(self.array()?)),
162 None => None,
163 })
164 }
165 Ops::Full(_) => Ok(None),
166 }
167 }
168
169 #[must_use]
171 pub fn len(&self) -> usize {
172 match &self.ops {
173 Ops::Abbrev { state, abbrev } => abbrev.fields.len().saturating_sub(*state),
174 Ops::Full(num_ops) => *num_ops,
175 }
176 }
177
178 #[must_use]
180 pub fn is_empty(&self) -> bool {
181 self.len() == 0
182 }
183
184 #[doc(hidden)]
185 #[deprecated(note = "renamed to `try_next()` to avoid confusion with `Iterator::next`")]
186 #[allow(clippy::should_implement_trait)]
187 pub fn next(&mut self) -> Result<Option<u64>, Error> {
188 self.try_next()
189 }
190
191 #[doc(alias = "next")]
193 pub fn try_next(&mut self) -> Result<Option<u64>, Error> {
194 match &mut self.ops {
195 Ops::Abbrev { state, abbrev } => {
196 let Some(&op) = abbrev.fields.get(*state) else {
197 return Ok(None);
198 };
199 *state += 1;
200 Ok(Some(Self::read_scalar_operand(self.cursor, op)?))
201 }
202 Ops::Full(num_ops) => {
203 if *num_ops == 0 {
204 return Ok(None);
205 }
206 *num_ops -= 1;
207 Ok(Some(self.cursor.read_vbr(6)?))
208 }
209 }
210 }
211
212 #[cfg_attr(debug_assertions, track_caller)]
213 pub fn u64(&mut self) -> Result<u64, Error> {
214 match self.try_next()? {
215 Some(v) => Ok(v),
216 None => {
217 debug_assert!(false, "unexpected end of record");
218 Err(Error::EndOfRecord)
219 }
220 }
221 }
222
223 pub fn nzu64(&mut self) -> Result<Option<NonZero<u64>>, Error> {
224 self.u64().map(NonZero::new)
225 }
226
227 pub fn i64(&mut self) -> Result<i64, Error> {
228 let v = self.u64()?;
229 let shifted = (v >> 1) as i64;
230 Ok(if (v & 1) == 0 {
231 shifted
232 } else if v != 1 {
233 -shifted
234 } else {
235 1 << 63
236 })
237 }
238
239 #[cfg_attr(debug_assertions, track_caller)]
240 pub fn u32(&mut self) -> Result<u32, Error> {
241 let val = self.u64()?;
242 match val.try_into() {
243 Ok(v) => Ok(v),
244 Err(_) => {
245 debug_assert!(false, "{val} overflows u32");
246 Err(Error::ValueOverflow)
247 }
248 }
249 }
250
251 pub fn nzu32(&mut self) -> Result<Option<NonZero<u32>>, Error> {
252 self.u32().map(NonZero::new)
253 }
254
255 #[cfg_attr(debug_assertions, track_caller)]
256 pub fn u8(&mut self) -> Result<u8, Error> {
257 let val = self.u64()?;
258 match val.try_into() {
259 Ok(v) => Ok(v),
260 Err(_) => {
261 debug_assert!(false, "{val} overflows u8");
262 Err(Error::ValueOverflow)
263 }
264 }
265 }
266
267 #[cfg_attr(debug_assertions, track_caller)]
268 pub fn try_from<U: TryFrom<u64>, T: TryFrom<U>>(&mut self) -> Result<T, Error> {
269 let val = self.u64()?;
270 match val.try_into().ok().and_then(|v| T::try_from(v).ok()) {
271 Some(val) => Ok(val),
272 None => {
273 debug_assert!(false, "{val} overflows {}", std::any::type_name::<U>());
274 Err(Error::ValueOverflow)
275 }
276 }
277 }
278
279 pub fn nzu8(&mut self) -> Result<Option<NonZero<u8>>, Error> {
280 self.u8().map(NonZero::new)
281 }
282
283 #[cfg_attr(debug_assertions, track_caller)]
284 pub fn bool(&mut self) -> Result<bool, Error> {
285 match self.u64()? {
286 0 => Ok(false),
287 1 => Ok(true),
288 val => {
289 debug_assert!(false, "{val} overflows bool");
290 Err(Error::ValueOverflow)
291 }
292 }
293 }
294
295 pub fn range(&mut self) -> Result<Range<usize>, Error> {
296 let start = self.u64()? as usize;
297 Ok(Range {
298 start,
299 end: start + self.u64()? as usize,
300 })
301 }
302
303 pub fn blob(&mut self) -> Result<&'input [u8], Error> {
304 match &mut self.ops {
305 Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
306 Some(PayloadOperand::Blob) => {
307 let length = self.cursor.read_vbr(6)? as usize;
308 self.cursor.align32()?;
309 let data = self.cursor.read_bytes(length)?;
310 self.cursor.align32()?;
311 Ok(data)
312 }
313 other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
314 },
315 Ops::Full(_) => Err(Error::UnexpectedOperand(None)),
316 }
317 }
318
319 pub fn array(&mut self) -> Result<Vec<u64>, Error> {
320 match &mut self.ops {
321 Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
322 Some(PayloadOperand::Array(op)) => {
323 let len = self.cursor.read_vbr(6)? as usize;
324 let mut out = Vec::with_capacity(len);
325 for _ in 0..len {
326 if out.len() == out.capacity() {
327 debug_assert!(false);
328 break;
329 }
330 out.push(Self::read_scalar_operand(self.cursor, op)?);
331 }
332 Ok(out)
333 }
334 other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
335 },
336 Ops::Full(num_ops) => {
338 let len = *num_ops;
339 *num_ops = 0;
340 let mut out = Vec::with_capacity(len);
341 for _ in 0..len {
342 if out.len() == out.capacity() {
343 debug_assert!(false);
344 break;
345 }
346 out.push(self.cursor.read_vbr(6)?);
347 }
348 Ok(out)
349 }
350 }
351 }
352
353 fn take_payload_operand(
355 state: &mut usize,
356 abbrev: &Abbreviation,
357 ) -> Result<Option<PayloadOperand>, Error> {
358 if *state == abbrev.fields.len() {
359 if abbrev.payload.is_some() {
360 *state += 1;
361 }
362 Ok(abbrev.payload)
363 } else {
364 Err(Error::UnexpectedOperand(
365 abbrev.fields.get(*state).copied().map(Operand::Scalar),
366 ))
367 }
368 }
369
370 pub fn string_utf8(&mut self) -> Result<String, Error> {
375 String::from_utf8(self.string()?).map_err(Error::Encoding)
376 }
377
378 #[cfg_attr(debug_assertions, track_caller)]
383 pub fn string(&mut self) -> Result<Vec<u8>, Error> {
384 match &mut self.ops {
385 Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
386 Some(PayloadOperand::Array(el)) => {
387 *state += 1;
388 let len = self.cursor.read_vbr(6)? as usize;
389 let mut out = Vec::with_capacity(len);
390
391 match el {
392 ScalarOperand::Char6 => {
393 for _ in 0..len {
394 if out.len() == out.capacity() {
395 debug_assert!(false);
396 break;
397 }
398 let ch = match self.cursor.read(6)? as u8 {
399 value @ 0..=25 => value + b'a',
400 value @ 26..=51 => value + (b'A' - 26),
401 value @ 52..=61 => value - (52 - b'0'),
402 62 => b'.',
403 63 => b'_',
404 _ => return Err(Error::InvalidAbbrev),
405 };
406 out.push(ch);
407 }
408 }
409 ScalarOperand::Fixed(width @ 6..=8) => {
410 for _ in 0..len {
411 if out.len() == out.capacity() {
412 debug_assert!(false);
413 break;
414 }
415 out.push(self.cursor.read(width)? as u8);
416 }
417 }
418 other => {
419 return Err(Error::UnexpectedOperand(Some(Operand::Scalar(other))));
420 }
421 }
422 Ok(out)
423 }
424 other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
425 },
426 Ops::Full(num_ops) => {
427 let len = std::mem::replace(num_ops, 0);
428 let mut out = Vec::with_capacity(len);
429 for _ in 0..len {
430 let ch = self.cursor.read_vbr(6)?;
431 out.push(match u8::try_from(ch) {
432 Ok(c) => c,
433 Err(_) => {
434 debug_assert!(false, "{ch} too big for char");
435 return Err(Error::ValueOverflow);
436 }
437 });
438 }
439 Ok(out)
440 }
441 }
442 }
443
444 pub fn zstring(&mut self) -> Result<String, Error> {
446 let mut s = String::new();
447 while let Some(b) = self.nzu8()? {
448 s.push(b.get() as char);
449 }
450 Ok(s)
451 }
452
453 #[must_use]
458 pub fn debug_abbrev_id(&self) -> Option<u32> {
459 match &self.ops {
460 Ops::Abbrev { abbrev, .. } => Some(abbrev.id),
461 Ops::Full(_) => None,
462 }
463 }
464
465 fn with_cloned_cursor<'new_cursor>(
467 &self,
468 cursor: &'new_cursor mut Cursor<'input>,
469 ) -> RecordIter<'new_cursor, 'input> {
470 RecordIter {
471 id: self.id,
472 ops: self.ops.clone(),
473 cursor,
474 }
475 }
476}
477
478impl Iterator for RecordIter<'_, '_> {
479 type Item = Result<u64, Error>;
480 fn next(&mut self) -> Option<Self::Item> {
481 self.try_next().transpose()
482 }
483}
484
485impl Drop for RecordIter<'_, '_> {
486 fn drop(&mut self) {
488 while let Ok(Some(_)) = self.try_next() {}
489 if let Ops::Abbrev { abbrev, .. } = &self.ops
490 && abbrev.payload.is_some()
491 {
492 let _ = self.payload();
493 }
494 }
495}
496
497struct RecordIterDebugFields<'c, 'i>(RefCell<RecordIter<'c, 'i>>);
/// Debug-only wrapper around a `Result` whose `Debug` impl prints the contained value
/// or error directly.
struct RecordIterDebugResult<T, E>(Result<T, E>);
499
500impl fmt::Debug for RecordIter<'_, '_> {
501 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
502 let mut c = self.cursor.clone();
503 let fields = RecordIterDebugFields(RefCell::new(self.with_cloned_cursor(&mut c)));
504
505 f.debug_struct("RecordIter")
506 .field("id", &self.id)
507 .field("fields", &fields)
508 .field("ops", &self.ops)
509 .field("cursor", &self.cursor)
510 .finish()
511 }
512}
513
514impl fmt::Debug for RecordIterDebugFields<'_, '_> {
515 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
516 let mut iter = self.0.borrow_mut();
517 let mut d = f.debug_list();
518 d.entries(iter.by_ref().map(RecordIterDebugResult));
519 if let Some(p) = iter.payload().transpose() {
520 d.entries([RecordIterDebugResult(p)]);
521 }
522 d.finish()
523 }
524}
525
526impl<T: fmt::Debug, E: fmt::Debug> fmt::Debug for RecordIterDebugResult<T, E> {
527 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
528 match &self.0 {
529 Ok(t) => t.fmt(f),
530 Err(e) => e.fmt(f),
531 }
532 }
533}
534
535#[derive(Debug, Clone)]
537pub enum BitcodeElement {
538 Block(Block),
540 Record(Record),
542}
543
544impl BitcodeElement {
545 #[must_use]
547 pub fn is_block(&self) -> bool {
548 matches!(self, Self::Block(_))
549 }
550
551 #[must_use]
553 pub fn as_block(&self) -> Option<&Block> {
554 match self {
555 Self::Block(block) => Some(block),
556 Self::Record(_) => None,
557 }
558 }
559
560 pub fn as_block_mut(&mut self) -> Option<&mut Block> {
562 match self {
563 Self::Block(block) => Some(block),
564 Self::Record(_) => None,
565 }
566 }
567
568 #[must_use]
570 pub fn is_record(&self) -> bool {
571 matches!(self, Self::Record(_))
572 }
573
574 #[must_use]
576 pub fn as_record(&self) -> Option<&Record> {
577 match self {
578 Self::Block(_) => None,
579 Self::Record(record) => Some(record),
580 }
581 }
582
583 pub fn as_record_mut(&mut self) -> Option<&mut Record> {
585 match self {
586 Self::Block(_) => None,
587 Self::Record(record) => Some(record),
588 }
589 }
590}
591
/// Naming metadata for a block; the default value has an empty name and no record names.
#[derive(Clone, Debug, Default)]
pub struct BlockInfo {
    /// Name of the block.
    pub name: String,
    /// Record names, keyed by a `u64`.
    /// NOTE(review): the key is presumably the record ID — confirm against the reader.
    pub record_names: HashMap<u64, String>,
}
600
/// Header information extracted by [`Signature::parse`].
///
/// The derived ordering compares fields in declaration order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Signature {
    /// First four bytes of the input, read as a little-endian `u32`.
    pub magic: u32,
    /// First four bytes of the wrapped payload when a wrapper header is present; `0`
    /// otherwise.
    pub magic2: u32,
    /// Version word of the wrapper header; `0` without a wrapper.
    pub version: u32,
    /// Byte offset of the payload within the input (`4` without a wrapper).
    pub offset: u32,
    /// Payload size in bytes as recorded by the parser.
    pub size: u32,
    /// CPU type word of the wrapper header; `0` without a wrapper.
    pub cpu_type: u32,
}
611
612impl Signature {
613 #[must_use]
614 pub fn parse(data: &[u8]) -> Option<(Self, &[u8])> {
615 let (signature, remaining_data) = data.split_first_chunk::<4>()?;
616 let magic = u32::from_le_bytes(*signature);
617 if magic != LLVM_BITCODE_WRAPPER_MAGIC {
618 Some((
619 Self {
620 version: 0,
621 magic,
622 magic2: 0,
623 offset: 4,
624 size: remaining_data.len() as _,
625 cpu_type: 0,
626 },
627 remaining_data,
628 ))
629 } else {
630 if data.len() < 20 {
632 return None;
633 }
634 let mut words = data
635 .chunks_exact(4)
636 .skip(1)
637 .map(|w| u32::from_le_bytes(w.try_into().unwrap()));
638 let version = words.next()?;
639 let offset = words.next()?;
640 let size = words.next()?;
641 let cpu_id = words.next()?;
642 let data = data.get(offset as usize..offset as usize + size as usize)?;
643 let (magic2, remaining_data) = data.split_first_chunk::<4>()?;
644 let magic2 = u32::from_le_bytes(*magic2);
645 Some((
646 Self {
647 version,
648 magic,
649 magic2,
650 offset,
651 size,
652 cpu_type: cpu_id,
653 },
654 remaining_data,
655 ))
656 }
657 }
658}
659
660impl Bitcode {
661 pub fn new(data: &[u8]) -> Result<Self, Error> {
665 let (signature, stream) = Signature::parse(data).ok_or(Error::InvalidSignature(0))?;
666 let mut reader = BitStreamReader::new();
667 let mut visitor = CollectingVisitor::new();
668 reader.read_block(
669 Cursor::new(stream),
670 BitStreamReader::TOP_LEVEL_BLOCK_ID,
671 2,
672 &mut visitor,
673 )?;
674 Ok(Self {
675 signature,
676 elements: visitor.finalize_top_level_elements(),
677 block_info: reader.block_info,
678 })
679 }
680
681 pub fn read<V>(data: &[u8], visitor: &mut V) -> Result<(), Error>
685 where
686 V: BitStreamVisitor,
687 {
688 let (header, stream) = Signature::parse(data).ok_or(Error::InvalidSignature(0))?;
689 if !visitor.validate(header) {
690 return Err(Error::InvalidSignature(header.magic));
691 }
692 let mut reader = BitStreamReader::new();
693 reader.read_block(
694 Cursor::new(stream),
695 BitStreamReader::TOP_LEVEL_BLOCK_ID,
696 2,
697 visitor,
698 )
699 }
700}