1use crate::bits::Cursor;
2use crate::bitstream::{Abbreviation, Operand, PayloadOperand, ScalarOperand};
3use std::cell::RefCell;
4use std::collections::HashMap;
5use std::fmt;
6use std::num::NonZero;
7use std::ops::Range;
8use std::sync::Arc;
9
10use crate::read::{BitStreamReader, Error};
11use crate::visitor::{BitStreamVisitor, CollectingVisitor};
12
/// Magic number that [`Signature::parse`] compares against the first little-endian `u32` of
/// the input; when it matches, the input is parsed as a wrapper header followed by the
/// embedded stream.
const LLVM_BITCODE_WRAPPER_MAGIC: u32 = 0x0B17C0DE;
14
/// Fully materialized contents of a bitcode buffer, as built by [`Bitcode::new`].
#[derive(Debug, Clone)]
pub struct Bitcode {
    /// Header information returned by [`Signature::parse`] for the input buffer.
    pub signature: Signature,
    /// Top-level elements collected while reading the stream.
    pub elements: Vec<BitcodeElement>,
    /// Block metadata taken from the reader after parsing, keyed by a `u32`
    /// (presumably the block ID — confirm against `BitStreamReader`).
    pub block_info: HashMap<u32, BlockInfo>,
}
23
/// A block: a numeric ID together with the elements nested inside it.
#[derive(Debug, Clone)]
pub struct Block {
    /// Numeric ID of the block.
    pub id: u32,
    /// Child elements (nested blocks and records).
    pub elements: Vec<BitcodeElement>,
}
37
/// Trailing variable-length data of an abbreviated record, as decoded by
/// [`RecordIter::payload`].
#[derive(Debug, Clone)]
pub enum Payload {
    /// Array of scalar values (any array element type other than char6).
    Array(Vec<u64>),
    /// Array of char6 elements, decoded into a string.
    Char6String(String),
    /// Raw bytes copied out of a blob operand.
    Blob(Vec<u8>),
}
44
/// An owned record: its ID, scalar fields and optional payload.
///
/// Built from a [`RecordIter`] by `RecordIter::into_record`.
#[derive(Debug, Clone)]
pub struct Record {
    /// Record ID (the first value read for the record).
    pub id: u64,
    /// Scalar fields in stream order; exposed through [`Record::fields`].
    fields: Vec<u64>,
    /// Optional trailing payload; removed by [`Record::take_payload`].
    payload: Option<Payload>,
}
57
58impl Record {
59 #[must_use]
60 pub fn fields(&self) -> &[u64] {
61 &self.fields
62 }
63
64 pub fn take_payload(&mut self) -> Option<Payload> {
65 self.payload.take()
66 }
67}
68
/// Decoding state for the operands of a record that have not been read yet.
#[derive(Debug, Clone)]
enum Ops {
    /// The record is described by an abbreviation.
    Abbrev {
        /// Index into `abbrev.fields` of the next scalar operand to read. When it equals
        /// `abbrev.fields.len()` only the payload (if any) is left; larger values mean the
        /// payload has been taken as well.
        state: usize,
        /// The abbreviation that drives decoding.
        abbrev: Arc<Abbreviation>,
    },
    /// Unabbreviated record; holds the number of operands still to be read.
    Full(usize),
}
81
/// Lazily decodes the fields of a single record straight from the bit cursor.
///
/// Fields are read on demand by `try_next()` and the typed helpers built on it. When the
/// iterator is dropped, any fields and payload that were not read are consumed from the
/// cursor.
pub struct RecordIter<'cursor, 'input> {
    /// Record ID, read when the iterator is constructed.
    pub id: u64,
    /// Cursor the remaining operands are read from.
    cursor: &'cursor mut Cursor<'input>,
    /// What is left to decode and how.
    ops: Ops,
}
91
92impl<'cursor, 'input> RecordIter<'cursor, 'input> {
93 pub(crate) fn into_record(mut self) -> Result<Record, Error> {
94 let mut fields = Vec::with_capacity(self.len());
95 while let Some(f) = self.try_next()? {
96 fields.push(f);
97 }
98 Ok(Record {
99 id: self.id,
100 fields,
101 payload: self.payload().ok().flatten(),
102 })
103 }
104
105 fn read_scalar_operand(cursor: &mut Cursor<'_>, operand: ScalarOperand) -> Result<u64, Error> {
106 match operand {
107 ScalarOperand::Char6 => {
108 let value = cursor.read(6)? as u8;
109 Ok(u64::from(match value {
110 0..=25 => value + b'a',
111 26..=51 => value + (b'A' - 26),
112 52..=61 => value - (52 - b'0'),
113 62 => b'.',
114 63 => b'_',
115 _ => return Err(Error::InvalidAbbrev),
116 }))
117 }
118 ScalarOperand::Literal(value) => Ok(value),
119 ScalarOperand::Fixed(width) => Ok(cursor.read(width)?),
120 ScalarOperand::Vbr(width) => Ok(cursor.read_vbr(width)?),
121 }
122 }
123
124 pub(crate) fn from_cursor_abbrev(
125 cursor: &'cursor mut Cursor<'input>,
126 abbrev: Arc<Abbreviation>,
127 ) -> Result<Self, Error> {
128 let id =
129 Self::read_scalar_operand(cursor, *abbrev.fields.first().ok_or(Error::InvalidAbbrev)?)?;
130 Ok(Self {
131 id,
132 cursor,
133 ops: Ops::Abbrev { state: 1, abbrev },
134 })
135 }
136
137 pub(crate) fn from_cursor(cursor: &'cursor mut Cursor<'input>) -> Result<Self, Error> {
138 let id = cursor.read_vbr_fixed::<6>()?;
139 let num_ops = cursor.read_vbr_fixed::<6>()? as usize;
140 Ok(Self {
141 id,
142 cursor,
143 ops: Ops::Full(num_ops),
144 })
145 }
146
147 pub fn payload(&mut self) -> Result<Option<Payload>, Error> {
148 match &mut self.ops {
149 Ops::Abbrev { state, abbrev } => {
150 if *state > abbrev.fields.len() {
151 return Ok(None);
152 }
153 Ok(match abbrev.payload {
154 Some(PayloadOperand::Blob) => Some(Payload::Blob(self.blob()?.to_vec())),
155 Some(PayloadOperand::Array(ScalarOperand::Char6)) => {
156 Some(Payload::Char6String(
157 String::from_utf8(self.string()?).map_err(|_| Error::InvalidAbbrev)?,
158 ))
159 }
160 Some(PayloadOperand::Array(_)) => Some(Payload::Array(self.array()?)),
161 None => None,
162 })
163 }
164 Ops::Full(_) => Ok(None),
165 }
166 }
167
168 #[must_use]
170 pub fn len(&self) -> usize {
171 match &self.ops {
172 Ops::Abbrev { state, abbrev } => abbrev.fields.len().saturating_sub(*state),
173 Ops::Full(num_ops) => *num_ops,
174 }
175 }
176
177 #[must_use]
179 pub fn is_empty(&self) -> bool {
180 self.len() == 0
181 }
182
    /// Deprecated alias that simply forwards to [`Self::try_next`].
    #[doc(hidden)]
    #[deprecated(note = "renamed to `try_next()` to avoid confusion with `Iterator::next`")]
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Result<Option<u64>, Error> {
        self.try_next()
    }
189
190 #[doc(alias = "next")]
192 pub fn try_next(&mut self) -> Result<Option<u64>, Error> {
193 match &mut self.ops {
194 Ops::Abbrev { state, abbrev } => {
195 let Some(&op) = abbrev.fields.get(*state) else {
196 return Ok(None);
197 };
198 *state += 1;
199 Ok(Some(Self::read_scalar_operand(self.cursor, op)?))
200 }
201 Ops::Full(num_ops) => {
202 if *num_ops == 0 {
203 return Ok(None);
204 }
205 *num_ops -= 1;
206 Ok(Some(self.cursor.read_vbr_fixed::<6>()?))
207 }
208 }
209 }
210
211 #[cfg_attr(debug_assertions, track_caller)]
212 pub fn u64(&mut self) -> Result<u64, Error> {
213 match self.try_next()? {
214 Some(v) => Ok(v),
215 None => {
216 debug_assert!(false, "unexpected end of record");
217 Err(Error::EndOfRecord)
218 }
219 }
220 }
221
    /// Reads the next field with [`Self::u64`] and wraps it in `NonZero`; a field equal to
    /// zero yields `Ok(None)`.
    pub fn nzu64(&mut self) -> Result<Option<NonZero<u64>>, Error> {
        self.u64().map(NonZero::new)
    }
225
226 pub fn i64(&mut self) -> Result<i64, Error> {
227 let v = self.u64()?;
228 let shifted = (v >> 1) as i64;
229 Ok(if (v & 1) == 0 {
230 shifted
231 } else if v != 1 {
232 -shifted
233 } else {
234 1 << 63
235 })
236 }
237
238 #[cfg_attr(debug_assertions, track_caller)]
239 pub fn u16(&mut self) -> Result<u16, Error> {
240 let val = self.u64()?;
241 match val.try_into() {
242 Ok(v) => Ok(v),
243 Err(_) => {
244 debug_assert!(false, "{val} overflows u16");
245 Err(Error::ValueOverflow)
246 }
247 }
248 }
249
250 #[cfg_attr(debug_assertions, track_caller)]
251 pub fn u32(&mut self) -> Result<u32, Error> {
252 let val = self.u64()?;
253 match val.try_into() {
254 Ok(v) => Ok(v),
255 Err(_) => {
256 debug_assert!(false, "{val} overflows u32");
257 Err(Error::ValueOverflow)
258 }
259 }
260 }
261
    /// Reads the next field with [`Self::u32`] and wraps it in `NonZero`; a field equal to
    /// zero yields `Ok(None)`.
    pub fn nzu32(&mut self) -> Result<Option<NonZero<u32>>, Error> {
        self.u32().map(NonZero::new)
    }
265
266 #[cfg_attr(debug_assertions, track_caller)]
267 pub fn u8(&mut self) -> Result<u8, Error> {
268 let val = self.u64()?;
269 match val.try_into() {
270 Ok(v) => Ok(v),
271 Err(_) => {
272 debug_assert!(false, "{val} overflows u8");
273 Err(Error::ValueOverflow)
274 }
275 }
276 }
277
278 #[cfg_attr(debug_assertions, track_caller)]
279 #[inline]
280 pub fn try_from<U: TryFrom<u64>, T: TryFrom<U>>(&mut self) -> Result<T, Error> {
281 self.try_next_from::<U, T>()?.ok_or(Error::EndOfRecord)
282 }
283
284 #[cfg_attr(debug_assertions, track_caller)]
285 pub fn try_next_from<U: TryFrom<u64>, T: TryFrom<U>>(&mut self) -> Result<Option<T>, Error> {
286 match self.try_next()? {
287 Some(val) => {
288 if let Some(val) = val.try_into().ok().and_then(|v| T::try_from(v).ok()) {
289 Ok(Some(val))
290 } else {
291 debug_assert!(
292 false,
293 "{} can't be made from {val} as {}",
294 std::any::type_name::<T>(),
295 std::any::type_name::<U>()
296 );
297 Err(Error::ValueOverflow)
298 }
299 }
300 None => Ok(None),
301 }
302 }
303
    /// Reads the next field with [`Self::u8`] and wraps it in `NonZero`; a field equal to
    /// zero yields `Ok(None)`.
    pub fn nzu8(&mut self) -> Result<Option<NonZero<u8>>, Error> {
        self.u8().map(NonZero::new)
    }
307
308 #[cfg_attr(debug_assertions, track_caller)]
309 pub fn bool(&mut self) -> Result<bool, Error> {
310 match self.u64()? {
311 0 => Ok(false),
312 1 => Ok(true),
313 val => {
314 debug_assert!(false, "{val} overflows bool");
315 Err(Error::ValueOverflow)
316 }
317 }
318 }
319
320 pub fn range(&mut self) -> Result<Range<usize>, Error> {
322 let start = self.u64()? as usize;
323 Ok(Range {
324 start,
325 end: start
326 .checked_add(self.u64()? as usize)
327 .ok_or(Error::ValueOverflow)?,
328 })
329 }
330
331 pub fn blob(&mut self) -> Result<&'input [u8], Error> {
332 match &mut self.ops {
333 Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
334 Some(PayloadOperand::Blob) => {
335 let length = self.cursor.read_vbr_fixed::<6>()? as usize;
336 self.cursor.align32()?;
337 let data = self.cursor.read_bytes(length)?;
338 self.cursor.align32()?;
339 Ok(data)
340 }
341 other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
342 },
343 Ops::Full(_) => Err(Error::UnexpectedOperand(None)),
344 }
345 }
346
347 pub fn array(&mut self) -> Result<Vec<u64>, Error> {
348 match &mut self.ops {
349 Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
350 Some(PayloadOperand::Array(op)) => {
351 let len = self.cursor.read_vbr_fixed::<6>()? as usize;
352 let mut out = Vec::with_capacity(len);
353 for _ in 0..len {
354 if out.len() == out.capacity() {
355 debug_assert!(false);
356 break;
357 }
358 out.push(Self::read_scalar_operand(self.cursor, op)?);
359 }
360 Ok(out)
361 }
362 other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
363 },
364 Ops::Full(num_ops) => {
366 let len = *num_ops;
367 *num_ops = 0;
368 let mut out = Vec::with_capacity(len);
369 for _ in 0..len {
370 if out.len() == out.capacity() {
371 debug_assert!(false);
372 break;
373 }
374 out.push(self.cursor.read_vbr_fixed::<6>()?);
375 }
376 Ok(out)
377 }
378 }
379 }
380
381 fn take_payload_operand(
383 state: &mut usize,
384 abbrev: &Abbreviation,
385 ) -> Result<Option<PayloadOperand>, Error> {
386 if *state == abbrev.fields.len() {
387 if abbrev.payload.is_some() {
388 *state += 1;
389 }
390 Ok(abbrev.payload)
391 } else {
392 Err(Error::UnexpectedOperand(
393 abbrev.fields.get(*state).copied().map(Operand::Scalar),
394 ))
395 }
396 }
397
    /// Reads the record's string data with [`Self::string`] and validates it as UTF-8.
    ///
    /// # Errors
    ///
    /// Returns `Error::Encoding` when the bytes are not valid UTF-8, plus any error from
    /// `string()`.
    pub fn string_utf8(&mut self) -> Result<String, Error> {
        String::from_utf8(self.string()?).map_err(Error::Encoding)
    }
405
406 #[cfg_attr(debug_assertions, track_caller)]
411 pub fn string(&mut self) -> Result<Vec<u8>, Error> {
412 match &mut self.ops {
413 Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
414 Some(PayloadOperand::Array(el)) => {
415 *state += 1;
416 let len = self.cursor.read_vbr_fixed::<6>()? as usize;
417 let mut out = Vec::with_capacity(len);
418
419 match el {
420 ScalarOperand::Char6 => {
421 for _ in 0..len {
422 if out.len() == out.capacity() {
423 debug_assert!(false);
424 break;
425 }
426 let ch = match self.cursor.read(6)? as u8 {
427 value @ 0..=25 => value + b'a',
428 value @ 26..=51 => value + (b'A' - 26),
429 value @ 52..=61 => value - (52 - b'0'),
430 62 => b'.',
431 63 => b'_',
432 _ => return Err(Error::InvalidAbbrev),
433 };
434 out.push(ch);
435 }
436 }
437 ScalarOperand::Fixed(width @ 6..=8) => {
438 for _ in 0..len {
439 if out.len() == out.capacity() {
440 debug_assert!(false);
441 break;
442 }
443 out.push(self.cursor.read(width)? as u8);
444 }
445 }
446 other => {
447 return Err(Error::UnexpectedOperand(Some(Operand::Scalar(other))));
448 }
449 }
450 Ok(out)
451 }
452 other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
453 },
454 Ops::Full(num_ops) => {
455 let len = std::mem::replace(num_ops, 0);
456 let mut out = Vec::with_capacity(len);
457 for _ in 0..len {
458 let ch = self.cursor.read_vbr_fixed::<6>()?;
459 out.push(match u8::try_from(ch) {
460 Ok(c) => c,
461 Err(_) => {
462 debug_assert!(false, "{ch} too big for char");
463 return Err(Error::ValueOverflow);
464 }
465 });
466 }
467 Ok(out)
468 }
469 }
470 }
471
472 pub fn zstring(&mut self) -> Result<String, Error> {
474 let mut s = String::new();
475 while let Some(b) = self.nzu8()? {
476 s.push(b.get() as char);
477 }
478 Ok(s)
479 }
480
481 #[must_use]
486 pub fn debug_abbrev_id(&self) -> Option<u32> {
487 match &self.ops {
488 Ops::Abbrev { abbrev, .. } => Some(abbrev.id),
489 Ops::Full(_) => None,
490 }
491 }
492
493 fn with_cloned_cursor<'new_cursor>(
495 &self,
496 cursor: &'new_cursor mut Cursor<'input>,
497 ) -> RecordIter<'new_cursor, 'input> {
498 RecordIter {
499 id: self.id,
500 ops: self.ops.clone(),
501 cursor,
502 }
503 }
504}
505
506impl Iterator for RecordIter<'_, '_> {
507 type Item = Result<u64, Error>;
508 fn next(&mut self) -> Option<Self::Item> {
509 self.try_next().transpose()
510 }
511}
512
513impl Drop for RecordIter<'_, '_> {
514 fn drop(&mut self) {
516 while let Ok(Some(_)) = self.try_next() {}
517 if let Ops::Abbrev { abbrev, .. } = &self.ops
518 && abbrev.payload.is_some()
519 {
520 let _ = self.payload();
521 }
522 }
523}
524
/// `Debug` helper that prints the fields (and payload) of the wrapped iterator as a list.
/// The `RefCell` lets `fmt`, which only receives `&self`, advance the iterator.
struct RecordIterDebugFields<'c, 'i>(RefCell<RecordIter<'c, 'i>>);
/// `Debug` helper that prints the `Ok` or `Err` content of a `Result` directly, without
/// the surrounding `Ok(..)` / `Err(..)` wrapper.
struct RecordIterDebugResult<T, E>(Result<T, E>);
527
528impl fmt::Debug for RecordIter<'_, '_> {
529 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
530 let mut c = self.cursor.clone();
531 let fields = RecordIterDebugFields(RefCell::new(self.with_cloned_cursor(&mut c)));
532
533 f.debug_struct("RecordIter")
534 .field("id", &self.id)
535 .field("fields", &fields)
536 .field("ops", &self.ops)
537 .field("cursor", &self.cursor)
538 .finish()
539 }
540}
541
542impl fmt::Debug for RecordIterDebugFields<'_, '_> {
543 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
544 let mut iter = self.0.borrow_mut();
545 let mut d = f.debug_list();
546 d.entries(iter.by_ref().map(RecordIterDebugResult));
547 if let Some(p) = iter.payload().transpose() {
548 d.entries([RecordIterDebugResult(p)]);
549 }
550 d.finish()
551 }
552}
553
554impl<T: fmt::Debug, E: fmt::Debug> fmt::Debug for RecordIterDebugResult<T, E> {
555 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
556 match &self.0 {
557 Ok(t) => t.fmt(f),
558 Err(e) => e.fmt(f),
559 }
560 }
561}
562
/// One element of a parsed stream: either a nested block or a record.
#[derive(Debug, Clone)]
pub enum BitcodeElement {
    /// A block, which may itself contain further elements.
    Block(Block),
    /// A single record.
    Record(Record),
}
571
572impl BitcodeElement {
573 #[must_use]
575 pub fn is_block(&self) -> bool {
576 matches!(self, Self::Block(_))
577 }
578
579 #[must_use]
581 pub fn as_block(&self) -> Option<&Block> {
582 match self {
583 Self::Block(block) => Some(block),
584 Self::Record(_) => None,
585 }
586 }
587
588 pub fn as_block_mut(&mut self) -> Option<&mut Block> {
590 match self {
591 Self::Block(block) => Some(block),
592 Self::Record(_) => None,
593 }
594 }
595
596 #[must_use]
598 pub fn is_record(&self) -> bool {
599 matches!(self, Self::Record(_))
600 }
601
602 #[must_use]
604 pub fn as_record(&self) -> Option<&Record> {
605 match self {
606 Self::Block(_) => None,
607 Self::Record(record) => Some(record),
608 }
609 }
610
611 pub fn as_record_mut(&mut self) -> Option<&mut Record> {
613 match self {
614 Self::Block(_) => None,
615 Self::Record(record) => Some(record),
616 }
617 }
618}
619
/// Descriptive names associated with a block; stored in `Bitcode::block_info`.
#[derive(Debug, Clone, Default)]
pub struct BlockInfo {
    /// Name of the block.
    pub name: String,
    /// Names of records, keyed by a `u64` (presumably the record ID — confirm against the
    /// reader that fills this map).
    pub record_names: HashMap<u64, String>,
}
628
/// Header information extracted from the start of the input by [`Signature::parse`].
///
/// Field order matters for the derived ordering traits, so it must not be changed.
#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq)]
pub struct Signature {
    /// First little-endian 32-bit word of the input.
    pub magic: u32,
    /// For wrapped input, the first 32-bit word of the embedded stream; otherwise `0`.
    pub magic2: u32,
    /// Wrapper header `version` word; `0` when there is no wrapper.
    pub version: u32,
    /// Wrapper header `offset` word (byte offset of the embedded stream within the input);
    /// `4` when there is no wrapper.
    pub offset: u32,
    /// Wrapper header `size` word (byte length of the embedded stream); without a wrapper,
    /// the number of bytes following the magic.
    pub size: u32,
    /// Wrapper header CPU type word; `0` when there is no wrapper.
    pub cpu_type: u32,
}
639
640impl Signature {
641 #[must_use]
642 pub fn parse(data: &[u8]) -> Option<(Self, &[u8])> {
643 let (signature, remaining_data) = data.split_first_chunk::<4>()?;
644 let magic = u32::from_le_bytes(*signature);
645 if magic != LLVM_BITCODE_WRAPPER_MAGIC {
646 Some((
647 Self {
648 version: 0,
649 magic,
650 magic2: 0,
651 offset: 4,
652 size: remaining_data.len() as _,
653 cpu_type: 0,
654 },
655 remaining_data,
656 ))
657 } else {
658 if data.len() < 20 {
660 return None;
661 }
662 let mut words = data
663 .chunks_exact(4)
664 .skip(1)
665 .map(|w| u32::from_le_bytes(w.try_into().unwrap()));
666 let version = words.next()?;
667 let offset = words.next()?;
668 let size = words.next()?;
669 let cpu_id = words.next()?;
670 let data = data.get(offset as usize..offset as usize + size as usize)?;
671 let (magic2, remaining_data) = data.split_first_chunk::<4>()?;
672 let magic2 = u32::from_le_bytes(*magic2);
673 Some((
674 Self {
675 version,
676 magic,
677 magic2,
678 offset,
679 size,
680 cpu_type: cpu_id,
681 },
682 remaining_data,
683 ))
684 }
685 }
686}
687
688impl Bitcode {
689 pub fn new(data: &[u8]) -> Result<Self, Error> {
693 let (signature, stream) = Signature::parse(data).ok_or(Error::InvalidSignature(0))?;
694 let mut reader = BitStreamReader::new();
695 let mut visitor = CollectingVisitor::new();
696 reader.read_block(
697 Cursor::new(stream),
698 BitStreamReader::TOP_LEVEL_BLOCK_ID,
699 2,
700 &mut visitor,
701 )?;
702 Ok(Self {
703 signature,
704 elements: visitor.finalize_top_level_elements(),
705 block_info: reader.block_info,
706 })
707 }
708
709 pub fn read<V>(data: &[u8], visitor: &mut V) -> Result<(), Error>
713 where
714 V: BitStreamVisitor,
715 {
716 let (header, stream) = Signature::parse(data).ok_or(Error::InvalidSignature(0))?;
717 if !visitor.validate(header) {
718 return Err(Error::InvalidSignature(header.magic));
719 }
720 let mut reader = BitStreamReader::new();
721 reader.read_block(
722 Cursor::new(stream),
723 BitStreamReader::TOP_LEVEL_BLOCK_ID,
724 2,
725 visitor,
726 )
727 }
728}