1use crate::bits::Cursor;
2use crate::bitstream::{Abbreviation, Operand};
3use crate::bitstream::{PayloadOperand, ScalarOperand};
4use std::cell::RefCell;
5use std::collections::HashMap;
6use std::fmt;
7use std::num::NonZero;
8use std::ops::Range;
9use std::sync::Arc;
10
11use crate::read::{BitStreamReader, Error};
12use crate::visitor::{BitStreamVisitor, CollectingVisitor};
13
/// Magic value that marks a wrapper header.
///
/// [`Signature::parse`] compares the first four input bytes (decoded as a
/// little-endian `u32`) against this constant; on a match it reads the
/// remaining wrapper fields (version, offset, size, CPU type) before locating
/// the embedded stream.
const LLVM_BITCODE_WRAPPER_MAGIC: u32 = 0x0B17C0DE;
15
/// Fully materialized parse result produced by [`Bitcode::new`].
#[derive(Debug, Clone)]
pub struct Bitcode {
    /// Signature returned by [`Signature::parse`] for the input bytes.
    pub signature: Signature,
    /// Top-level elements gathered by the collecting visitor.
    pub elements: Vec<BitcodeElement>,
    /// Block-info table taken from the reader after the top-level block was
    /// read. NOTE(review): the `u32` key is presumably a block id; confirm
    /// against `BitStreamReader`.
    pub block_info: HashMap<u32, BlockInfo>,
}
24
/// A block: a numeric id plus the elements stored inside it.
#[derive(Debug, Clone)]
pub struct Block {
    /// Numeric id of this block.
    pub id: u32,
    /// Elements held by this block; each one is either another [`Block`] or a
    /// [`Record`] (see [`BitcodeElement`]).
    pub elements: Vec<BitcodeElement>,
}
38
/// Owned trailing payload of an abbreviated record, as built by
/// `RecordIter::payload`.
#[derive(Debug, Clone)]
pub enum Payload {
    /// Array payload whose element operand is anything other than `Char6`.
    Array(Vec<u64>),
    /// Array payload of `Char6` elements, decoded into a string.
    Char6String(String),
    /// Blob payload, copied out of the input into an owned buffer.
    Blob(Vec<u8>),
}
45
46#[derive(Debug, Clone)]
50pub struct Record {
51 pub id: u64,
53 fields: Vec<u64>,
55 payload: Option<Payload>,
57}
58
59impl Record {
60 #[must_use]
61 pub fn fields(&self) -> &[u64] {
62 &self.fields
63 }
64
65 pub fn take_payload(&mut self) -> Option<Payload> {
66 self.payload.take()
67 }
68}
69
/// Decoding state for the operands of a record that have not been read yet.
#[derive(Debug, Clone)]
enum Ops {
    /// Record encoded through an abbreviation.
    Abbrev {
        /// Index into `abbrev.fields` of the next scalar operand to read.
        /// It equals `abbrev.fields.len()` when only the payload is left and
        /// is larger once the payload has been taken.
        state: usize,
        /// Abbreviation definition that drives decoding.
        abbrev: Arc<Abbreviation>,
    },
    /// Unabbreviated record; holds the number of operands still unread.
    Full(usize),
}
82
/// Streaming reader over the operands of a single record.
///
/// Values are pulled lazily from a borrowed cursor. The `Drop` implementation
/// reads (and discards) whatever operands and payload are still pending.
pub struct RecordIter<'cursor, 'input> {
    /// Record id: the first abbreviation operand for abbreviated records, or
    /// the leading `read_vbr(6)` value for unabbreviated ones.
    pub id: u64,
    /// Cursor the operands are read from.
    cursor: &'cursor mut Cursor<'input>,
    /// What is left to decode and how.
    ops: Ops,
}
92
93impl<'cursor, 'input> RecordIter<'cursor, 'input> {
94 pub(crate) fn into_record(mut self) -> Result<Record, Error> {
95 let mut fields = Vec::with_capacity(self.len());
96 while let Some(f) = self.next()? {
97 fields.push(f);
98 }
99 Ok(Record {
100 id: self.id,
101 fields,
102 payload: self.payload().ok().flatten(),
103 })
104 }
105
106 fn read_scalar_operand(cursor: &mut Cursor<'_>, operand: ScalarOperand) -> Result<u64, Error> {
107 match operand {
108 ScalarOperand::Char6 => {
109 let value = cursor.read(6)? as u8;
110 Ok(u64::from(match value {
111 0..=25 => value + b'a',
112 26..=51 => value + (b'A' - 26),
113 52..=61 => value - (52 - b'0'),
114 62 => b'.',
115 63 => b'_',
116 _ => return Err(Error::InvalidAbbrev),
117 }))
118 }
119 ScalarOperand::Literal(value) => Ok(value),
120 ScalarOperand::Fixed(width) => Ok(cursor.read(width)?),
121 ScalarOperand::Vbr(width) => Ok(cursor.read_vbr(width)?),
122 }
123 }
124
125 pub(crate) fn from_cursor_abbrev(
126 cursor: &'cursor mut Cursor<'input>,
127 abbrev: Arc<Abbreviation>,
128 ) -> Result<Self, Error> {
129 let id =
130 Self::read_scalar_operand(cursor, *abbrev.fields.first().ok_or(Error::InvalidAbbrev)?)?;
131 Ok(Self {
132 id,
133 cursor,
134 ops: Ops::Abbrev { state: 1, abbrev },
135 })
136 }
137
138 pub(crate) fn from_cursor(cursor: &'cursor mut Cursor<'input>) -> Result<Self, Error> {
139 let id = cursor.read_vbr(6)?;
140 let num_ops = cursor.read_vbr(6)? as usize;
141 Ok(Self {
142 id,
143 cursor,
144 ops: Ops::Full(num_ops),
145 })
146 }
147
148 pub fn payload(&mut self) -> Result<Option<Payload>, Error> {
149 match &mut self.ops {
150 Ops::Abbrev { state, abbrev } => {
151 if *state > abbrev.fields.len() {
152 return Ok(None);
153 }
154 Ok(match abbrev.payload {
155 Some(PayloadOperand::Blob) => Some(Payload::Blob(self.blob()?.to_vec())),
156 Some(PayloadOperand::Array(ScalarOperand::Char6)) => {
157 Some(Payload::Char6String(
158 String::from_utf8(self.string()?).map_err(|_| Error::InvalidAbbrev)?,
159 ))
160 }
161 Some(PayloadOperand::Array(_)) => Some(Payload::Array(self.array()?)),
162 None => None,
163 })
164 }
165 Ops::Full(_) => Ok(None),
166 }
167 }
168
169 #[must_use]
171 pub fn len(&self) -> usize {
172 match &self.ops {
173 Ops::Abbrev { state, abbrev } => abbrev.fields.len().saturating_sub(*state),
174 Ops::Full(num_ops) => *num_ops,
175 }
176 }
177
178 #[must_use]
180 pub fn is_empty(&self) -> bool {
181 self.len() == 0
182 }
183
184 pub fn next(&mut self) -> Result<Option<u64>, Error> {
185 match &mut self.ops {
186 Ops::Abbrev { state, abbrev } => {
187 let Some(&op) = abbrev.fields.get(*state) else {
188 return Ok(None);
189 };
190 *state += 1;
191 Ok(Some(Self::read_scalar_operand(self.cursor, op)?))
192 }
193 Ops::Full(num_ops) => {
194 if *num_ops == 0 {
195 return Ok(None);
196 }
197 *num_ops -= 1;
198 Ok(Some(self.cursor.read_vbr(6)?))
199 }
200 }
201 }
202
203 pub fn u64(&mut self) -> Result<u64, Error> {
204 self.next()?.ok_or(Error::EndOfRecord)
205 }
206
207 pub fn nzu64(&mut self) -> Result<Option<NonZero<u64>>, Error> {
208 self.u64().map(NonZero::new)
209 }
210
211 pub fn i64(&mut self) -> Result<i64, Error> {
212 let v = self.u64()?;
213 let shifted = (v >> 1) as i64;
214 Ok(if (v & 1) == 0 {
215 shifted
216 } else if v != 1 {
217 -shifted
218 } else {
219 1 << 63
220 })
221 }
222
223 pub fn u32(&mut self) -> Result<u32, Error> {
224 self.u64()?.try_into().map_err(|_| Error::ValueOverflow)
225 }
226
227 pub fn nzu32(&mut self) -> Result<Option<NonZero<u32>>, Error> {
228 self.u32().map(NonZero::new)
229 }
230
231 pub fn u8(&mut self) -> Result<u8, Error> {
232 self.u64()?.try_into().map_err(|_| Error::ValueOverflow)
233 }
234
235 pub fn try_from<U: TryFrom<u64>, T: TryFrom<U>>(&mut self) -> Result<T, Error> {
236 T::try_from(self.u64()?.try_into().map_err(|_| Error::ValueOverflow)?)
237 .map_err(|_| Error::ValueOverflow)
238 }
239
240 pub fn nzu8(&mut self) -> Result<Option<NonZero<u8>>, Error> {
241 self.u8().map(NonZero::new)
242 }
243
244 pub fn bool(&mut self) -> Result<bool, Error> {
245 match self.u64()? {
246 0 => Ok(false),
247 1 => Ok(true),
248 _ => Err(Error::ValueOverflow),
249 }
250 }
251
252 pub fn range(&mut self) -> Result<Range<usize>, Error> {
253 let start = self.u64()? as usize;
254 Ok(Range {
255 start,
256 end: start + self.u64()? as usize,
257 })
258 }
259
260 pub fn blob(&mut self) -> Result<&'input [u8], Error> {
261 match &mut self.ops {
262 Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
263 Some(PayloadOperand::Blob) => {
264 let length = self.cursor.read_vbr(6)? as usize;
265 self.cursor.align32()?;
266 let data = self.cursor.read_bytes(length)?;
267 self.cursor.align32()?;
268 Ok(data)
269 }
270 other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
271 },
272 Ops::Full(_) => Err(Error::UnexpectedOperand(None)),
273 }
274 }
275
276 pub fn array(&mut self) -> Result<Vec<u64>, Error> {
277 match &mut self.ops {
278 Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
279 Some(PayloadOperand::Array(op)) => {
280 let len = self.cursor.read_vbr(6)? as usize;
281 let mut out = Vec::with_capacity(len);
282 for _ in 0..len {
283 if out.len() == out.capacity() {
284 debug_assert!(false);
285 break;
286 }
287 out.push(Self::read_scalar_operand(self.cursor, op)?);
288 }
289 Ok(out)
290 }
291 other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
292 },
293 Ops::Full(num_ops) => {
295 let len = *num_ops;
296 *num_ops = 0;
297 let mut out = Vec::with_capacity(len);
298 for _ in 0..len {
299 if out.len() == out.capacity() {
300 debug_assert!(false);
301 break;
302 }
303 out.push(self.cursor.read_vbr(6)?);
304 }
305 Ok(out)
306 }
307 }
308 }
309
310 fn take_payload_operand(
312 state: &mut usize,
313 abbrev: &Abbreviation,
314 ) -> Result<Option<PayloadOperand>, Error> {
315 if *state == abbrev.fields.len() {
316 if abbrev.payload.is_some() {
317 *state += 1;
318 }
319 Ok(abbrev.payload)
320 } else {
321 Err(Error::UnexpectedOperand(
322 abbrev.fields.get(*state).copied().map(Operand::Scalar),
323 ))
324 }
325 }
326
327 pub fn string_utf8(&mut self) -> Result<String, Error> {
332 String::from_utf8(self.string()?).map_err(Error::Encoding)
333 }
334
335 pub fn string(&mut self) -> Result<Vec<u8>, Error> {
340 match &mut self.ops {
341 Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
342 Some(PayloadOperand::Array(el)) => {
343 *state += 1;
344 let len = self.cursor.read_vbr(6)? as usize;
345 let mut out = Vec::with_capacity(len);
346
347 match el {
348 ScalarOperand::Char6 => {
349 for _ in 0..len {
350 if out.len() == out.capacity() {
351 debug_assert!(false);
352 break;
353 }
354 let ch = match self.cursor.read(6)? as u8 {
355 value @ 0..=25 => value + b'a',
356 value @ 26..=51 => value + (b'A' - 26),
357 value @ 52..=61 => value - (52 - b'0'),
358 62 => b'.',
359 63 => b'_',
360 _ => return Err(Error::InvalidAbbrev),
361 };
362 out.push(ch);
363 }
364 }
365 ScalarOperand::Fixed(width @ 6..=8) => {
366 for _ in 0..len {
367 if out.len() == out.capacity() {
368 debug_assert!(false);
369 break;
370 }
371 out.push(self.cursor.read(width)? as u8);
372 }
373 }
374 other => {
375 return Err(Error::UnexpectedOperand(Some(Operand::Scalar(other))));
376 }
377 }
378 Ok(out)
379 }
380 other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
381 },
382 Ops::Full(num_ops) => {
383 let len = std::mem::replace(num_ops, 0);
384 let mut out = Vec::with_capacity(len);
385 for _ in 0..len {
386 let ch = self.cursor.read_vbr(6)?;
387 out.push(u8::try_from(ch).map_err(|_| Error::ValueOverflow)?);
388 }
389 Ok(out)
390 }
391 }
392 }
393
394 pub fn zstring(&mut self) -> Result<String, Error> {
396 let mut s = String::new();
397 while let Some(b) = self.nzu8()? {
398 s.push(b.get() as char);
399 }
400 Ok(s)
401 }
402
403 #[must_use]
408 pub fn debug_abbrev_id(&self) -> Option<u32> {
409 match &self.ops {
410 Ops::Abbrev { abbrev, .. } => Some(abbrev.id),
411 Ops::Full(_) => None,
412 }
413 }
414
415 fn from_cloned_cursor<'new_cursor>(
417 &self,
418 cursor: &'new_cursor mut Cursor<'input>,
419 ) -> RecordIter<'new_cursor, 'input> {
420 RecordIter {
421 id: self.id,
422 ops: self.ops.clone(),
423 cursor,
424 }
425 }
426}
427
428impl Iterator for RecordIter<'_, '_> {
429 type Item = Result<u64, Error>;
430 fn next(&mut self) -> Option<Self::Item> {
431 self.next().transpose()
432 }
433}
434
435impl Drop for RecordIter<'_, '_> {
436 fn drop(&mut self) {
438 while let Ok(Some(_)) = self.next() {}
439 if let Ops::Abbrev { abbrev, .. } = &self.ops
440 && abbrev.payload.is_some()
441 {
442 let _ = self.payload();
443 }
444 }
445}
446
/// Debug-only adapter that prints the operands (and payload) of a
/// `RecordIter` as a list. The iterator sits in a `RefCell` because
/// `Debug::fmt` only receives `&self` while iterating requires `&mut`.
struct RecordIterDebugFields<'c, 'i>(RefCell<RecordIter<'c, 'i>>);
/// Debug-only adapter that prints the value inside a `Result` directly,
/// without the surrounding `Ok(..)` / `Err(..)`.
struct RecordIterDebugResult<T, E>(Result<T, E>);
449
450impl fmt::Debug for RecordIter<'_, '_> {
451 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
452 let mut c = self.cursor.clone();
453 let fields = RecordIterDebugFields(RefCell::new(self.from_cloned_cursor(&mut c)));
454
455 f.debug_struct("RecordIter")
456 .field("id", &self.id)
457 .field("fields", &fields)
458 .field("ops", &self.ops)
459 .field("cursor", &self.cursor)
460 .finish()
461 }
462}
463
464impl fmt::Debug for RecordIterDebugFields<'_, '_> {
465 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
466 let mut iter = self.0.borrow_mut();
467 let mut d = f.debug_list();
468 d.entries(iter.by_ref().map(RecordIterDebugResult));
469 if let Some(p) = iter.payload().transpose() {
470 d.entries([RecordIterDebugResult(p)]);
471 }
472 d.finish()
473 }
474}
475
476impl<T: fmt::Debug, E: fmt::Debug> fmt::Debug for RecordIterDebugResult<T, E> {
477 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
478 match &self.0 {
479 Ok(t) => t.fmt(f),
480 Err(e) => e.fmt(f),
481 }
482 }
483}
484
485#[derive(Debug, Clone)]
487pub enum BitcodeElement {
488 Block(Block),
490 Record(Record),
492}
493
494impl BitcodeElement {
495 #[must_use]
497 pub fn is_block(&self) -> bool {
498 matches!(self, Self::Block(_))
499 }
500
501 #[must_use]
503 pub fn as_block(&self) -> Option<&Block> {
504 match self {
505 Self::Block(block) => Some(block),
506 Self::Record(_) => None,
507 }
508 }
509
510 pub fn as_block_mut(&mut self) -> Option<&mut Block> {
512 match self {
513 Self::Block(block) => Some(block),
514 Self::Record(_) => None,
515 }
516 }
517
518 #[must_use]
520 pub fn is_record(&self) -> bool {
521 matches!(self, Self::Record(_))
522 }
523
524 #[must_use]
526 pub fn as_record(&self) -> Option<&Record> {
527 match self {
528 Self::Block(_) => None,
529 Self::Record(record) => Some(record),
530 }
531 }
532
533 pub fn as_record_mut(&mut self) -> Option<&mut Record> {
535 match self {
536 Self::Block(_) => None,
537 Self::Record(record) => Some(record),
538 }
539 }
540}
541
/// Names attached to a block; stored per `u32` key in `Bitcode::block_info`.
#[derive(Debug, Clone, Default)]
pub struct BlockInfo {
    /// Name of the block.
    pub name: String,
    /// Record names. NOTE(review): the `u64` key is presumably the record id;
    /// confirm against the reader that fills this map.
    pub record_names: HashMap<u64, String>,
}
550
551#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq)]
553pub struct Signature {
554 pub magic: u32,
555 pub magic2: u32,
556 pub version: u32,
557 pub offset: u32,
558 pub size: u32,
559 pub cpu_type: u32,
560}
561
562impl Signature {
563 #[must_use]
564 pub fn parse(data: &[u8]) -> Option<(Self, &[u8])> {
565 let (signature, remaining_data) = data.split_first_chunk::<4>()?;
566 let magic = u32::from_le_bytes(*signature);
567 if magic != LLVM_BITCODE_WRAPPER_MAGIC {
568 Some((
569 Self {
570 version: 0,
571 magic,
572 magic2: 0,
573 offset: 4,
574 size: remaining_data.len() as _,
575 cpu_type: 0,
576 },
577 remaining_data,
578 ))
579 } else {
580 if data.len() < 20 {
582 return None;
583 }
584 let mut words = data
585 .chunks_exact(4)
586 .skip(1)
587 .map(|w| u32::from_le_bytes(w.try_into().unwrap()));
588 let version = words.next()?;
589 let offset = words.next()?;
590 let size = words.next()?;
591 let cpu_id = words.next()?;
592 let data = data.get(offset as usize..offset as usize + size as usize)?;
593 let (magic2, remaining_data) = data.split_first_chunk::<4>()?;
594 let magic2 = u32::from_le_bytes(*magic2);
595 Some((
596 Self {
597 version,
598 magic,
599 magic2,
600 offset,
601 size,
602 cpu_type: cpu_id,
603 },
604 remaining_data,
605 ))
606 }
607 }
608}
609
610impl Bitcode {
611 pub fn new(data: &[u8]) -> Result<Self, Error> {
615 let (signature, stream) = Signature::parse(data).ok_or(Error::InvalidSignature(0))?;
616 let mut reader = BitStreamReader::new();
617 let mut visitor = CollectingVisitor::new();
618 reader.read_block(
619 Cursor::new(stream),
620 BitStreamReader::TOP_LEVEL_BLOCK_ID,
621 2,
622 &mut visitor,
623 )?;
624 Ok(Self {
625 signature,
626 elements: visitor.finalize_top_level_elements(),
627 block_info: reader.block_info,
628 })
629 }
630
631 pub fn read<V>(data: &[u8], visitor: &mut V) -> Result<(), Error>
635 where
636 V: BitStreamVisitor,
637 {
638 let (header, stream) = Signature::parse(data).ok_or(Error::InvalidSignature(0))?;
639 if !visitor.validate(header) {
640 return Err(Error::InvalidSignature(header.magic));
641 }
642 let mut reader = BitStreamReader::new();
643 reader.read_block(
644 Cursor::new(stream),
645 BitStreamReader::TOP_LEVEL_BLOCK_ID,
646 2,
647 visitor,
648 )
649 }
650}