use crate::bits::Cursor;
use crate::bitstream::{Abbreviation, Operand, PayloadOperand, ScalarOperand};
use std::cell::RefCell;
use std::collections::HashMap;
use std::fmt;
use std::num::NonZero;
use std::ops::Range;
use std::sync::Arc;
use crate::read::{BitStreamReader, Error};
use crate::visitor::{BitStreamVisitor, CollectingVisitor};
/// Magic number (read as a little-endian `u32` from the first four bytes) that
/// makes `Signature::parse` treat the input as a wrapper header rather than a raw stream.
const LLVM_BITCODE_WRAPPER_MAGIC: u32 = 0x0B17C0DE;
/// Fully collected result of `Bitcode::new`: the parsed signature, the
/// top-level elements gathered by the collecting visitor, and the reader's
/// block-info table.
#[derive(Debug, Clone)]
pub struct Bitcode {
/// Signature returned by `Signature::parse` for the input buffer.
pub signature: Signature,
/// Top-level elements produced by the collecting visitor.
pub elements: Vec<BitcodeElement>,
/// Block metadata taken from the reader after parsing.
/// NOTE(review): keys are presumably block IDs — confirm against `BitStreamReader`.
pub block_info: HashMap<u32, BlockInfo>,
}
/// A block: an identifier plus the elements (nested blocks and records) it contains.
#[derive(Debug, Clone)]
pub struct Block {
/// Identifier of this block.
pub id: u32,
/// Elements contained in this block.
pub elements: Vec<BitcodeElement>,
}
/// Trailing payload of an abbreviated record, as produced by `RecordIter::payload`.
#[derive(Debug, Clone)]
pub enum Payload {
/// Array payload whose elements are not Char6, decoded to `u64` values.
Array(Vec<u64>),
/// Array payload with Char6 elements, decoded to a string.
Char6String(String),
/// Blob payload copied into an owned byte vector.
Blob(Vec<u8>),
}
/// An owned record: its identifier, its scalar fields, and an optional payload.
#[derive(Debug, Clone)]
pub struct Record {
/// Record identifier, copied from the `RecordIter` the record was built from.
pub id: u64,
// Scalar fields in the order they were read; exposed through `fields()`.
fields: Vec<u64>,
// Payload, if any; handed out (and cleared) by `take_payload()`.
payload: Option<Payload>,
}
impl Record {
    /// Borrows the record's scalar fields in the order they were decoded.
    #[must_use]
    pub fn fields(&self) -> &[u64] {
        self.fields.as_slice()
    }

    /// Moves the payload out of the record.
    ///
    /// The stored payload is replaced by `None`, so a second call returns `None`.
    pub fn take_payload(&mut self) -> Option<Payload> {
        std::mem::take(&mut self.payload)
    }
}
/// Decoding state of a `RecordIter`: which operands are still unread.
#[derive(Debug, Clone)]
enum Ops {
/// Record described by an abbreviation.
Abbrev {
// Index into `abbrev.fields` of the next scalar operand to read. It starts
// at 1 because field 0 is consumed as the record ID, and is pushed past
// `abbrev.fields.len()` once the payload operand has been taken.
state: usize,
// The abbreviation that defines the operands of this record.
abbrev: Arc<Abbreviation>,
},
/// Unabbreviated record; the value is the number of operands not yet read.
Full(usize),
}
/// Lazily decodes the operands of a single record directly from a `Cursor`.
///
/// Dropping the iterator reads any operands that were not consumed (see the
/// `Drop` impl), so the cursor ends up past the record.
pub struct RecordIter<'cursor, 'input> {
/// Record identifier, read when the iterator is constructed.
pub id: u64,
// Cursor the operands are read from.
cursor: &'cursor mut Cursor<'input>,
// Which operands remain to be read.
ops: Ops,
}
impl<'cursor, 'input> RecordIter<'cursor, 'input> {
/// Consumes the iterator, decoding every remaining scalar field and the
/// payload (if the abbreviation declares one) into an owned [`Record`].
///
/// # Errors
/// Returns the first error produced while decoding a field or the payload.
pub(crate) fn into_record(mut self) -> Result<Record, Error> {
    // `len()` can be an operand count taken straight from the input, so only
    // use it as a bounded hint; the vector still grows on demand. An unbounded
    // `with_capacity` would let malformed input trigger a capacity-overflow
    // panic or an allocation failure.
    const MAX_PREALLOC: usize = 4096;
    let mut fields = Vec::with_capacity(self.len().min(MAX_PREALLOC));
    while let Some(field) = self.try_next()? {
        fields.push(field);
    }
    // A payload that fails to decode is reported to the caller instead of
    // being silently turned into "no payload".
    let payload = self.payload()?;
    Ok(Record {
        id: self.id,
        fields,
        payload,
    })
}
/// Decodes one scalar operand of the given kind from `cursor`.
///
/// Literals consume no input. Char6 operands read six bits and are mapped to
/// the ASCII code of `a-z`, `A-Z`, `0-9`, `.` or `_`.
///
/// # Errors
/// Propagates cursor read errors; yields `Error::InvalidAbbrev` if a Char6
/// code falls outside `0..=63`.
fn read_scalar_operand(cursor: &mut Cursor<'_>, operand: ScalarOperand) -> Result<u64, Error> {
    let decoded = match operand {
        ScalarOperand::Literal(literal) => literal,
        ScalarOperand::Fixed(width) => cursor.read(width)?,
        ScalarOperand::Vbr(width) => cursor.read_vbr(width)?,
        ScalarOperand::Char6 => {
            let code = cursor.read(6)? as u8;
            let ascii = match code {
                0..=25 => code + b'a',
                26..=51 => code + (b'A' - 26),
                52..=61 => code - (52 - b'0'),
                62 => b'.',
                63 => b'_',
                _ => return Err(Error::InvalidAbbrev),
            };
            u64::from(ascii)
        }
    };
    Ok(decoded)
}
pub(crate) fn from_cursor_abbrev(
cursor: &'cursor mut Cursor<'input>,
abbrev: Arc<Abbreviation>,
) -> Result<Self, Error> {
let id =
Self::read_scalar_operand(cursor, *abbrev.fields.first().ok_or(Error::InvalidAbbrev)?)?;
Ok(Self {
id,
cursor,
ops: Ops::Abbrev { state: 1, abbrev },
})
}
/// Starts decoding an unabbreviated record.
///
/// Reads two 6-bit VBR values from `cursor`: the record ID, then the number
/// of operands that follow.
///
/// # Errors
/// Propagates cursor read errors.
pub(crate) fn from_cursor(cursor: &'cursor mut Cursor<'input>) -> Result<Self, Error> {
    let id = cursor.read_vbr_fixed::<6>()?;
    let operand_count = cursor.read_vbr_fixed::<6>()? as usize;
    let ops = Ops::Full(operand_count);
    Ok(Self { id, cursor, ops })
}
pub fn payload(&mut self) -> Result<Option<Payload>, Error> {
match &mut self.ops {
Ops::Abbrev { state, abbrev } => {
if *state > abbrev.fields.len() {
return Ok(None);
}
Ok(match abbrev.payload {
Some(PayloadOperand::Blob) => Some(Payload::Blob(self.blob()?.to_vec())),
Some(PayloadOperand::Array(ScalarOperand::Char6)) => {
Some(Payload::Char6String(
String::from_utf8(self.string()?).map_err(|_| Error::InvalidAbbrev)?,
))
}
Some(PayloadOperand::Array(_)) => Some(Payload::Array(self.array()?)),
None => None,
})
}
Ops::Full(_) => Ok(None),
}
}
/// Number of scalar operands that have not been read yet.
///
/// The payload operand of an abbreviated record is not counted.
#[must_use]
pub fn len(&self) -> usize {
    match self.ops {
        Ops::Full(remaining) => remaining,
        Ops::Abbrev { state, ref abbrev } => abbrev.fields.len().saturating_sub(state),
    }
}
/// Returns `true` when no scalar operands remain (see `len()`).
#[must_use]
pub fn is_empty(&self) -> bool {
    matches!(self.len(), 0)
}
#[doc(hidden)]
#[deprecated(note = "renamed to `try_next()` to avoid confusion with `Iterator::next`")]
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> Result<Option<u64>, Error> {
self.try_next()
}
/// Reads the next scalar operand, or returns `Ok(None)` when none are left.
///
/// # Errors
/// Propagates errors from reading the cursor.
#[doc(alias = "next")]
pub fn try_next(&mut self) -> Result<Option<u64>, Error> {
    match &mut self.ops {
        Ops::Full(0) => Ok(None),
        Ops::Full(remaining) => {
            *remaining -= 1;
            let value = self.cursor.read_vbr_fixed::<6>()?;
            Ok(Some(value))
        }
        Ops::Abbrev { state, abbrev } => match abbrev.fields.get(*state).copied() {
            None => Ok(None),
            Some(operand) => {
                // Advance before reading so a failed read is not retried on the same operand.
                *state += 1;
                let value = Self::read_scalar_operand(self.cursor, operand)?;
                Ok(Some(value))
            }
        },
    }
}
/// Reads the next operand, treating the end of the record as an error.
///
/// # Errors
/// `Error::EndOfRecord` when no operand is left (this also trips a debug
/// assertion), or any error from `try_next()`.
#[cfg_attr(debug_assertions, track_caller)]
pub fn u64(&mut self) -> Result<u64, Error> {
    if let Some(value) = self.try_next()? {
        return Ok(value);
    }
    debug_assert!(false, "unexpected end of record");
    Err(Error::EndOfRecord)
}
/// Reads the next operand via `u64()` and maps zero to `None`.
///
/// # Errors
/// Same as `u64()`.
pub fn nzu64(&mut self) -> Result<Option<NonZero<u64>>, Error> {
    let raw = self.u64()?;
    Ok(NonZero::new(raw))
}
/// Reads the next operand and decodes it as a sign-magnitude integer.
///
/// Bit 0 is the sign and the remaining bits are the magnitude. The encoding
/// `1` ("negative zero") decodes to `i64::MIN`.
///
/// # Errors
/// Same as `u64()`.
pub fn i64(&mut self) -> Result<i64, Error> {
    let raw = self.u64()?;
    let magnitude = (raw >> 1) as i64;
    let negative = raw & 1 == 1;
    let value = match (negative, raw) {
        (false, _) => magnitude,
        (true, 1) => i64::MIN,
        (true, _) => -magnitude,
    };
    Ok(value)
}
/// Reads the next operand and narrows it to `u16`.
///
/// # Errors
/// `Error::ValueOverflow` if the value does not fit (this also trips a debug
/// assertion); otherwise the same errors as `u64()`.
#[cfg_attr(debug_assertions, track_caller)]
pub fn u16(&mut self) -> Result<u16, Error> {
    let val = self.u64()?;
    if let Ok(narrowed) = u16::try_from(val) {
        return Ok(narrowed);
    }
    debug_assert!(false, "{val} overflows u16");
    Err(Error::ValueOverflow)
}
/// Reads the next operand and narrows it to `u32`.
///
/// # Errors
/// `Error::ValueOverflow` if the value does not fit (this also trips a debug
/// assertion); otherwise the same errors as `u64()`.
#[cfg_attr(debug_assertions, track_caller)]
pub fn u32(&mut self) -> Result<u32, Error> {
    let val = self.u64()?;
    if let Ok(narrowed) = u32::try_from(val) {
        return Ok(narrowed);
    }
    debug_assert!(false, "{val} overflows u32");
    Err(Error::ValueOverflow)
}
/// Reads the next operand via `u32()` and maps zero to `None`.
///
/// # Errors
/// Same as `u32()`.
pub fn nzu32(&mut self) -> Result<Option<NonZero<u32>>, Error> {
    let raw = self.u32()?;
    Ok(NonZero::new(raw))
}
/// Reads the next operand and narrows it to `u8`.
///
/// # Errors
/// `Error::ValueOverflow` if the value does not fit (this also trips a debug
/// assertion); otherwise the same errors as `u64()`.
#[cfg_attr(debug_assertions, track_caller)]
pub fn u8(&mut self) -> Result<u8, Error> {
    let val = self.u64()?;
    if let Ok(narrowed) = u8::try_from(val) {
        return Ok(narrowed);
    }
    debug_assert!(false, "{val} overflows u8");
    Err(Error::ValueOverflow)
}
#[cfg_attr(debug_assertions, track_caller)]
#[inline]
pub fn try_from<U: TryFrom<u64>, T: TryFrom<U>>(&mut self) -> Result<T, Error> {
self.try_next_from::<U, T>()?.ok_or(Error::EndOfRecord)
}
/// Reads the next operand and converts it `u64 -> U -> T` using `TryFrom`.
///
/// Returns `Ok(None)` when no operand is left.
///
/// # Errors
/// `Error::ValueOverflow` if either conversion fails (this also trips a debug
/// assertion), or any error from `try_next()`.
#[cfg_attr(debug_assertions, track_caller)]
pub fn try_next_from<U: TryFrom<u64>, T: TryFrom<U>>(&mut self) -> Result<Option<T>, Error> {
    let Some(val) = self.try_next()? else {
        return Ok(None);
    };
    let converted = <U as TryFrom<u64>>::try_from(val)
        .ok()
        .and_then(|mid| <T as TryFrom<U>>::try_from(mid).ok());
    match converted {
        Some(out) => Ok(Some(out)),
        None => {
            debug_assert!(
                false,
                "{} can't be made from {val} as {}",
                std::any::type_name::<T>(),
                std::any::type_name::<U>()
            );
            Err(Error::ValueOverflow)
        }
    }
}
/// Reads the next operand via `u8()` and maps zero to `None`.
///
/// # Errors
/// Same as `u8()`.
pub fn nzu8(&mut self) -> Result<Option<NonZero<u8>>, Error> {
    let raw = self.u8()?;
    Ok(NonZero::new(raw))
}
#[cfg_attr(debug_assertions, track_caller)]
pub fn bool(&mut self) -> Result<bool, Error> {
match self.u64()? {
0 => Ok(false),
1 => Ok(true),
val => {
debug_assert!(false, "{val} overflows bool");
Err(Error::ValueOverflow)
}
}
}
pub fn range(&mut self) -> Result<Range<usize>, Error> {
let start = self.u64()? as usize;
Ok(Range {
start,
end: start
.checked_add(self.u64()? as usize)
.ok_or(Error::ValueOverflow)?,
})
}
pub fn blob(&mut self) -> Result<&'input [u8], Error> {
match &mut self.ops {
Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
Some(PayloadOperand::Blob) => {
let length = self.cursor.read_vbr_fixed::<6>()? as usize;
self.cursor.align32()?;
let data = self.cursor.read_bytes(length)?;
self.cursor.align32()?;
Ok(data)
}
other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
},
Ops::Full(_) => Err(Error::UnexpectedOperand(None)),
}
}
/// Reads the remaining operands as an array of `u64`.
///
/// For an abbreviated record this takes the array payload: a 6-bit VBR length
/// followed by that many elements of the abbreviation's element type. For an
/// unabbreviated record it reads every operand that is still unread.
///
/// # Errors
/// `Error::UnexpectedOperand` if scalar fields are still unread or the payload
/// is not an array; cursor errors are propagated.
pub fn array(&mut self) -> Result<Vec<u64>, Error> {
    // Element counts come from the input. Using them unbounded in
    // `Vec::with_capacity` lets malformed data cause a capacity-overflow panic
    // or an allocation failure, so they only serve as a bounded hint and the
    // vector grows on demand beyond that.
    const MAX_PREALLOC: usize = 4096;
    match &mut self.ops {
        Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
            Some(PayloadOperand::Array(op)) => {
                let len = self.cursor.read_vbr_fixed::<6>()? as usize;
                let mut out = Vec::with_capacity(len.min(MAX_PREALLOC));
                for _ in 0..len {
                    out.push(Self::read_scalar_operand(self.cursor, op)?);
                }
                Ok(out)
            }
            other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
        },
        Ops::Full(num_ops) => {
            // Mark all operands as consumed up front, as the original code did.
            let len = std::mem::take(num_ops);
            let mut out = Vec::with_capacity(len.min(MAX_PREALLOC));
            for _ in 0..len {
                out.push(self.cursor.read_vbr_fixed::<6>()?);
            }
            Ok(out)
        }
    }
}
fn take_payload_operand(
state: &mut usize,
abbrev: &Abbreviation,
) -> Result<Option<PayloadOperand>, Error> {
if *state == abbrev.fields.len() {
if abbrev.payload.is_some() {
*state += 1;
}
Ok(abbrev.payload)
} else {
Err(Error::UnexpectedOperand(
abbrev.fields.get(*state).copied().map(Operand::Scalar),
))
}
}
pub fn string_utf8(&mut self) -> Result<String, Error> {
String::from_utf8(self.string()?).map_err(Error::Encoding)
}
#[cfg_attr(debug_assertions, track_caller)]
pub fn string(&mut self) -> Result<Vec<u8>, Error> {
match &mut self.ops {
Ops::Abbrev { state, abbrev } => match Self::take_payload_operand(state, abbrev)? {
Some(PayloadOperand::Array(el)) => {
*state += 1;
let len = self.cursor.read_vbr_fixed::<6>()? as usize;
let mut out = Vec::with_capacity(len);
match el {
ScalarOperand::Char6 => {
for _ in 0..len {
if out.len() == out.capacity() {
debug_assert!(false);
break;
}
let ch = match self.cursor.read(6)? as u8 {
value @ 0..=25 => value + b'a',
value @ 26..=51 => value + (b'A' - 26),
value @ 52..=61 => value - (52 - b'0'),
62 => b'.',
63 => b'_',
_ => return Err(Error::InvalidAbbrev),
};
out.push(ch);
}
}
ScalarOperand::Fixed(width @ 6..=8) => {
for _ in 0..len {
if out.len() == out.capacity() {
debug_assert!(false);
break;
}
out.push(self.cursor.read(width)? as u8);
}
}
other => {
return Err(Error::UnexpectedOperand(Some(Operand::Scalar(other))));
}
}
Ok(out)
}
other => Err(Error::UnexpectedOperand(other.map(Operand::Payload))),
},
Ops::Full(num_ops) => {
let len = std::mem::replace(num_ops, 0);
let mut out = Vec::with_capacity(len);
for _ in 0..len {
let ch = self.cursor.read_vbr_fixed::<6>()?;
out.push(match u8::try_from(ch) {
Ok(c) => c,
Err(_) => {
debug_assert!(false, "{ch} too big for char");
return Err(Error::ValueOverflow);
}
});
}
Ok(out)
}
}
}
/// Reads byte-sized operands up to a zero terminator and returns them as a string.
///
/// Each byte becomes the `char` with the same code point. The terminating
/// zero is consumed but not included.
///
/// # Errors
/// Same as `nzu8()`; in particular, running out of operands before a zero is
/// seen is an error.
pub fn zstring(&mut self) -> Result<String, Error> {
    let mut text = String::new();
    loop {
        match self.nzu8()? {
            Some(byte) => text.push(char::from(byte.get())),
            None => break Ok(text),
        }
    }
}
/// Returns the `id` of the abbreviation driving this record, or `None` for an
/// unabbreviated record.
#[must_use]
pub fn debug_abbrev_id(&self) -> Option<u32> {
    if let Ops::Abbrev { abbrev, .. } = &self.ops {
        Some(abbrev.id)
    } else {
        None
    }
}
/// Builds a second iterator with the same ID and decoding state that reads
/// from `cursor` instead of this iterator's own cursor.
fn with_cloned_cursor<'new_cursor>(
    &self,
    cursor: &'new_cursor mut Cursor<'input>,
) -> RecordIter<'new_cursor, 'input> {
    let ops = self.ops.clone();
    let id = self.id;
    RecordIter { id, ops, cursor }
}
}
/// Iterates over the remaining scalar operands; a decoding error is yielded as
/// an `Err` item.
impl Iterator for RecordIter<'_, '_> {
    type Item = Result<u64, Error>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.try_next() {
            Ok(Some(value)) => Some(Ok(value)),
            Ok(None) => None,
            Err(err) => Some(Err(err)),
        }
    }
}
/// Reads whatever part of the record was left unread, so the cursor is moved
/// past the record. Errors during this clean-up are ignored.
impl Drop for RecordIter<'_, '_> {
    fn drop(&mut self) {
        // Drain scalar operands until the end of the record or the first error.
        while matches!(self.try_next(), Ok(Some(_))) {}

        let declares_payload = match &self.ops {
            Ops::Abbrev { abbrev, .. } => abbrev.payload.is_some(),
            Ops::Full(_) => false,
        };
        if declares_payload {
            // `payload()` returns `Ok(None)` by itself if the payload was already taken.
            let _ = self.payload();
        }
    }
}
// Debug helper: wraps a scratch `RecordIter` in a `RefCell` so that its `Debug`
// impl, which only receives `&self`, can still advance the iterator to list the operands.
struct RecordIterDebugFields<'c, 'i>(RefCell<RecordIter<'c, 'i>>);
// Debug helper: formats a `Result` as its inner value or error, without the
// `Ok(..)` / `Err(..)` wrapper.
struct RecordIterDebugResult<T, E>(Result<T, E>);
/// Formats the iterator together with a preview of its unread operands.
///
/// The preview is decoded from a clone of the cursor, so formatting does not
/// advance the real iterator.
impl fmt::Debug for RecordIter<'_, '_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut scratch = self.cursor.clone();
        let preview = RecordIterDebugFields(RefCell::new(self.with_cloned_cursor(&mut scratch)));

        let mut out = f.debug_struct("RecordIter");
        out.field("id", &self.id);
        out.field("fields", &preview);
        out.field("ops", &self.ops);
        out.field("cursor", &self.cursor);
        out.finish()
    }
}
/// Prints the wrapped iterator's remaining operands as a list, followed by the
/// payload when there is one. Errors are printed in place of the failing entry.
impl fmt::Debug for RecordIterDebugFields<'_, '_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut iter = self.0.borrow_mut();
        let mut list = f.debug_list();
        for item in iter.by_ref() {
            list.entry(&RecordIterDebugResult(item));
        }
        if let Some(payload) = iter.payload().transpose() {
            list.entry(&RecordIterDebugResult(payload));
        }
        list.finish()
    }
}
/// Delegates to the `Debug` impl of whichever side of the `Result` is present.
impl<T: fmt::Debug, E: fmt::Debug> fmt::Debug for RecordIterDebugResult<T, E> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.0.as_ref() {
            Ok(value) => fmt::Debug::fmt(value, f),
            Err(error) => fmt::Debug::fmt(error, f),
        }
    }
}
/// One element of a collected bitstream: either a nested block or a record.
#[derive(Debug, Clone)]
pub enum BitcodeElement {
/// A nested block.
Block(Block),
/// A record.
Record(Record),
}
impl BitcodeElement {
    /// Returns `true` if this element is a block.
    #[must_use]
    pub fn is_block(&self) -> bool {
        match self {
            Self::Block(_) => true,
            Self::Record(_) => false,
        }
    }

    /// Borrows the contained block, or returns `None` for a record.
    #[must_use]
    pub fn as_block(&self) -> Option<&Block> {
        if let Self::Block(block) = self {
            Some(block)
        } else {
            None
        }
    }

    /// Mutably borrows the contained block, or returns `None` for a record.
    pub fn as_block_mut(&mut self) -> Option<&mut Block> {
        if let Self::Block(block) = self {
            Some(block)
        } else {
            None
        }
    }

    /// Returns `true` if this element is a record.
    #[must_use]
    pub fn is_record(&self) -> bool {
        match self {
            Self::Record(_) => true,
            Self::Block(_) => false,
        }
    }

    /// Borrows the contained record, or returns `None` for a block.
    #[must_use]
    pub fn as_record(&self) -> Option<&Record> {
        if let Self::Record(record) = self {
            Some(record)
        } else {
            None
        }
    }

    /// Mutably borrows the contained record, or returns `None` for a block.
    pub fn as_record_mut(&mut self) -> Option<&mut Record> {
        if let Self::Record(record) = self {
            Some(record)
        } else {
            None
        }
    }
}
/// Descriptive metadata for one kind of block; the default value has an empty
/// name and no record names.
#[derive(Debug, Clone, Default)]
pub struct BlockInfo {
/// Name of the block.
pub name: String,
/// Names of records within the block.
/// NOTE(review): keys are presumably record IDs — confirm against the reader that fills this map.
pub record_names: HashMap<u64, String>,
}
/// Header information extracted from the start of a buffer by [`Signature::parse`].
#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq)]
pub struct Signature {
    /// First four bytes of the input, read as a little-endian `u32`.
    pub magic: u32,
    /// First four bytes of the wrapped stream (little-endian) when the input
    /// starts with a wrapper header; `0` otherwise.
    pub magic2: u32,
    /// Second word of the wrapper header; `0` without a wrapper.
    pub version: u32,
    /// Byte offset of the stream from the wrapper header; `4` without a wrapper.
    pub offset: u32,
    /// Stream size in bytes from the wrapper header. Without a wrapper, the
    /// number of bytes after `magic`, saturated at `u32::MAX`.
    pub size: u32,
    /// Fifth word of the wrapper header; `0` without a wrapper.
    pub cpu_type: u32,
}

impl Signature {
    /// Splits the signature off the front of `data`.
    ///
    /// If the first word is not the wrapper magic, it is taken as `magic` and
    /// everything after it is returned as the stream. Otherwise the 20-byte
    /// wrapper header (magic, version, offset, size, CPU type — all
    /// little-endian `u32`) is decoded, the range `offset..offset + size` is
    /// selected, its first word becomes `magic2`, and the rest of that range is
    /// returned as the stream.
    ///
    /// Returns `None` when `data` is too short or when the range named by the
    /// wrapper does not lie inside `data`.
    #[must_use]
    pub fn parse(data: &[u8]) -> Option<(Self, &[u8])> {
        let (signature, remaining_data) = data.split_first_chunk::<4>()?;
        let magic = u32::from_le_bytes(*signature);

        if magic != LLVM_BITCODE_WRAPPER_MAGIC {
            // Saturate instead of wrapping for inputs larger than `u32::MAX` bytes.
            let size = u32::try_from(remaining_data.len()).unwrap_or(u32::MAX);
            return Some((
                Self {
                    version: 0,
                    magic,
                    magic2: 0,
                    offset: 4,
                    size,
                    cpu_type: 0,
                },
                remaining_data,
            ));
        }

        // Wrapper header: five little-endian words, the first being the magic.
        let header = data.first_chunk::<20>()?;
        let mut words = header[4..]
            .chunks_exact(4)
            .map(|w| u32::from_le_bytes([w[0], w[1], w[2], w[3]]));
        let version = words.next()?;
        let offset = words.next()?;
        let size = words.next()?;
        let cpu_type = words.next()?;

        // `offset` and `size` come from the input: compute the end with checked
        // arithmetic so a hostile header cannot overflow `usize`.
        let start = usize::try_from(offset).ok()?;
        let length = usize::try_from(size).ok()?;
        let end = start.checked_add(length)?;
        let wrapped = data.get(start..end)?;

        let (magic2, remaining_data) = wrapped.split_first_chunk::<4>()?;
        Some((
            Self {
                version,
                magic,
                magic2: u32::from_le_bytes(*magic2),
                offset,
                size,
                cpu_type,
            },
            remaining_data,
        ))
    }
}
impl Bitcode {
pub fn new(data: &[u8]) -> Result<Self, Error> {
let (signature, stream) = Signature::parse(data).ok_or(Error::InvalidSignature(0))?;
let mut reader = BitStreamReader::new();
let mut visitor = CollectingVisitor::new();
reader.read_block(
Cursor::new(stream),
BitStreamReader::TOP_LEVEL_BLOCK_ID,
2,
&mut visitor,
)?;
Ok(Self {
signature,
elements: visitor.finalize_top_level_elements(),
block_info: reader.block_info,
})
}
pub fn read<V>(data: &[u8], visitor: &mut V) -> Result<(), Error>
where
V: BitStreamVisitor,
{
let (header, stream) = Signature::parse(data).ok_or(Error::InvalidSignature(0))?;
if !visitor.validate(header) {
return Err(Error::InvalidSignature(header.magic));
}
let mut reader = BitStreamReader::new();
reader.read_block(
Cursor::new(stream),
BitStreamReader::TOP_LEVEL_BLOCK_ID,
2,
visitor,
)
}
}