use std::{
collections::{HashMap, HashSet},
sync::Arc,
};
use base64::{engine::general_purpose::STANDARD, Engine};
use crc::Crc;
use tlb::{
bits::{
bitvec::{order::Msb0, vec::BitVec, view::AsBits},
de::{args::BitUnpackWithArgs, BitReader, BitReaderExt, BitUnpack},
r#as::{NBits, VarNBytes},
ser::{args::BitPackWithArgs, BitWriter, BitWriterExt},
},
Cell, Error, ResultExt, StringError,
};
/// Short alias for [`BagOfCells`].
pub type BoC = BagOfCells;

/// A bag of cells: one or more root cells plus everything reachable from
/// them, the standard container for (de)serializing cell trees.
#[derive(Debug, Clone)]
pub struct BagOfCells {
    /// Root cells; every cell reachable from here is serialized.
    roots: Vec<Arc<Cell>>,
}
impl BagOfCells {
#[inline]
pub fn from_root(root: impl Into<Arc<Cell>>) -> Self {
Self {
roots: [root.into()].into(),
}
}
#[inline]
pub fn add_root(&mut self, root: impl Into<Arc<Cell>>) {
self.roots.push(root.into())
}
#[inline]
pub fn single_root(&self) -> Option<&Arc<Cell>> {
let [root]: &[_; 1] = self.roots.as_slice().try_into().ok()?;
Some(root)
}
fn traverse_cell_tree(
cell: &Arc<Cell>,
all_cells: &mut HashSet<Arc<Cell>>,
in_refs: &mut HashMap<Arc<Cell>, HashSet<Arc<Cell>>>,
) -> Result<(), StringError> {
if all_cells.insert(cell.clone()) {
for r in &cell.references {
if r == cell {
return Err(Error::custom("cell must not reference itself"));
}
in_refs.entry(r.clone()).or_default().insert(cell.clone());
Self::traverse_cell_tree(r, all_cells, in_refs)?;
}
}
Ok(())
}
pub fn parse_hex(s: impl AsRef<[u8]>) -> Result<Self, StringError> {
let bytes = hex::decode(s).map_err(Error::custom)?;
Self::unpack(bytes.as_bits())
}
pub fn parse_base64(s: impl AsRef<[u8]>) -> Result<Self, StringError> {
let bytes = STANDARD.decode(s).map_err(Error::custom)?;
Self::unpack(bytes.as_bits())
}
}
/// Flags selecting optional sections of the serialized BoC stream.
#[derive(Debug, Clone, Copy, Default)]
pub struct BagOfCellsArgs {
    /// Write a per-cell offset index table after the header.
    pub has_idx: bool,
    /// Append a CRC-32C checksum of the whole serialized stream.
    pub has_crc32c: bool,
}
impl BitPackWithArgs for BagOfCells {
    type Args = BagOfCellsArgs;
    /// Serializes the bag: topologically orders all reachable cells so that
    /// every reference points to a cell with a larger index, then packs the
    /// resulting [`RawBagOfCells`].
    fn pack_with<W>(&self, writer: W, args: Self::Args) -> Result<(), W::Error>
    where
        W: BitWriter,
    {
        // Collect every reachable cell and a reverse-reference map
        // (cell -> set of parents referencing it).
        let mut all_cells: HashSet<Arc<Cell>> = HashSet::new();
        let mut in_refs: HashMap<Arc<Cell>, HashSet<Arc<Cell>>> = HashMap::new();
        for r in &self.roots {
            Self::traverse_cell_tree(r, &mut all_cells, &mut in_refs).map_err(Error::custom)?;
        }
        // Seed Kahn's algorithm with cells nothing references (the roots).
        let mut no_in_refs: HashSet<Arc<Cell>> = HashSet::new();
        for c in &all_cells {
            if !in_refs.contains_key(c) {
                no_in_refs.insert(c.clone());
            }
        }
        // Kahn's topological sort: a cell is emitted only once all of its
        // parents have been, so parents always precede children.
        let mut ordered_cells: Vec<Arc<Cell>> = Vec::new();
        let mut indices: HashMap<Arc<Cell>, u32> = HashMap::new();
        while let Some(cell) = no_in_refs.iter().next().cloned() {
            ordered_cells.push(cell.clone());
            // Next free index == number of cells assigned so far.
            indices.insert(cell.clone(), indices.len() as u32);
            for child in &cell.references {
                if let Some(refs) = in_refs.get_mut(child) {
                    refs.remove(&cell);
                    if refs.is_empty() {
                        // All parents emitted: child becomes eligible.
                        no_in_refs.insert(child.clone());
                        in_refs.remove(child);
                    }
                }
            }
            no_in_refs.remove(&cell);
        }
        // Anything left in `in_refs` was never freed, i.e. lies on a cycle.
        if !in_refs.is_empty() {
            return Err(Error::custom("reference cycle detected"));
        }
        // Rewrite cells and roots in terms of the topological indices.
        // The `unwrap`s are safe: every reachable cell was assigned an
        // index in the loop above.
        RawBagOfCells {
            cells: ordered_cells
                .into_iter()
                .map(|cell| RawCell {
                    data: cell.data.clone(),
                    references: cell
                        .references
                        .iter()
                        .map(|c| *indices.get(c).unwrap())
                        .collect(),
                    level: cell.level(),
                })
                .collect(),
            roots: self
                .roots
                .iter()
                .map(|c| *indices.get(c).unwrap())
                .collect(),
        }
        .pack_with(writer, args)
    }
}
impl BitUnpack for BagOfCells {
fn unpack<R>(reader: R) -> Result<Self, R::Error>
where
R: BitReader,
{
let raw = RawBagOfCells::unpack(reader)?;
let num_cells = raw.cells.len();
let mut cells: Vec<Arc<Cell>> = Vec::new();
for (i, raw_cell) in raw.cells.into_iter().enumerate().rev() {
cells.push(
Cell {
data: raw_cell.data,
references: raw_cell
.references
.into_iter()
.map(|r| {
if r <= i as u32 {
return Err(Error::custom(format!(
"references to previous cells are not supported: [{i}] -> [{r}]"
)));
}
Ok(cells[num_cells - 1 - r as usize].clone())
})
.collect::<Result<_, _>>()?,
}
.into(),
);
}
Ok(BagOfCells {
roots: raw
.roots
.into_iter()
.map(|r| cells[num_cells - 1 - r as usize].clone())
.collect(),
})
}
}
/// CRC-32C (Castagnoli / iSCSI polynomial) — the checksum variant used by
/// the BoC wire format.
const CRC_32_ISCSI: Crc<u32> = Crc::<u32>::new(&crc::CRC_32_ISCSI);
/// Wire-level form of a bag: cells flattened into a list, with references
/// and roots expressed as indices into that list.
#[derive(PartialEq, Eq, Debug, Clone, Hash)]
struct RawBagOfCells {
    pub cells: Vec<RawCell>,
    pub roots: Vec<u32>,
}
impl RawBagOfCells {
    /// Magic tag: indexed layout, no checksum.
    const INDEXED_BOC_TAG: u32 = 0x68ff65f3;
    /// Magic tag: indexed layout with CRC32.
    const INDEXED_CRC32_TAG: u32 = 0xacc3a728;
    /// Magic tag: generic layout (the only one this module writes).
    const GENERIC_BOC_TAG: u32 = 0xb5ee9c72;
}
impl BitPackWithArgs for RawBagOfCells {
    type Args = BagOfCellsArgs;
    /// Serializes the raw bag using the `GENERIC_BOC_TAG` layout:
    /// header, optional offset index, cell payloads, optional CRC-32C.
    fn pack_with<W>(&self, mut writer: W, args: Self::Args) -> Result<(), W::Error>
    where
        W: BitWriter,
    {
        if self.roots.len() > 1 {
            return Err(Error::custom("only single root cell supported"));
        }
        // Bytes needed to encode a cell index.
        let size_bits: u32 = 32 - (self.cells.len() as u32).leading_zeros();
        let size_bytes: u32 = (size_bits + 7) / 8;
        // Byte offset of each serialized cell, for the optional index table.
        let mut tot_cells_size: u32 = 0;
        let mut index = Vec::<u32>::with_capacity(self.cells.len());
        for cell in &self.cells {
            index.push(tot_cells_size);
            tot_cells_size += cell.size(size_bytes);
        }
        // Bytes needed to encode an offset.
        let off_bits: u32 = 32 - tot_cells_size.leading_zeros();
        let off_bytes: u32 = (off_bits + 7) / 8;
        // Tee all output through a buffer so we can checksum it afterwards.
        let mut buffered = writer.as_mut().tee(BitVec::<u8, Msb0>::new());
        buffered
            .pack(Self::GENERIC_BOC_TAG)?
            .pack(args.has_idx)?
            .pack(args.has_crc32c)?
            // has_cache_bits
            .pack(false)?
            // flags
            .pack_as::<u8, NBits<2>>(0)?
            .pack_as::<_, NBits<3>>(size_bytes)?
            .pack_as::<_, NBits<8>>(off_bytes)?
            // cells
            .pack_as_with::<_, VarNBytes>(self.cells.len() as u32, size_bytes)?
            // roots
            .pack_as_with::<_, VarNBytes>(1u32, size_bytes)?
            // absent
            .pack_as_with::<_, VarNBytes>(0u32, size_bytes)?
            // tot_cells_size
            .pack_as_with::<_, VarNBytes>(tot_cells_size, off_bytes)?
            // root_list: write the actual root index (was hard-coded to 0,
            // which is wrong whenever the single root is not cell 0)
            .pack_as_with::<_, VarNBytes>(self.roots.first().copied().unwrap_or(0), size_bytes)?;
        if args.has_idx {
            buffered.pack_many_as_with::<_, VarNBytes>(index, off_bytes)?;
        }
        for (i, cell) in self.cells.iter().enumerate() {
            cell.pack_with(&mut buffered, size_bytes)
                .with_context(|| format!("[{i}]"))?;
        }
        let buf = buffered.into_writer();
        if buf.len() % 8 != 0 {
            return Err(Error::custom("produced stream is not byte-aligned"));
        }
        if args.has_crc32c {
            // CRC-32C over the full stream, appended little-endian.
            let cs = CRC_32_ISCSI.checksum(buf.as_raw_slice());
            writer.write_bitslice(cs.to_le_bytes().as_bits())?;
        }
        Ok(())
    }
}
impl BitUnpack for RawBagOfCells {
    /// Deserializes a raw bag: tag + header, optional offset index,
    /// cell payloads, and (for CRC-bearing layouts) a trailing checksum
    /// verified over everything read before it.
    fn unpack<R>(mut reader: R) -> Result<Self, R::Error>
    where
        R: BitReader,
    {
        // Tee every bit we consume into a buffer for CRC verification.
        let mut buffered = reader.as_mut().tee(BitVec::<u8, Msb0>::new());
        let tag = buffered.unpack::<u32>()?;
        let (has_idx, has_crc32c) = match tag {
            Self::INDEXED_BOC_TAG => (true, false),
            Self::INDEXED_CRC32_TAG => (true, true),
            Self::GENERIC_BOC_TAG => {
                // Generic layout carries its flags explicitly.
                let (has_idx, has_crc32c) = buffered.unpack()?;
                let _has_cache_bits: bool = buffered.unpack()?;
                let _flags: u8 = buffered.unpack_as::<_, NBits<2>>()?;
                (has_idx, has_crc32c)
            }
            _ => return Err(Error::custom(format!("invalid BoC tag: {tag:#x}"))),
        };
        let size_bytes: u32 = buffered.unpack_as::<_, NBits<3>>()?;
        if size_bytes > 4 {
            return Err(Error::custom(format!("invalid size: {size_bytes}")));
        }
        let off_bytes: u32 = buffered.unpack_as::<_, NBits<8>>()?;
        // BUG FIX: this previously re-checked `size_bytes`, so any
        // `off_bytes` value up to 255 was accepted.
        if off_bytes > 8 {
            return Err(Error::custom(format!("invalid off_bytes: {off_bytes}")));
        }
        let cells: u32 = buffered.unpack_as_with::<_, VarNBytes>(size_bytes)?;
        let roots: u32 = buffered.unpack_as_with::<_, VarNBytes>(size_bytes)?;
        let absent: u32 = buffered.unpack_as_with::<_, VarNBytes>(size_bytes)?;
        if roots + absent > cells {
            return Err(Error::custom("roots + absent > cells"));
        }
        let _tot_cells_size: usize = buffered.unpack_as_with::<_, VarNBytes>(off_bytes)?;
        // Only the generic layout carries an explicit root list.
        let root_list = if tag == Self::GENERIC_BOC_TAG {
            buffered
                .unpack_iter_as_with::<_, VarNBytes>(size_bytes)
                .take(roots as usize)
                .collect::<Result<_, _>>()?
        } else {
            Vec::new()
        };
        if has_idx {
            // Offset table is read (so the CRC covers it) but unused.
            let _index: Vec<u32> = buffered
                .unpack_iter_as_with::<_, VarNBytes>(off_bytes)
                .take(cells as usize)
                .collect::<Result<_, _>>()?;
        }
        let cell_data: Vec<RawCell> = buffered
            .unpack_iter_with(size_bytes)
            .take(cells as usize)
            .collect::<Result<_, _>>()
            .context("cell_data")?;
        let buf = buffered.into_writer();
        if buf.len() % 8 != 0 {
            return Err(Error::custom("produced stream is not byte-aligned"));
        }
        if has_crc32c {
            // Checksum is read from the underlying reader so it is NOT part
            // of the buffered bytes it covers.
            let cs = u32::from_le_bytes(reader.unpack()?);
            if cs != CRC_32_ISCSI.checksum(buf.as_raw_slice()) {
                return Err(Error::custom("CRC mismatch"));
            }
        }
        Ok(RawBagOfCells {
            cells: cell_data,
            roots: root_list,
        })
    }
}
/// One serialized cell: its raw bit payload plus the indices of the cells
/// it references within the enclosing bag.
#[derive(PartialEq, Eq, Debug, Clone, Hash)]
pub(crate) struct RawCell {
    /// Cell payload, bit-granular (not necessarily byte-aligned).
    pub data: BitVec<u8, Msb0>,
    /// Indices of referenced cells in the bag's cell list.
    pub references: Vec<u32>,
    /// Cell level, as stored in the top 3 bits of the refs descriptor.
    pub level: u8,
}
impl BitUnpackWithArgs for RawCell {
    type Args = u32;
    /// Reads one serialized cell: two descriptor bytes, the (possibly
    /// padded) data bytes, then the reference indices.
    fn unpack_with<R>(mut reader: R, size_bytes: Self::Args) -> Result<Self, R::Error>
    where
        R: BitReader,
    {
        // d1 = refs + 8 * is_exotic + 32 * level
        let d1: u8 = reader.unpack()?;
        let level: u8 = d1 >> 5;
        let _is_exotic: bool = (d1 >> 3) & 0b1 == 1;
        let num_refs: usize = (d1 & 0b111) as usize;
        // d2 encodes the data length in half-bytes; an odd value means the
        // final byte is only partially used and ends with a `1 0…0` marker.
        let d2: u8 = reader.unpack()?;
        let byte_len: usize = ((d2 >> 1) + (d2 & 1)) as usize;
        let padded = (d2 & 1) == 1;
        let mut data = reader.read_bitvec(byte_len * 8)?;
        if padded && !data.is_empty() {
            // Strip the completion marker: drop trailing zeros plus the
            // single 1 bit that precedes them.
            let zeros = data.trailing_zeros();
            if zeros >= 8 {
                return Err(Error::custom("last byte must be non zero"));
            }
            let keep = data.len() - zeros - 1;
            data.truncate(keep);
        }
        let references: Vec<u32> = reader
            .unpack_iter_as_with::<_, VarNBytes>(size_bytes)
            .take(num_refs)
            .collect::<Result<_, _>>()?;
        Ok(RawCell {
            data,
            references,
            level,
        })
    }
}
impl BitPackWithArgs for RawCell {
    type Args = u32;
    /// Writes one cell: descriptor bytes, the data bits (with a completion
    /// marker when the bit length is not byte-aligned), then the
    /// reference indices.
    fn pack_with<W>(&self, mut writer: W, ref_size_bytes: Self::Args) -> Result<(), W::Error>
    where
        W: BitWriter,
    {
        // Only ordinary (non-exotic) cells are produced by this writer.
        let is_exotic: u8 = 0;
        // d1 = refs + 8 * is_exotic + 32 * level.
        // BUG FIX: use the cell's stored level instead of hard-coding 0,
        // so pack/unpack round-trips the `level` field.
        let refs_descriptor: u8 = self.references.len() as u8 + is_exotic * 8 + self.level * 32;
        writer.pack(refs_descriptor)?;
        let padding_bits = self.data.len() % 8;
        let full_bytes = padding_bits == 0;
        let data_bytes = (self.data.len() + 7) / 8;
        // d2 = 2 * byte_len - 1 when the last byte is partial.
        let bits_descriptor: u8 = data_bytes as u8 * 2 - if full_bytes { 0 } else { 1 };
        writer.pack(bits_descriptor)?;
        writer.pack(self.data.as_bitslice())?;
        if !full_bytes {
            // Completion marker: a single 1 bit followed by zeros up to the
            // byte boundary, so the reader can recover the exact bit length.
            writer.write_bit(true)?;
            writer.repeat_bit(8 - padding_bits - 1, false)?;
        }
        writer.pack_many_as_with::<_, &VarNBytes>(&self.references, ref_size_bytes)?;
        Ok(())
    }
}
impl RawCell {
    /// Serialized size of this cell in bytes: two descriptor bytes, the
    /// byte-rounded data, and one `ref_size_bytes`-wide index per reference.
    fn size(&self, ref_size_bytes: u32) -> u32 {
        let data_bytes = (self.data.len() as u32 + 7) / 8;
        let refs_bytes = self.references.len() as u32 * ref_size_bytes;
        2 + data_bytes + refs_bytes
    }
}