use std::ops::Deref;
use smallvec::SmallVec;
use super::BocTag;
use crate::cell::{Cell, CellDescriptor, CellParts, Finalizer, LevelMask, MAX_REF_COUNT};
use crate::util::{unlikely, ArrayVec};
#[cfg(feature = "stats")]
use crate::cell::CellTreeStats;
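
/// BOC deserialization options.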
#[derive(Debug, Default, Clone)]
pub struct Options {
    /// The minimum allowed root cell count.
    pub min_roots: Option<usize>,
    /// The maximum allowed root cell count.
    pub max_roots: Option<usize>,
}
impl Options {
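    /// Constructs options that require exactly `number` root cells.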
pub const fn exact(number: usize) -> Self {
Self {
min_roots: Some(number),
max_roots: Some(number),
}
}
}
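
/// Parsed BOC header with borrowed cell slices.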
pub struct BocHeader<'a> {
    /// Number of bytes used to encode a cell index.
    ref_size: usize,
    /// Borrowed slices of the serialized cells, in order of appearance.
    cells: SmallVec<[&'a [u8]; CELLS_ON_STACK]>,
    /// Indices of the root cells.
    roots: SmallVec<[u32; ROOTS_ON_STACK]>,
}
impl<'a> BocHeader<'a> {
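    /// Decodes a BOC header from raw bytes without copying cell data.
    ///
    /// A minimal usage sketch; `bytes` and `finalizer` are placeholders for
    /// caller-provided input and a concrete [`Finalizer`] implementation:
    ///
    /// ```ignore
    /// // Parse the header, then build `Cell`s out of the recorded slices.
    /// let header = BocHeader::decode(&bytes, &Options::default())?;
    /// let cells = header.finalize(&mut finalizer)?;
    /// let root = cells.get(header.roots()[0]);
    /// ```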
pub fn decode(data: &'a [u8], options: &Options) -> Result<Self, Error> {
let mut reader = BocReader::new(data.len());
if unlikely(!reader.require(6)) {
return Err(Error::UnexpectedEof);
}
debug_assert!(data.len() >= 6);
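        // SAFETY: at least 6 bytes are available; bytes 4 and 5 hold the
        // flags byte and the offset size byte that follow the 4-byte magic.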
let [flags, offset_size] = unsafe { *(data.as_ptr().add(4) as *const [u8; 2]) };
let has_index;
let has_crc;
let has_cache_bits;
let ref_size;
let supports_multiple_roots;
let boc_tag = unsafe { reader.read_boc_tag(data) };
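        // Indexed tags store only the ref size in the flags byte; the generic
        // tag packs index/CRC/cache-bits flags plus a 3-bit ref size.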
match boc_tag {
Some(BocTag::Indexed) => {
has_index = true;
has_crc = false;
has_cache_bits = false;
ref_size = flags as usize;
supports_multiple_roots = false;
}
Some(BocTag::IndexedCrc32) => {
has_index = true;
has_crc = true;
has_cache_bits = false;
ref_size = flags as usize;
supports_multiple_roots = false;
}
Some(BocTag::Generic) => {
has_index = flags & 0b1000_0000 != 0;
has_crc = flags & 0b0100_0000 != 0;
has_cache_bits = flags & 0b0010_0000 != 0;
ref_size = (flags & 0b0000_0111) as usize;
supports_multiple_roots = true;
}
None => return Err(Error::UnknownBocTag),
}
if unlikely(has_cache_bits && !has_index) {
return Err(Error::InvalidHeader);
}
if unlikely(ref_size == 0 || ref_size > std::mem::size_of::<u32>()) {
return Err(Error::InvalidRefSize);
}
debug_assert!((1..=4).contains(&ref_size));
let offset_size = offset_size as usize;
if unlikely(offset_size == 0 || offset_size > std::mem::size_of::<usize>()) {
return Err(Error::InvalidOffsetSize);
}
debug_assert!((1..=8).contains(&offset_size));
reader.advance(6);
if unlikely(!reader.require(ref_size * 3 + offset_size)) {
return Err(Error::InvalidHeader);
}
debug_assert!(data.len() >= (6 + ref_size * 3 + offset_size));
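        // SAFETY: bounds were checked above; read the cell count, root count
        // and absent cell count.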
let (cell_count, root_count, absent_count) = unsafe {
(
reader.read_next_be_uint_fast(data, ref_size),
reader.read_next_be_uint_fast(data, ref_size),
reader.read_next_be_uint_fast(data, ref_size),
)
};
if unlikely(root_count == 0) {
return Err(Error::RootCellNotFound);
}
if unlikely(!supports_multiple_roots && root_count > 1) {
return Err(Error::UnexpectedMultipleRoots);
}
if unlikely(root_count.saturating_add(absent_count) > cell_count) {
return Err(Error::TooManyRootCells);
}
if unlikely(absent_count > 0) {
return Err(Error::AbsentCellsNotSupported);
}
if let Some(min_roots) = options.min_roots {
if unlikely(root_count < min_roots) {
return Err(Error::TooFewRootCells);
}
}
if unlikely(root_count > options.max_roots.unwrap_or(MAX_ROOTS)) {
return Err(Error::TooManyRootCells);
}
debug_assert!(absent_count == 0 && (1..=MAX_ROOTS).contains(&root_count));
let total_cells_size = unsafe { reader.read_next_be_uint_full(data, offset_size) };
        // Lower bound: every cell has a 2-byte descriptor, and every non-root
        // cell is referenced at least once (each reference takes `ref_size` bytes).
        const MIN_CELL_SIZE: u64 = 2;
        let min_total_cell_size = (cell_count as u64) * (MIN_CELL_SIZE + ref_size as u64)
            - (root_count * ref_size) as u64;
if unlikely(total_cells_size < min_total_cell_size) {
return Err(Error::InvalidTotalSize);
}
        // Upper bound: descriptor (2) + stored hashes and depths for up to
        // four levels (4 * (2 + 32)) + data (128) + references.
        let max_cell_size = 2 + 4 * (2 + 32) + 128 + (MAX_REF_COUNT as u64) * ref_size as u64;
        if unlikely(total_cells_size > (cell_count as u64) * max_cell_size) {
return Err(Error::InvalidTotalSize);
}
if unlikely(!reader.require(root_count * ref_size)) {
return Err(Error::UnexpectedEof);
}
debug_assert!(data.len() >= (6 + ref_size * 3 + offset_size + root_count * ref_size));
let mut roots = SmallVec::with_capacity(root_count);
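        // Only the generic tag stores explicit root indices; the older tags
        // always have a single root cell at index 0.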
if supports_multiple_roots {
for _ in 0..root_count {
let root_index = unsafe { reader.read_next_be_uint_fast(data, ref_size) };
if unlikely(root_index >= cell_count) {
return Err(Error::RootOutOfBounds);
}
roots.push(root_index as u32);
}
} else {
roots.push(0);
}
let index_size = has_index as u64 * cell_count as u64 * offset_size as u64;
if unlikely(!reader.require((index_size + total_cells_size + has_crc as u64 * 4) as usize))
{
return Err(Error::UnexpectedEof);
}
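        // The per-cell offset index is not used here, so skip it entirely.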
if has_index {
reader.advance(cell_count * offset_size);
}
let cells_start_offset = reader.offset;
let mut cells = SmallVec::with_capacity(cell_count);
let data_ptr = data.as_ptr();
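        // First pass: walk the serialized cells once, validating descriptors
        // and recording a borrowed slice for each cell.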
for _ in 0..cell_count {
if unlikely(!reader.require(2)) {
return Err(Error::UnexpectedEof);
}
let start_ptr = unsafe { data_ptr.add(reader.offset) };
let descriptor = unsafe { reader.read_cell_descriptor(data) };
if unlikely(descriptor.is_absent()) {
return Err(Error::AbsentCellsNotSupported);
}
let data_len = descriptor.byte_len() as usize;
let ref_count = descriptor.reference_count() as usize;
if unlikely(ref_count > MAX_REF_COUNT) {
return Err(Error::InvalidRef);
}
let mut data_offset = 0;
if unlikely(descriptor.store_hashes()) {
let level = descriptor.level_mask().level();
if descriptor.is_exotic() && ref_count == 0 && level > 0 {
return Err(Error::UnnormalizedCell);
}
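                // Hashes are stored inline: a 32-byte hash and a 2-byte depth
                // for each of `level + 1` levels.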
data_offset = (32 + 2) * (level as usize + 1);
}
let total_len = 2 + data_offset + data_len + ref_count * ref_size;
if unlikely(!reader.require(total_len)) {
return Err(Error::UnexpectedEof);
}
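            // For non-byte-aligned cells the last data byte must contain the
            // completion tag bit, otherwise the cell is not in normal form.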
if data_len > 0 && !descriptor.is_aligned() {
let byte_with_tag = unsafe { reader.read_cell_tag(data, data_offset, data_len) };
if unlikely(byte_with_tag & 0x7f == 0) {
return Err(Error::UnnormalizedCell);
}
}
reader.advance(total_len);
let cell = unsafe { std::slice::from_raw_parts(start_ptr, total_len) };
cells.push(cell);
}
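        // The declared total size must match exactly what was consumed.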
#[cfg(not(fuzzing))]
if (cells_start_offset as u64).saturating_add(total_cells_size) != reader.offset as u64 {
return Err(Error::InvalidTotalSize);
}
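        // The CRC32C checksum covers all bytes preceding it.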
#[cfg(not(fuzzing))]
if has_crc {
if unlikely(!reader.require(4)) {
return Err(Error::UnexpectedEof);
}
let is_checksum_correct = unsafe { reader.check_crc(data) };
if !is_checksum_correct {
return Err(Error::InvalidChecksum);
}
}
Ok(Self {
ref_size,
cells,
roots,
})
}
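
    /// Assembles `Cell`s from the parsed slices, finalizing children before
    /// their parents.
    ///
    /// Cells are processed in reverse order so that every reference resolves
    /// to an already-finalized child; the result is stored in reverse as well
    /// (see `ProcessedCells::get`).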
pub fn finalize(&self, finalizer: &mut dyn Finalizer) -> Result<ProcessedCells, Error> {
let ref_size = self.ref_size;
let cell_count = self.cells.len() as u32;
let mut res = SmallVec::<[Cell; CELLS_ON_STACK]>::new();
if res.try_reserve_exact(cell_count as usize).is_err() {
return Err(Error::InvalidTotalSize);
}
for cell in self.cells().iter().rev() {
unsafe {
let cell_ptr = cell.as_ptr();
let descriptor = CellDescriptor::new(*(cell_ptr as *const [u8; 2]));
let byte_len = descriptor.byte_len() as usize;
let mut data_ptr = cell_ptr.add(2);
if unlikely(descriptor.store_hashes()) {
let level = descriptor.level_mask().level();
debug_assert!(!descriptor.cell_type().is_pruned_branch());
data_ptr = data_ptr.add((32 + 2) * (level as usize + 1));
}
let data = std::slice::from_raw_parts(data_ptr, byte_len);
data_ptr = data_ptr.add(byte_len);
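                // For non-aligned cells the lowest set bit of the last byte is
                // the completion tag; only the bits above it are data.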
let bit_len = if descriptor.is_aligned() {
(byte_len * 8) as u16
} else if let Some(data) = data.last() {
byte_len as u16 * 8 - data.trailing_zeros() as u16 - 1
} else {
0
};
let mut references = ArrayVec::<Cell, MAX_REF_COUNT>::default();
let mut children_mask = LevelMask::EMPTY;
#[cfg(feature = "stats")]
let mut stats = CellTreeStats {
bit_count: bit_len as u64,
cell_count: 1,
};
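                // Child indices must refer to cells that come later in the
                // BOC, i.e. cells that were already finalized above.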
for _ in 0..descriptor.reference_count() {
let child_index = read_be_uint_fast(data_ptr, ref_size);
if child_index >= cell_count {
return Err(Error::InvalidRef);
}
let child = match res.get((cell_count - child_index - 1) as usize) {
Some(child) => child.clone(),
None => return Err(Error::InvalidRefOrder),
};
{
let child = child.as_ref();
children_mask |= child.descriptor().level_mask();
#[cfg(feature = "stats")]
{
stats += child.stats();
}
}
references.push(child);
data_ptr = data_ptr.add(ref_size);
}
let ctx = CellParts {
#[cfg(feature = "stats")]
stats,
bit_len,
descriptor,
children_mask,
references,
data,
};
let cell = match finalizer.finalize_cell(ctx) {
Ok(cell) => cell,
Err(_) => return Err(Error::InvalidCell),
};
res.push(cell);
}
}
Ok(ProcessedCells(res))
}
    /// Size in bytes of a serialized cell index.
    pub fn ref_size(&self) -> usize {
        self.ref_size
    }

    /// Serialized cells in the order they appear in the BOC.
    pub fn cells(&self) -> &[&'a [u8]] {
        &self.cells
    }

    /// Root cell indices into [`Self::cells`].
    pub fn roots(&self) -> &[u32] {
        &self.roots
    }
}
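
/// Finalized cells, stored in reverse order of their appearance in the BOC.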
pub struct ProcessedCells(SmallVec<[Cell; CELLS_ON_STACK]>);
impl ProcessedCells {
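    /// Returns a finalized cell by its index in the original BOC.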
pub fn get(&self, index: u32) -> Option<Cell> {
self.0.get(self.0.len() - index as usize - 1).cloned()
}
}
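
/// Cursor over the raw BOC bytes with bounds-checking helpers.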
struct BocReader {
len: usize,
offset: usize,
}
impl BocReader {
#[inline(always)]
const fn new(len: usize) -> Self {
Self { len, offset: 0 }
}
#[inline(always)]
const fn require(&self, len: usize) -> bool {
self.offset + len <= self.len
}
#[inline(always)]
fn advance(&mut self, bytes: usize) {
self.offset += bytes;
}
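
    /// # Safety
    ///
    /// `data` must be at least 4 bytes long.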
#[inline(always)]
unsafe fn read_boc_tag(&self, data: &[u8]) -> Option<BocTag> {
BocTag::from_bytes(*(data.as_ptr() as *const [u8; 4]))
}
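
    /// # Safety
    ///
    /// `size` must be in `1..=4` and at least `size` bytes must be available
    /// at the current offset.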
#[inline(always)]
unsafe fn read_next_be_uint_fast(&mut self, data: &[u8], size: usize) -> usize {
let res = read_be_uint_fast(data.as_ptr().add(self.offset), size) as usize;
self.advance(size);
res
}
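
    /// # Safety
    ///
    /// `size` must be in `1..=8` and at least `size` bytes must be available
    /// at the current offset.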
#[inline(always)]
unsafe fn read_next_be_uint_full(&mut self, data: &[u8], size: usize) -> u64 {
let data_ptr = data.as_ptr().add(self.offset);
let res = match size {
1..=4 => read_be_uint_fast(data_ptr, size) as u64,
5..=8 => {
let mut bytes = [0u8; 8];
std::ptr::copy_nonoverlapping(data_ptr, bytes.as_mut_ptr().add(8 - size), size);
u64::from_be_bytes(bytes)
}
_ => std::hint::unreachable_unchecked(),
};
self.advance(size);
res
}
#[inline(always)]
unsafe fn read_cell_descriptor(&self, data: &[u8]) -> CellDescriptor {
const _: () = assert!(std::mem::size_of::<CellDescriptor>() == 2);
*(data.as_ptr().add(self.offset) as *const CellDescriptor)
}
#[inline(always)]
unsafe fn read_cell_tag(&self, data: &[u8], data_offset: usize, data_len: usize) -> u8 {
*data
.as_ptr()
.add(self.offset + 2 + data_offset + data_len - 1)
}
#[inline(always)]
unsafe fn check_crc(&self, data: &[u8]) -> bool {
let data_ptr = data.as_ptr();
let crc_start_ptr = data_ptr.add(self.offset);
let parsed_crc = u32::from_le_bytes(*(crc_start_ptr as *const [u8; 4]));
let real_crc = crc32c::crc32c(std::slice::from_raw_parts(data_ptr, self.offset));
parsed_crc == real_crc
}
}
impl Deref for BocReader {
type Target = usize;
#[inline(always)]
fn deref(&self) -> &Self::Target {
&self.offset
}
}
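
/// Reads a big-endian unsigned integer of `size` bytes.
///
/// # Safety
///
/// `data_ptr` must point to at least `size` readable bytes and `size` must be
/// in `1..=4`.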
#[inline(always)]
unsafe fn read_be_uint_fast(data_ptr: *const u8, size: usize) -> u32 {
match size {
1 => *data_ptr as u32,
2 => u16::from_be_bytes(*(data_ptr as *const [u8; 2])) as u32,
3 => {
let mut bytes = [0u8; 4];
std::ptr::copy_nonoverlapping(data_ptr, bytes.as_mut_ptr().add(1), 3);
u32::from_be_bytes(bytes)
}
4 => u32::from_be_bytes(*(data_ptr as *const [u8; 4])),
_ => std::hint::unreachable_unchecked(),
}
}
/// Inline capacity of the cell buffers before they spill to the heap.
const CELLS_ON_STACK: usize = 16;
/// Inline capacity of the root index buffer.
const ROOTS_ON_STACK: usize = 2;
/// Hard limit on the number of root cells in a single BOC.
const MAX_ROOTS: usize = 32;

/// Error type for BOC decoding related errors.
#[derive(Debug, Copy, Clone, thiserror::Error)]
pub enum Error {
#[error("unexpected EOF")]
UnexpectedEof,
#[error("unknown BOC tag")]
UnknownBocTag,
#[error("invalid header")]
InvalidHeader,
#[error("ref index does not fit in `u32` type")]
InvalidRefSize,
#[error("cell offset does not fit in `usize` type")]
InvalidOffsetSize,
#[error("root cell not found")]
RootCellNotFound,
#[error("unexpected multiple roots")]
UnexpectedMultipleRoots,
#[error("too many root cells")]
TooManyRootCells,
#[error("absent cells are not supported")]
AbsentCellsNotSupported,
#[error("too few root cells")]
TooFewRootCells,
#[error("invalid total cells size")]
InvalidTotalSize,
#[error("root index out of bounds")]
RootOutOfBounds,
#[error("cell ref count not in range 0..=4")]
InvalidRef,
#[error("unnormalized cell")]
UnnormalizedCell,
#[error("invalid children order")]
InvalidRefOrder,
#[error("invalid cell")]
InvalidCell,
#[error("invalid checksum")]
InvalidChecksum,
}