tycho_types/boc/
de.rs

1use std::ops::Deref;
2
3use smallvec::SmallVec;
4
5use super::BocTag;
6#[cfg(feature = "stats")]
7use crate::cell::CellTreeStats;
8use crate::cell::{Cell, CellContext, CellDescriptor, CellParts, LevelMask, MAX_REF_COUNT};
9use crate::util::{ArrayVec, read_be_u32_fast, read_be_u64_fast, unlikely};
10
11/// BOC deserialization options.
12#[derive(Debug, Default, Clone)]
13pub struct Options {
14    /// The minimum allowed root count.
15    pub min_roots: Option<usize>,
16    /// The maximum allowed root count.
17    pub max_roots: Option<usize>,
18}
19
20impl Options {
21    /// Constructs decoder options to expect exactly the specified number of roots.
22    pub const fn exact(number: usize) -> Self {
23        Self {
24            min_roots: Some(number),
25            max_roots: Some(number),
26        }
27    }
28}
29
30/// Parsed BOC header.
31pub struct BocHeader<'a> {
32    ref_size: usize,
33    cells: SmallVec<[&'a [u8]; CELLS_ON_STACK]>,
34    roots: SmallVec<[u32; ROOTS_ON_STACK]>,
35}
36
37impl<'a> BocHeader<'a> {
38    /// Decodes boc info from the specified bytes.
39    pub fn decode(data: &'a [u8], options: &Options) -> Result<Self, Error> {
40        let mut reader = BocReader::new(data.len());
41
42        // 4 bytes - tag
43        // 1 byte - flags
44        // 1 byte - offset size
45        if unlikely(!reader.require(6)) {
46            return Err(Error::UnexpectedEof);
47        }
48        debug_assert!(data.len() >= 6);
49
50        // SAFETY: we have already requested more than 6 bytes
51        let [flags, offset_size] = unsafe { *(data.as_ptr().add(4) as *const [u8; 2]) };
52
53        let has_index;
54        let has_crc;
55        let has_cache_bits;
56        let ref_size;
57        let supports_multiple_roots;
58
59        // SAFETY: we have already requested more than 4 bytes
60        let boc_tag = unsafe { reader.read_boc_tag(data) };
61        match boc_tag {
62            Some(BocTag::Indexed) => {
63                has_index = true;
64                has_crc = false;
65                has_cache_bits = false;
66                ref_size = flags as usize;
67                supports_multiple_roots = false;
68            }
69            Some(BocTag::IndexedCrc32) => {
70                has_index = true;
71                has_crc = true;
72                has_cache_bits = false;
73                ref_size = flags as usize;
74                supports_multiple_roots = false;
75            }
76            Some(BocTag::Generic) => {
77                has_index = flags & 0b1000_0000 != 0;
78                has_crc = flags & 0b0100_0000 != 0;
79                has_cache_bits = flags & 0b0010_0000 != 0;
80                ref_size = (flags & 0b0000_0111) as usize;
81                supports_multiple_roots = true;
82            }
83            None => return Err(Error::UnknownBocTag),
84        }
85
86        if unlikely(has_cache_bits && !has_index) {
87            return Err(Error::InvalidHeader);
88        }
89        if unlikely(ref_size == 0 || ref_size > std::mem::size_of::<u32>()) {
90            return Err(Error::InvalidRefSize);
91        }
92        debug_assert!((1..=4).contains(&ref_size));
93
94        let offset_size = offset_size as usize;
95        if unlikely(offset_size == 0 || offset_size > std::mem::size_of::<usize>()) {
96            return Err(Error::InvalidOffsetSize);
97        }
98        debug_assert!((1..=8).contains(&offset_size));
99
100        reader.advance(6);
101
102        // {ref_size} bytes - cell count
103        // {ref_size} bytes - root count
104        // {ref_size} bytes - absent cell count
105        // {offset_size} bytes - total cells size
106        if unlikely(!reader.require(ref_size * 3 + offset_size)) {
107            return Err(Error::InvalidHeader);
108        }
109        debug_assert!(data.len() >= (6 + ref_size * 3 + offset_size));
110
111        // SAFETY: we have already requested more than {ref_size}*3
112        // and {ref_size} is in range 1..=4
113        let (cell_count, root_count, absent_count) = unsafe {
114            (
115                reader.read_next_be_uint_fast(data, ref_size),
116                reader.read_next_be_uint_fast(data, ref_size),
117                reader.read_next_be_uint_fast(data, ref_size),
118            )
119        };
120
121        // Validate root or absent cells
122        if unlikely(root_count == 0) {
123            return Err(Error::RootCellNotFound);
124        }
125        if unlikely(!supports_multiple_roots && root_count > 1) {
126            return Err(Error::UnexpectedMultipleRoots);
127        }
128        if unlikely(root_count.saturating_add(absent_count) > cell_count) {
129            return Err(Error::TooManyRootCells);
130        }
131        if unlikely(absent_count > 0) {
132            return Err(Error::AbsentCellsNotSupported);
133        }
134        if let Some(min_roots) = options.min_roots {
135            if unlikely(root_count < min_roots) {
136                return Err(Error::TooFewRootCells);
137            }
138        }
139
140        {
141            let max_roots = options.max_roots.unwrap_or(MAX_ROOTS);
142            if unlikely(root_count > max_roots) {
143                return Err(Error::TooManyRootCells);
144            }
145            debug_assert!(absent_count == 0 && (1..=max_roots).contains(&root_count))
146        }
147
148        // SAFETY: we have already requested at least {ref_size}*3+{offset_size}
149        // and {ref_size} is in range 1..=8
150        let total_cells_size = unsafe { reader.read_next_be_uint_full(data, offset_size) };
151
152        const MIN_CELL_SIZE: u64 = 2; // [d1, d2]
153
154        // NOTE: `cell_count` is guaranteed to be in range of `u32`, so
155        // `u32::MAX * (2 + 4)` fits into u64 and doesn't require saturating/checked mul,
156        // `root_count` <= `cell_count` so this expression doesn't overflow
157        let min_total_cell_size = (cell_count as u64) * (MIN_CELL_SIZE + ref_size as u64)
158            - (root_count * ref_size) as u64;
159        if unlikely(total_cells_size < min_total_cell_size) {
160            return Err(Error::InvalidTotalSize);
161        }
162
163        // NOTE: `cell_count` is guaranteed to be in range of `u32`, so
164        // `u32::MAX * 282` fits into u64 and doesn't require saturating/checked mul
165        // 2 bytes - descriptor
166        // 4 * (2 + 32) - inline hashes and depths if presented
167        // 128 - max data length
168        // 4*{ref_size} - max references
169        let max_cell_size = 2 + 4 * (2 + 32) + 128 + (MAX_REF_COUNT as u64) * ref_size as u64; // ~282 bytes
170        if unlikely(total_cells_size > (cell_count as u64) * max_cell_size) {
171            return Err(Error::InvalidTotalSize);
172        }
173
174        // NOTE: `root_count` is in range ..=u32::MAX and `ref_size` is in range 1..=4
175        if unlikely(!reader.require(root_count * ref_size)) {
176            return Err(Error::UnexpectedEof);
177        }
178        debug_assert!(data.len() >= (6 + ref_size * 3 + offset_size + root_count * ref_size));
179
180        let mut roots = SmallVec::with_capacity(root_count);
181        if supports_multiple_roots {
182            for _ in 0..root_count {
183                // SAFETY: we have already requested for {root_count}*{ref_size}
184                let root_index = unsafe { reader.read_next_be_uint_fast(data, ref_size) };
185                if unlikely(root_index >= cell_count) {
186                    return Err(Error::RootOutOfBounds);
187                }
188                roots.push(root_index as u32);
189            }
190        } else {
191            roots.push(0);
192        }
193
194        // NOTE: `cell_count` is in range ..=u32::MAX, `offset_size` is in range 1..=8
195        let index_size = has_index as u64 * cell_count as u64 * offset_size as u64;
196        if unlikely(!reader.require((index_size + total_cells_size + has_crc as u64 * 4) as usize))
197        {
198            return Err(Error::UnexpectedEof);
199        }
200
201        if has_index {
202            reader.advance(cell_count * offset_size);
203        }
204
205        #[cfg(not(fuzzing))]
206        let cells_start_offset = reader.offset;
207
208        let mut cells = SmallVec::with_capacity(cell_count);
209
210        let data_ptr = data.as_ptr();
211        for _ in 0..cell_count {
212            // SAFETY: there are manual bounds checks for bytes offset
213            let start_ptr = unsafe { data_ptr.add(reader.offset) };
214            let total_len = ok!(CellParts::read_raw_cell_from_ptr(
215                start_ptr,
216                reader.len - reader.offset,
217                ref_size
218            ));
219            reader.advance(total_len);
220
221            // SAFETY: We have already requested {total_len} bytes
222            let cell = unsafe { std::slice::from_raw_parts(start_ptr, total_len) };
223            cells.push(cell);
224        }
225
226        // Check that `total_cells_size` is correct
227        #[cfg(not(fuzzing))]
228        if (cells_start_offset as u64).saturating_add(total_cells_size) != reader.offset as u64 {
229            return Err(Error::InvalidTotalSize);
230        }
231
232        // Verify checksum if specified
233        #[cfg(not(fuzzing))]
234        if has_crc {
235            if unlikely(!reader.require(4)) {
236                return Err(Error::UnexpectedEof);
237            }
238
239            // SAFETY: we have already requested 4 bytes
240            let is_checksum_correct = unsafe { reader.check_crc(data) };
241            if !is_checksum_correct {
242                return Err(Error::InvalidChecksum);
243            }
244        }
245
246        Ok(Self {
247            ref_size,
248            cells,
249            roots,
250        })
251    }
252
253    /// Assembles cell tree from slices using the specified cell context.
254    pub fn finalize(&self, context: &dyn CellContext) -> Result<ProcessedCells, Error> {
255        let ref_size = self.ref_size;
256        let cell_count = self.cells.len() as u32;
257
258        // TODO: somehow reuse `cells` vec
259        let mut res = SmallVec::<[Cell; CELLS_ON_STACK]>::new();
260        if res.try_reserve_exact(cell_count as usize).is_err() {
261            return Err(Error::InvalidTotalSize);
262        }
263
264        for raw_cell in self.cells().iter().rev() {
265            // SAFETY: it is safe to construct `CellParts` from a `read_raw_cell_from_ptr` output
266            let ctx = unsafe {
267                ok!(CellParts::from_raw_cell(
268                    raw_cell, &res, cell_count, ref_size
269                ))
270            };
271
272            let cell = match context.finalize_cell(ctx) {
273                Ok(cell) => cell,
274                Err(_) => return Err(Error::InvalidCell),
275            };
276            res.push(cell);
277        }
278
279        Ok(ProcessedCells(res))
280    }
281
282    /// Cell index size in bytes. Guaranteed to be 4 at max.
283    pub fn ref_size(&self) -> usize {
284        self.ref_size
285    }
286
287    /// Slices of the unique cells.
288    pub fn cells(&self) -> &[&'a [u8]] {
289        &self.cells
290    }
291
292    /// Root indices.
293    pub fn roots(&self) -> &[u32] {
294        &self.roots
295    }
296}
297
298/// Array of processed cells.
299pub struct ProcessedCells(SmallVec<[Cell; CELLS_ON_STACK]>);
300
301impl ProcessedCells {
302    /// Returns a processed cell by index.
303    pub fn get(&self, index: u32) -> Option<Cell> {
304        self.0.get(self.0.len() - index as usize - 1).cloned()
305    }
306}
307
308/// Wrapper around indexed bytes slice access
309/// to eliminate bounds check.
310struct BocReader {
311    len: usize,
312    offset: usize,
313}
314
315impl BocReader {
316    #[inline(always)]
317    const fn new(len: usize) -> Self {
318        Self { len, offset: 0 }
319    }
320
321    #[inline(always)]
322    const fn require(&self, len: usize) -> bool {
323        self.offset + len <= self.len
324    }
325
326    #[inline(always)]
327    fn advance(&mut self, bytes: usize) {
328        self.offset += bytes;
329    }
330
331    #[inline(always)]
332    unsafe fn read_boc_tag(&self, data: &[u8]) -> Option<BocTag> {
333        unsafe { BocTag::from_bytes(*(data.as_ptr() as *const [u8; 4])) }
334    }
335
336    /// # Safety
337    ///
338    /// The following must be true:
339    /// - size must be in range 1..=4.
340    /// - data must be at least `self.offset + size` bytes long.
341    #[inline(always)]
342    unsafe fn read_next_be_uint_fast(&mut self, data: &[u8], size: usize) -> usize {
343        unsafe {
344            let res = read_be_u32_fast(data.as_ptr().add(self.offset), size) as usize;
345            self.advance(size);
346            res
347        }
348    }
349
350    /// # Safety
351    ///
352    /// The following must be true:
353    /// - size must be in range 1..=8.
354    /// - data must be at least `self.offset + size` bytes long.
355    #[inline(always)]
356    unsafe fn read_next_be_uint_full(&mut self, data: &[u8], size: usize) -> u64 {
357        unsafe {
358            let res = read_be_u64_fast(data.as_ptr().add(self.offset), size);
359            self.advance(size);
360            res
361        }
362    }
363
364    #[cfg(not(fuzzing))]
365    #[inline(always)]
366    unsafe fn check_crc(&self, data: &[u8]) -> bool {
367        unsafe {
368            let data_ptr = data.as_ptr();
369            let crc_start_ptr = data_ptr.add(self.offset);
370
371            let parsed_crc = u32::from_le_bytes(*(crc_start_ptr as *const [u8; 4]));
372            let real_crc = crc32c::crc32c(std::slice::from_raw_parts(data_ptr, self.offset));
373
374            parsed_crc == real_crc
375        }
376    }
377}
378
379impl Deref for BocReader {
380    type Target = usize;
381
382    #[inline(always)]
383    fn deref(&self) -> &Self::Target {
384        &self.offset
385    }
386}
387
388impl<'a> CellParts<'a> {
389    /// Reads cell parts from the raw cell slice.
390    ///
391    /// # Safety
392    ///
393    /// The following must be true:
394    /// - `bytes` must be a correct bytes representation of cell.
395    ///
396    /// NOTE: It is safe to use an unmodified output from `CellParts::read_raw_cell`.
397    pub unsafe fn from_raw_cell(
398        raw_cell: &'a [u8],
399        cells: &[Cell],
400        cell_count: u32,
401        ref_size: usize,
402    ) -> Result<Self, Error> {
403        unsafe {
404            let raw_cell_ptr = raw_cell.as_ptr();
405
406            let descriptor = CellDescriptor::new(*(raw_cell_ptr as *const [u8; 2]));
407            let data_len = descriptor.byte_len() as usize;
408
409            let mut data_ptr = raw_cell_ptr.add(2);
410            if unlikely(descriptor.store_hashes()) {
411                let level = descriptor.level_mask().level();
412                debug_assert!(!descriptor.cell_type().is_pruned_branch());
413                data_ptr = data_ptr.add((32 + 2) * (level as usize + 1));
414            }
415
416            let data = std::slice::from_raw_parts(data_ptr, data_len);
417            data_ptr = data_ptr.add(data_len);
418
419            let bit_len = if descriptor.is_aligned() {
420                (data_len * 8) as u16
421            } else if let Some(data) = data.last() {
422                data_len as u16 * 8 - data.trailing_zeros() as u16 - 1
423            } else {
424                0
425            };
426
427            let mut references = ArrayVec::<Cell, MAX_REF_COUNT>::default();
428            let mut children_mask = LevelMask::EMPTY;
429
430            #[cfg(feature = "stats")]
431            let mut stats = CellTreeStats {
432                bit_count: bit_len as u64,
433                cell_count: 1,
434            };
435
436            for _ in 0..descriptor.reference_count() {
437                let child_index = read_be_u32_fast(data_ptr, ref_size);
438                if child_index >= cell_count {
439                    return Err(Error::InvalidRef);
440                }
441
442                let child = match cells.get((cell_count - child_index - 1) as usize) {
443                    Some(child) => child.clone(),
444                    None => return Err(Error::InvalidRefOrder),
445                };
446
447                {
448                    let child = child.as_ref();
449                    children_mask |= child.descriptor().level_mask();
450                    #[cfg(feature = "stats")]
451                    {
452                        stats += child.stats();
453                    }
454                }
455                references.push(child);
456
457                data_ptr = data_ptr.add(ref_size);
458            }
459
460            Ok(CellParts {
461                #[cfg(feature = "stats")]
462                stats,
463                bit_len,
464                descriptor,
465                children_mask,
466                references,
467                data,
468            })
469        }
470    }
471
472    /// Reads a raw cell from the specified slice.
473    /// The returned slice is guaranteed to be a correct bytes representation of cell.
474    pub fn read_raw_cell<'b>(bytes: &mut &'b [u8], ref_size: usize) -> Result<&'b [u8], Error> {
475        let total_len = ok!(Self::read_raw_cell_from_ptr(
476            bytes.as_ptr(),
477            bytes.len(),
478            ref_size
479        ));
480        let (cell, rest) = bytes.split_at(total_len);
481        *bytes = rest;
482        Ok(cell)
483    }
484
485    fn read_raw_cell_from_ptr(
486        bytes_ptr: *const u8,
487        bytes_len: usize,
488        ref_size: usize,
489    ) -> Result<usize, Error> {
490        const _: () = assert!(std::mem::size_of::<CellDescriptor>() == 2);
491
492        if unlikely(bytes_len < 2) {
493            return Err(Error::UnexpectedEof);
494        }
495
496        // SAFETY: we have already checked the reader has 2 bytes
497        // and the `CellDescriptor` layout is fixed by `repr(C)`
498        let descriptor = CellDescriptor::new(unsafe { *(bytes_ptr as *const [u8; 2]) });
499
500        if unlikely(descriptor.is_absent()) {
501            return Err(Error::AbsentCellsNotSupported);
502        }
503
504        // 0b11111111 -> 0b01111111 + 1 = 0b10000000 = byte len 128, max bit len = 1023
505        // 0b11111110 -> 0b01111111 = byte len 127, bit len = 1016
506        let data_len = descriptor.byte_len() as usize;
507        let ref_count = descriptor.reference_count() as usize;
508        if unlikely(ref_count > MAX_REF_COUNT) {
509            return Err(Error::InvalidRef);
510        }
511
512        let mut data_offset = 0;
513        if unlikely(descriptor.store_hashes()) {
514            let level = descriptor.level_mask().level();
515            if descriptor.is_exotic() && ref_count == 0 && level > 0 {
516                // Pruned branch with `store_hashes` is invalid
517                return Err(Error::UnnormalizedCell);
518            }
519            data_offset = (32 + 2) * (level as usize + 1);
520        }
521
522        let total_len = 2 + data_offset + data_len + ref_count * ref_size;
523        if unlikely(bytes_len < total_len) {
524            return Err(Error::UnexpectedEof);
525        }
526
527        if data_len > 0 && !descriptor.is_aligned() {
528            // SAFETY: we have already requested 2+{data_len} bytes
529            let byte_with_tag = unsafe { *bytes_ptr.add(2 + data_offset + data_len - 1) };
530            if unlikely(byte_with_tag & 0x7f == 0) {
531                return Err(Error::UnnormalizedCell);
532            }
533        }
534
535        Ok(total_len)
536    }
537}
538
539const CELLS_ON_STACK: usize = 16;
540const ROOTS_ON_STACK: usize = 2;
541
542const MAX_ROOTS: usize = 32;
543
544/// Error type for BOC decoding related errors.
545#[derive(Debug, Copy, Clone, thiserror::Error)]
546pub enum Error {
547    /// EOF encountered during another operation.
548    #[error("unexpected EOF")]
549    UnexpectedEof,
550    /// Invalid magic bytes.
551    #[error("unknown BOC tag")]
552    UnknownBocTag,
553    /// Invalid BOC header.
554    #[error("invalid header")]
555    InvalidHeader,
556    /// References size is greater than 4.
557    #[error("ref index does not fit in `u32` type")]
558    InvalidRefSize,
559    /// Offset size is greater than 8.
560    #[error("cell offset does not fit in `usize` type")]
561    InvalidOffsetSize,
562    /// Root cell not found.
563    #[error("root cell not found")]
564    RootCellNotFound,
565    /// Specified BOC tag doesn't support multiple roots.
566    #[error("unexpected multiple roots")]
567    UnexpectedMultipleRoots,
568    /// The number of roots in BOC is greater than expected.
569    #[error("too many root cells")]
570    TooManyRootCells,
571    /// Absent cells are legacy therefore not supported.
572    #[error("absent cells are not supported")]
573    AbsentCellsNotSupported,
574    /// The number of roots in BOC is less than expected.
575    #[error("too few root cells")]
576    TooFewRootCells,
577    /// Total cells size mismatch.
578    #[error("invalid total cells size")]
579    InvalidTotalSize,
580    /// Invalid root cell index.
581    #[error("root index out of bounds")]
582    RootOutOfBounds,
583    /// Invalid child reference.
584    #[error("cell ref count not in range 0..=4")]
585    InvalidRef,
586    /// Suboptimal cells are treated as error.
587    #[error("unnormalized cell")]
588    UnnormalizedCell,
589    /// Possible graph loop detected.
590    #[error("invalid children order")]
591    InvalidRefOrder,
592    /// Failed to parse cell.
593    #[error("invalid cell")]
594    InvalidCell,
595    /// Crc mismatch.
596    #[error("invalid checksum")]
597    InvalidChecksum,
598}