tycho_types/boc/
de.rs

1use std::ops::Deref;
2
3use smallvec::SmallVec;
4
5use super::BocTag;
6use crate::cell::{Cell, CellContext, CellDescriptor, CellParts, LevelMask, MAX_REF_COUNT};
7use crate::util::{ArrayVec, read_be_u32_fast, read_be_u64_fast, unlikely};
8
9/// BOC deserialization options.
10#[derive(Debug, Default, Clone)]
11pub struct Options {
12    /// The minimum allowed root count.
13    pub min_roots: Option<usize>,
14    /// The maximum allowed root count.
15    pub max_roots: Option<usize>,
16}
17
18impl Options {
19    /// Constructs decoder options to expect exactly the specified number of roots.
20    pub const fn exact(number: usize) -> Self {
21        Self {
22            min_roots: Some(number),
23            max_roots: Some(number),
24        }
25    }
26}
27
28/// Parsed BOC header.
29pub struct BocHeader<'a> {
30    ref_size: usize,
31    cells: SmallVec<[&'a [u8]; CELLS_ON_STACK]>,
32    roots: SmallVec<[u32; ROOTS_ON_STACK]>,
33}
34
35impl<'a> BocHeader<'a> {
36    /// Decodes boc info from the specified bytes.
37    pub fn decode(data: &'a [u8], options: &Options) -> Result<Self, Error> {
38        let mut reader = BocReader::new(data.len());
39
40        // 4 bytes - tag
41        // 1 byte - flags
42        // 1 byte - offset size
43        if unlikely(!reader.require(6)) {
44            return Err(Error::UnexpectedEof);
45        }
46        debug_assert!(data.len() >= 6);
47
48        // SAFETY: we have already requested more than 6 bytes
49        let [flags, offset_size] = unsafe { *(data.as_ptr().add(4) as *const [u8; 2]) };
50
51        let has_index;
52        let has_crc;
53        let has_cache_bits;
54        let ref_size;
55        let supports_multiple_roots;
56
57        // SAFETY: we have already requested more than 4 bytes
58        let boc_tag = unsafe { reader.read_boc_tag(data) };
59        match boc_tag {
60            Some(BocTag::Indexed) => {
61                has_index = true;
62                has_crc = false;
63                has_cache_bits = false;
64                ref_size = flags as usize;
65                supports_multiple_roots = false;
66            }
67            Some(BocTag::IndexedCrc32) => {
68                has_index = true;
69                has_crc = true;
70                has_cache_bits = false;
71                ref_size = flags as usize;
72                supports_multiple_roots = false;
73            }
74            Some(BocTag::Generic) => {
75                has_index = flags & 0b1000_0000 != 0;
76                has_crc = flags & 0b0100_0000 != 0;
77                has_cache_bits = flags & 0b0010_0000 != 0;
78                ref_size = (flags & 0b0000_0111) as usize;
79                supports_multiple_roots = true;
80            }
81            None => return Err(Error::UnknownBocTag),
82        }
83
84        if unlikely(has_cache_bits && !has_index) {
85            return Err(Error::InvalidHeader);
86        }
87        if unlikely(ref_size == 0 || ref_size > std::mem::size_of::<u32>()) {
88            return Err(Error::InvalidRefSize);
89        }
90        debug_assert!((1..=4).contains(&ref_size));
91
92        let offset_size = offset_size as usize;
93        if unlikely(offset_size == 0 || offset_size > std::mem::size_of::<usize>()) {
94            return Err(Error::InvalidOffsetSize);
95        }
96        debug_assert!((1..=8).contains(&offset_size));
97
98        reader.advance(6);
99
100        // {ref_size} bytes - cell count
101        // {ref_size} bytes - root count
102        // {ref_size} bytes - absent cell count
103        // {offset_size} bytes - total cells size
104        if unlikely(!reader.require(ref_size * 3 + offset_size)) {
105            return Err(Error::InvalidHeader);
106        }
107        debug_assert!(data.len() >= (6 + ref_size * 3 + offset_size));
108
109        // SAFETY: we have already requested more than {ref_size}*3
110        // and {ref_size} is in range 1..=4
111        let (cell_count, root_count, absent_count) = unsafe {
112            (
113                reader.read_next_be_uint_fast(data, ref_size),
114                reader.read_next_be_uint_fast(data, ref_size),
115                reader.read_next_be_uint_fast(data, ref_size),
116            )
117        };
118
119        // Validate root or absent cells
120        if unlikely(root_count == 0) {
121            return Err(Error::RootCellNotFound);
122        }
123        if unlikely(!supports_multiple_roots && root_count > 1) {
124            return Err(Error::UnexpectedMultipleRoots);
125        }
126        if unlikely(root_count.saturating_add(absent_count) > cell_count) {
127            return Err(Error::TooManyRootCells);
128        }
129        if unlikely(absent_count > 0) {
130            return Err(Error::AbsentCellsNotSupported);
131        }
132        if let Some(min_roots) = options.min_roots
133            && unlikely(root_count < min_roots)
134        {
135            return Err(Error::TooFewRootCells);
136        }
137
138        {
139            let max_roots = options.max_roots.unwrap_or(MAX_ROOTS);
140            if unlikely(root_count > max_roots) {
141                return Err(Error::TooManyRootCells);
142            }
143            debug_assert!(absent_count == 0 && (1..=max_roots).contains(&root_count))
144        }
145
146        // SAFETY: we have already requested at least {ref_size}*3+{offset_size}
147        // and {ref_size} is in range 1..=8
148        let total_cells_size = unsafe { reader.read_next_be_uint_full(data, offset_size) };
149
150        const MIN_CELL_SIZE: u64 = 2; // [d1, d2]
151
152        // NOTE: `cell_count` is guaranteed to be in range of `u32`, so
153        // `u32::MAX * (2 + 4)` fits into u64 and doesn't require saturating/checked mul,
154        // `root_count` <= `cell_count` so this expression doesn't overflow
155        let min_total_cell_size = (cell_count as u64) * (MIN_CELL_SIZE + ref_size as u64)
156            - (root_count * ref_size) as u64;
157        if unlikely(total_cells_size < min_total_cell_size) {
158            return Err(Error::InvalidTotalSize);
159        }
160
161        // NOTE: `cell_count` is guaranteed to be in range of `u32`, so
162        // `u32::MAX * 282` fits into u64 and doesn't require saturating/checked mul
163        // 2 bytes - descriptor
164        // 4 * (2 + 32) - inline hashes and depths if presented
165        // 128 - max data length
166        // 4*{ref_size} - max references
167        let max_cell_size = 2 + 4 * (2 + 32) + 128 + (MAX_REF_COUNT as u64) * ref_size as u64; // ~282 bytes
168        if unlikely(total_cells_size > (cell_count as u64) * max_cell_size) {
169            return Err(Error::InvalidTotalSize);
170        }
171
172        // NOTE: `root_count` is in range ..=u32::MAX and `ref_size` is in range 1..=4
173        if unlikely(!reader.require(root_count * ref_size)) {
174            return Err(Error::UnexpectedEof);
175        }
176        debug_assert!(data.len() >= (6 + ref_size * 3 + offset_size + root_count * ref_size));
177
178        let mut roots = SmallVec::with_capacity(root_count);
179        if supports_multiple_roots {
180            for _ in 0..root_count {
181                // SAFETY: we have already requested for {root_count}*{ref_size}
182                let root_index = unsafe { reader.read_next_be_uint_fast(data, ref_size) };
183                if unlikely(root_index >= cell_count) {
184                    return Err(Error::RootOutOfBounds);
185                }
186                roots.push(root_index as u32);
187            }
188        } else {
189            roots.push(0);
190        }
191
192        // NOTE: `cell_count` is in range ..=u32::MAX, `offset_size` is in range 1..=8
193        let index_size = has_index as u64 * cell_count as u64 * offset_size as u64;
194        if unlikely(!reader.require((index_size + total_cells_size + has_crc as u64 * 4) as usize))
195        {
196            return Err(Error::UnexpectedEof);
197        }
198
199        if has_index {
200            reader.advance(cell_count * offset_size);
201        }
202
203        #[cfg(not(fuzzing))]
204        let cells_start_offset = reader.offset;
205
206        let mut cells = SmallVec::with_capacity(cell_count);
207
208        let data_ptr = data.as_ptr();
209        for _ in 0..cell_count {
210            // SAFETY: there are manual bounds checks for bytes offset
211            let start_ptr = unsafe { data_ptr.add(reader.offset) };
212            let total_len = ok!(CellParts::read_raw_cell_from_ptr(
213                start_ptr,
214                reader.len - reader.offset,
215                ref_size
216            ));
217            reader.advance(total_len);
218
219            // SAFETY: We have already requested {total_len} bytes
220            let cell = unsafe { std::slice::from_raw_parts(start_ptr, total_len) };
221            cells.push(cell);
222        }
223
224        // Check that `total_cells_size` is correct
225        #[cfg(not(fuzzing))]
226        if (cells_start_offset as u64).saturating_add(total_cells_size) != reader.offset as u64 {
227            return Err(Error::InvalidTotalSize);
228        }
229
230        // Verify checksum if specified
231        #[cfg(not(fuzzing))]
232        if has_crc {
233            if unlikely(!reader.require(4)) {
234                return Err(Error::UnexpectedEof);
235            }
236
237            // SAFETY: we have already requested 4 bytes
238            let is_checksum_correct = unsafe { reader.check_crc(data) };
239            if !is_checksum_correct {
240                return Err(Error::InvalidChecksum);
241            }
242        }
243
244        Ok(Self {
245            ref_size,
246            cells,
247            roots,
248        })
249    }
250
251    /// Assembles cell tree from slices using the specified cell context.
252    pub fn finalize(&self, context: &dyn CellContext) -> Result<ProcessedCells, Error> {
253        let ref_size = self.ref_size;
254        let cell_count = self.cells.len() as u32;
255
256        // TODO: somehow reuse `cells` vec
257        let mut res = SmallVec::<[Cell; CELLS_ON_STACK]>::new();
258        if res.try_reserve_exact(cell_count as usize).is_err() {
259            return Err(Error::InvalidTotalSize);
260        }
261
262        for raw_cell in self.cells().iter().rev() {
263            // SAFETY: it is safe to construct `CellParts` from a `read_raw_cell_from_ptr` output
264            let ctx = unsafe {
265                ok!(CellParts::from_raw_cell(
266                    raw_cell, &res, cell_count, ref_size
267                ))
268            };
269
270            let cell = match context.finalize_cell(ctx) {
271                Ok(cell) => cell,
272                Err(_) => return Err(Error::InvalidCell),
273            };
274            res.push(cell);
275        }
276
277        Ok(ProcessedCells(res))
278    }
279
280    /// Cell index size in bytes. Guaranteed to be 4 at max.
281    pub fn ref_size(&self) -> usize {
282        self.ref_size
283    }
284
285    /// Slices of the unique cells.
286    pub fn cells(&self) -> &[&'a [u8]] {
287        &self.cells
288    }
289
290    /// Root indices.
291    pub fn roots(&self) -> &[u32] {
292        &self.roots
293    }
294}
295
296/// Array of processed cells.
297pub struct ProcessedCells(SmallVec<[Cell; CELLS_ON_STACK]>);
298
299impl ProcessedCells {
300    /// Returns a processed cell by index.
301    pub fn get(&self, index: u32) -> Option<Cell> {
302        self.0.get(self.0.len() - index as usize - 1).cloned()
303    }
304}
305
306/// Wrapper around indexed bytes slice access
307/// to eliminate bounds check.
308struct BocReader {
309    len: usize,
310    offset: usize,
311}
312
313impl BocReader {
314    #[inline(always)]
315    const fn new(len: usize) -> Self {
316        Self { len, offset: 0 }
317    }
318
319    #[inline(always)]
320    const fn require(&self, len: usize) -> bool {
321        self.offset + len <= self.len
322    }
323
324    #[inline(always)]
325    fn advance(&mut self, bytes: usize) {
326        self.offset += bytes;
327    }
328
329    #[inline(always)]
330    unsafe fn read_boc_tag(&self, data: &[u8]) -> Option<BocTag> {
331        unsafe { BocTag::from_bytes(*(data.as_ptr() as *const [u8; 4])) }
332    }
333
334    /// # Safety
335    ///
336    /// The following must be true:
337    /// - size must be in range 1..=4.
338    /// - data must be at least `self.offset + size` bytes long.
339    #[inline(always)]
340    unsafe fn read_next_be_uint_fast(&mut self, data: &[u8], size: usize) -> usize {
341        unsafe {
342            let res = read_be_u32_fast(data.as_ptr().add(self.offset), size) as usize;
343            self.advance(size);
344            res
345        }
346    }
347
348    /// # Safety
349    ///
350    /// The following must be true:
351    /// - size must be in range 1..=8.
352    /// - data must be at least `self.offset + size` bytes long.
353    #[inline(always)]
354    unsafe fn read_next_be_uint_full(&mut self, data: &[u8], size: usize) -> u64 {
355        unsafe {
356            let res = read_be_u64_fast(data.as_ptr().add(self.offset), size);
357            self.advance(size);
358            res
359        }
360    }
361
362    #[cfg(not(fuzzing))]
363    #[inline(always)]
364    unsafe fn check_crc(&self, data: &[u8]) -> bool {
365        unsafe {
366            let data_ptr = data.as_ptr();
367            let crc_start_ptr = data_ptr.add(self.offset);
368
369            let parsed_crc = u32::from_le_bytes(*(crc_start_ptr as *const [u8; 4]));
370            let real_crc = crc32c::crc32c(std::slice::from_raw_parts(data_ptr, self.offset));
371
372            parsed_crc == real_crc
373        }
374    }
375}
376
377impl Deref for BocReader {
378    type Target = usize;
379
380    #[inline(always)]
381    fn deref(&self) -> &Self::Target {
382        &self.offset
383    }
384}
385
386impl<'a> CellParts<'a> {
387    /// Reads cell parts from the raw cell slice.
388    ///
389    /// # Safety
390    ///
391    /// The following must be true:
392    /// - `bytes` must be a correct bytes representation of cell.
393    ///
394    /// NOTE: It is safe to use an unmodified output from `CellParts::read_raw_cell`.
395    pub unsafe fn from_raw_cell(
396        raw_cell: &'a [u8],
397        cells: &[Cell],
398        cell_count: u32,
399        ref_size: usize,
400    ) -> Result<Self, Error> {
401        unsafe {
402            let raw_cell_ptr = raw_cell.as_ptr();
403
404            let descriptor = CellDescriptor::new(*(raw_cell_ptr as *const [u8; 2]));
405            let data_len = descriptor.byte_len() as usize;
406
407            let mut data_ptr = raw_cell_ptr.add(2);
408            if unlikely(descriptor.store_hashes()) {
409                let level = descriptor.level_mask().level();
410                debug_assert!(!descriptor.cell_type().is_pruned_branch());
411                data_ptr = data_ptr.add((32 + 2) * (level as usize + 1));
412            }
413
414            let data = std::slice::from_raw_parts(data_ptr, data_len);
415            data_ptr = data_ptr.add(data_len);
416
417            let bit_len = if descriptor.is_aligned() {
418                (data_len * 8) as u16
419            } else if let Some(data) = data.last() {
420                data_len as u16 * 8 - data.trailing_zeros() as u16 - 1
421            } else {
422                0
423            };
424
425            let mut references = ArrayVec::<Cell, MAX_REF_COUNT>::default();
426            let mut children_mask = LevelMask::EMPTY;
427
428            for _ in 0..descriptor.reference_count() {
429                let child_index = read_be_u32_fast(data_ptr, ref_size);
430                if child_index >= cell_count {
431                    return Err(Error::InvalidRef);
432                }
433
434                let child = match cells.get((cell_count - child_index - 1) as usize) {
435                    Some(child) => child.clone(),
436                    None => return Err(Error::InvalidRefOrder),
437                };
438
439                children_mask |= child.descriptor().level_mask();
440
441                references.push(child);
442
443                data_ptr = data_ptr.add(ref_size);
444            }
445
446            Ok(CellParts {
447                bit_len,
448                descriptor,
449                children_mask,
450                references,
451                data,
452            })
453        }
454    }
455
456    /// Reads a raw cell from the specified slice.
457    /// The returned slice is guaranteed to be a correct bytes representation of cell.
458    pub fn read_raw_cell<'b>(bytes: &mut &'b [u8], ref_size: usize) -> Result<&'b [u8], Error> {
459        let total_len = ok!(Self::read_raw_cell_from_ptr(
460            bytes.as_ptr(),
461            bytes.len(),
462            ref_size
463        ));
464        let (cell, rest) = bytes.split_at(total_len);
465        *bytes = rest;
466        Ok(cell)
467    }
468
469    fn read_raw_cell_from_ptr(
470        bytes_ptr: *const u8,
471        bytes_len: usize,
472        ref_size: usize,
473    ) -> Result<usize, Error> {
474        const _: () = assert!(std::mem::size_of::<CellDescriptor>() == 2);
475
476        if unlikely(bytes_len < 2) {
477            return Err(Error::UnexpectedEof);
478        }
479
480        // SAFETY: we have already checked the reader has 2 bytes
481        // and the `CellDescriptor` layout is fixed by `repr(C)`
482        let descriptor = CellDescriptor::new(unsafe { *(bytes_ptr as *const [u8; 2]) });
483
484        if unlikely(descriptor.is_absent()) {
485            return Err(Error::AbsentCellsNotSupported);
486        }
487
488        // 0b11111111 -> 0b01111111 + 1 = 0b10000000 = byte len 128, max bit len = 1023
489        // 0b11111110 -> 0b01111111 = byte len 127, bit len = 1016
490        let data_len = descriptor.byte_len() as usize;
491        let ref_count = descriptor.reference_count() as usize;
492        if unlikely(ref_count > MAX_REF_COUNT) {
493            return Err(Error::InvalidRef);
494        }
495
496        let mut data_offset = 0;
497        if unlikely(descriptor.store_hashes()) {
498            let level = descriptor.level_mask().level();
499            if descriptor.is_exotic() && ref_count == 0 && level > 0 {
500                // Pruned branch with `store_hashes` is invalid
501                return Err(Error::UnnormalizedCell);
502            }
503            data_offset = (32 + 2) * (level as usize + 1);
504        }
505
506        let total_len = 2 + data_offset + data_len + ref_count * ref_size;
507        if unlikely(bytes_len < total_len) {
508            return Err(Error::UnexpectedEof);
509        }
510
511        if data_len > 0 && !descriptor.is_aligned() {
512            // SAFETY: we have already requested 2+{data_len} bytes
513            let byte_with_tag = unsafe { *bytes_ptr.add(2 + data_offset + data_len - 1) };
514            if unlikely(byte_with_tag & 0x7f == 0) {
515                return Err(Error::UnnormalizedCell);
516            }
517        }
518
519        Ok(total_len)
520    }
521}
522
523const CELLS_ON_STACK: usize = 16;
524const ROOTS_ON_STACK: usize = 2;
525
526const MAX_ROOTS: usize = 32;
527
528/// Error type for BOC decoding related errors.
529#[derive(Debug, Copy, Clone, thiserror::Error)]
530pub enum Error {
531    /// EOF encountered during another operation.
532    #[error("unexpected EOF")]
533    UnexpectedEof,
534    /// Invalid magic bytes.
535    #[error("unknown BOC tag")]
536    UnknownBocTag,
537    /// Invalid BOC header.
538    #[error("invalid header")]
539    InvalidHeader,
540    /// References size is greater than 4.
541    #[error("ref index does not fit in `u32` type")]
542    InvalidRefSize,
543    /// Offset size is greater than 8.
544    #[error("cell offset does not fit in `usize` type")]
545    InvalidOffsetSize,
546    /// Root cell not found.
547    #[error("root cell not found")]
548    RootCellNotFound,
549    /// Specified BOC tag doesn't support multiple roots.
550    #[error("unexpected multiple roots")]
551    UnexpectedMultipleRoots,
552    /// The number of roots in BOC is greater than expected.
553    #[error("too many root cells")]
554    TooManyRootCells,
555    /// Absent cells are legacy therefore not supported.
556    #[error("absent cells are not supported")]
557    AbsentCellsNotSupported,
558    /// The number of roots in BOC is less than expected.
559    #[error("too few root cells")]
560    TooFewRootCells,
561    /// Total cells size mismatch.
562    #[error("invalid total cells size")]
563    InvalidTotalSize,
564    /// Invalid root cell index.
565    #[error("root index out of bounds")]
566    RootOutOfBounds,
567    /// Invalid child reference.
568    #[error("cell ref count not in range 0..=4")]
569    InvalidRef,
570    /// Suboptimal cells are treated as error.
571    #[error("unnormalized cell")]
572    UnnormalizedCell,
573    /// Possible graph loop detected.
574    #[error("invalid children order")]
575    InvalidRefOrder,
576    /// Failed to parse cell.
577    #[error("invalid cell")]
578    InvalidCell,
579    /// Crc mismatch.
580    #[error("invalid checksum")]
581    InvalidChecksum,
582}