everscale_types/boc/
de.rs

1use std::ops::Deref;
2
3use smallvec::SmallVec;
4
5use super::BocTag;
6use crate::cell::{Cell, CellContext, CellDescriptor, CellParts, LevelMask, MAX_REF_COUNT};
7use crate::util::{read_be_u32_fast, read_be_u64_fast, unlikely, ArrayVec};
8
9#[cfg(feature = "stats")]
10use crate::cell::CellTreeStats;
11
12/// BOC deserialization options.
13#[derive(Debug, Default, Clone)]
14pub struct Options {
15    /// The minimum allowed root count.
16    pub min_roots: Option<usize>,
17    /// The maximum allowed root count.
18    pub max_roots: Option<usize>,
19}
20
21impl Options {
22    /// Constructs decoder options to expect exactly the specified number of roots.
23    pub const fn exact(number: usize) -> Self {
24        Self {
25            min_roots: Some(number),
26            max_roots: Some(number),
27        }
28    }
29}
30
31/// Parsed BOC header.
32pub struct BocHeader<'a> {
33    ref_size: usize,
34    cells: SmallVec<[&'a [u8]; CELLS_ON_STACK]>,
35    roots: SmallVec<[u32; ROOTS_ON_STACK]>,
36}
37
38impl<'a> BocHeader<'a> {
39    /// Decodes boc info from the specified bytes.
40    pub fn decode(data: &'a [u8], options: &Options) -> Result<Self, Error> {
41        let mut reader = BocReader::new(data.len());
42
43        // 4 bytes - tag
44        // 1 byte - flags
45        // 1 byte - offset size
46        if unlikely(!reader.require(6)) {
47            return Err(Error::UnexpectedEof);
48        }
49        debug_assert!(data.len() >= 6);
50
51        // SAFETY: we have already requested more than 6 bytes
52        let [flags, offset_size] = unsafe { *(data.as_ptr().add(4) as *const [u8; 2]) };
53
54        let has_index;
55        let has_crc;
56        let has_cache_bits;
57        let ref_size;
58        let supports_multiple_roots;
59
60        // SAFETY: we have already requested more than 4 bytes
61        let boc_tag = unsafe { reader.read_boc_tag(data) };
62        match boc_tag {
63            Some(BocTag::Indexed) => {
64                has_index = true;
65                has_crc = false;
66                has_cache_bits = false;
67                ref_size = flags as usize;
68                supports_multiple_roots = false;
69            }
70            Some(BocTag::IndexedCrc32) => {
71                has_index = true;
72                has_crc = true;
73                has_cache_bits = false;
74                ref_size = flags as usize;
75                supports_multiple_roots = false;
76            }
77            Some(BocTag::Generic) => {
78                has_index = flags & 0b1000_0000 != 0;
79                has_crc = flags & 0b0100_0000 != 0;
80                has_cache_bits = flags & 0b0010_0000 != 0;
81                ref_size = (flags & 0b0000_0111) as usize;
82                supports_multiple_roots = true;
83            }
84            None => return Err(Error::UnknownBocTag),
85        }
86
87        if unlikely(has_cache_bits && !has_index) {
88            return Err(Error::InvalidHeader);
89        }
90        if unlikely(ref_size == 0 || ref_size > std::mem::size_of::<u32>()) {
91            return Err(Error::InvalidRefSize);
92        }
93        debug_assert!((1..=4).contains(&ref_size));
94
95        let offset_size = offset_size as usize;
96        if unlikely(offset_size == 0 || offset_size > std::mem::size_of::<usize>()) {
97            return Err(Error::InvalidOffsetSize);
98        }
99        debug_assert!((1..=8).contains(&offset_size));
100
101        reader.advance(6);
102
103        // {ref_size} bytes - cell count
104        // {ref_size} bytes - root count
105        // {ref_size} bytes - absent cell count
106        // {offset_size} bytes - total cells size
107        if unlikely(!reader.require(ref_size * 3 + offset_size)) {
108            return Err(Error::InvalidHeader);
109        }
110        debug_assert!(data.len() >= (6 + ref_size * 3 + offset_size));
111
112        // SAFETY: we have already requested more than {ref_size}*3
113        // and {ref_size} is in range 1..=4
114        let (cell_count, root_count, absent_count) = unsafe {
115            (
116                reader.read_next_be_uint_fast(data, ref_size),
117                reader.read_next_be_uint_fast(data, ref_size),
118                reader.read_next_be_uint_fast(data, ref_size),
119            )
120        };
121
122        // Validate root or absent cells
123        if unlikely(root_count == 0) {
124            return Err(Error::RootCellNotFound);
125        }
126        if unlikely(!supports_multiple_roots && root_count > 1) {
127            return Err(Error::UnexpectedMultipleRoots);
128        }
129        if unlikely(root_count.saturating_add(absent_count) > cell_count) {
130            return Err(Error::TooManyRootCells);
131        }
132        if unlikely(absent_count > 0) {
133            return Err(Error::AbsentCellsNotSupported);
134        }
135        if let Some(min_roots) = options.min_roots {
136            if unlikely(root_count < min_roots) {
137                return Err(Error::TooFewRootCells);
138            }
139        }
140
141        {
142            let max_roots = options.max_roots.unwrap_or(MAX_ROOTS);
143            if unlikely(root_count > max_roots) {
144                return Err(Error::TooManyRootCells);
145            }
146            debug_assert!(absent_count == 0 && (1..=max_roots).contains(&root_count))
147        }
148
149        // SAFETY: we have already requested at least {ref_size}*3+{offset_size}
150        // and {ref_size} is in range 1..=8
151        let total_cells_size = unsafe { reader.read_next_be_uint_full(data, offset_size) };
152
153        const MIN_CELL_SIZE: u64 = 2; // [d1, d2]
154
155        // NOTE: `cell_count` is guaranteed to be in range of `u32`, so
156        // `u32::MAX * (2 + 4)` fits into u64 and doesn't require saturating/checked mul,
157        // `root_count` <= `cell_count` so this expression doesn't overflow
158        let min_total_cell_size = (cell_count as u64) * (MIN_CELL_SIZE + ref_size as u64)
159            - (root_count * ref_size) as u64;
160        if unlikely(total_cells_size < min_total_cell_size) {
161            return Err(Error::InvalidTotalSize);
162        }
163
164        // NOTE: `cell_count` is guaranteed to be in range of `u32`, so
165        // `u32::MAX * 282` fits into u64 and doesn't require saturating/checked mul
166        // 2 bytes - descriptor
167        // 4 * (2 + 32) - inline hashes and depths if presented
168        // 128 - max data length
169        // 4*{ref_size} - max references
170        let max_cell_size = 2 + 4 * (2 + 32) + 128 + (MAX_REF_COUNT as u64) * ref_size as u64; // ~282 bytes
171        if unlikely(total_cells_size > (cell_count as u64) * max_cell_size) {
172            return Err(Error::InvalidTotalSize);
173        }
174
175        // NOTE: `root_count` is in range ..=u32::MAX and `ref_size` is in range 1..=4
176        if unlikely(!reader.require(root_count * ref_size)) {
177            return Err(Error::UnexpectedEof);
178        }
179        debug_assert!(data.len() >= (6 + ref_size * 3 + offset_size + root_count * ref_size));
180
181        let mut roots = SmallVec::with_capacity(root_count);
182        if supports_multiple_roots {
183            for _ in 0..root_count {
184                // SAFETY: we have already requested for {root_count}*{ref_size}
185                let root_index = unsafe { reader.read_next_be_uint_fast(data, ref_size) };
186                if unlikely(root_index >= cell_count) {
187                    return Err(Error::RootOutOfBounds);
188                }
189                roots.push(root_index as u32);
190            }
191        } else {
192            roots.push(0);
193        }
194
195        // NOTE: `cell_count` is in range ..=u32::MAX, `offset_size` is in range 1..=8
196        let index_size = has_index as u64 * cell_count as u64 * offset_size as u64;
197        if unlikely(!reader.require((index_size + total_cells_size + has_crc as u64 * 4) as usize))
198        {
199            return Err(Error::UnexpectedEof);
200        }
201
202        if has_index {
203            reader.advance(cell_count * offset_size);
204        }
205
206        #[cfg(not(fuzzing))]
207        let cells_start_offset = reader.offset;
208
209        let mut cells = SmallVec::with_capacity(cell_count);
210
211        let data_ptr = data.as_ptr();
212        for _ in 0..cell_count {
213            // SAFETY: there are manual bounds checks for bytes offset
214            let start_ptr = unsafe { data_ptr.add(reader.offset) };
215            let total_len = ok!(CellParts::read_raw_cell_from_ptr(
216                start_ptr,
217                reader.len - reader.offset,
218                ref_size
219            ));
220            reader.advance(total_len);
221
222            // SAFETY: We have already requested {total_len} bytes
223            let cell = unsafe { std::slice::from_raw_parts(start_ptr, total_len) };
224            cells.push(cell);
225        }
226
227        // Check that `total_cells_size` is correct
228        #[cfg(not(fuzzing))]
229        if (cells_start_offset as u64).saturating_add(total_cells_size) != reader.offset as u64 {
230            return Err(Error::InvalidTotalSize);
231        }
232
233        // Verify checksum if specified
234        #[cfg(not(fuzzing))]
235        if has_crc {
236            if unlikely(!reader.require(4)) {
237                return Err(Error::UnexpectedEof);
238            }
239
240            // SAFETY: we have already requested 4 bytes
241            let is_checksum_correct = unsafe { reader.check_crc(data) };
242            if !is_checksum_correct {
243                return Err(Error::InvalidChecksum);
244            }
245        }
246
247        Ok(Self {
248            ref_size,
249            cells,
250            roots,
251        })
252    }
253
254    /// Assembles cell tree from slices using the specified cell context.
255    pub fn finalize(&self, context: &mut dyn CellContext) -> Result<ProcessedCells, Error> {
256        let ref_size = self.ref_size;
257        let cell_count = self.cells.len() as u32;
258
259        // TODO: somehow reuse `cells` vec
260        let mut res = SmallVec::<[Cell; CELLS_ON_STACK]>::new();
261        if res.try_reserve_exact(cell_count as usize).is_err() {
262            return Err(Error::InvalidTotalSize);
263        }
264
265        for raw_cell in self.cells().iter().rev() {
266            // SAFETY: it is safe to construct `CellParts` from a `read_raw_cell_from_ptr` output
267            let ctx = unsafe {
268                ok!(CellParts::from_raw_cell(
269                    raw_cell, &res, cell_count, ref_size
270                ))
271            };
272
273            let cell = match context.finalize_cell(ctx) {
274                Ok(cell) => cell,
275                Err(_) => return Err(Error::InvalidCell),
276            };
277            res.push(cell);
278        }
279
280        Ok(ProcessedCells(res))
281    }
282
283    /// Cell index size in bytes. Guaranteed to be 4 at max.
284    pub fn ref_size(&self) -> usize {
285        self.ref_size
286    }
287
288    /// Slices of the unique cells.
289    pub fn cells(&self) -> &[&'a [u8]] {
290        &self.cells
291    }
292
293    /// Root indices.
294    pub fn roots(&self) -> &[u32] {
295        &self.roots
296    }
297}
298
299/// Array of processed cells.
300pub struct ProcessedCells(SmallVec<[Cell; CELLS_ON_STACK]>);
301
302impl ProcessedCells {
303    /// Returns a processed cell by index.
304    pub fn get(&self, index: u32) -> Option<Cell> {
305        self.0.get(self.0.len() - index as usize - 1).cloned()
306    }
307}
308
309/// Wrapper around indexed bytes slice access
310/// to eliminate bounds check.
311struct BocReader {
312    len: usize,
313    offset: usize,
314}
315
316impl BocReader {
317    #[inline(always)]
318    const fn new(len: usize) -> Self {
319        Self { len, offset: 0 }
320    }
321
322    #[inline(always)]
323    const fn require(&self, len: usize) -> bool {
324        self.offset + len <= self.len
325    }
326
327    #[inline(always)]
328    fn advance(&mut self, bytes: usize) {
329        self.offset += bytes;
330    }
331
332    #[inline(always)]
333    unsafe fn read_boc_tag(&self, data: &[u8]) -> Option<BocTag> {
334        BocTag::from_bytes(*(data.as_ptr() as *const [u8; 4]))
335    }
336
337    /// # Safety
338    ///
339    /// The following must be true:
340    /// - size must be in range 1..=4.
341    /// - data must be at least `self.offset + size` bytes long.
342    #[inline(always)]
343    unsafe fn read_next_be_uint_fast(&mut self, data: &[u8], size: usize) -> usize {
344        let res = read_be_u32_fast(data.as_ptr().add(self.offset), size) as usize;
345        self.advance(size);
346        res
347    }
348
349    /// # Safety
350    ///
351    /// The following must be true:
352    /// - size must be in range 1..=8.
353    /// - data must be at least `self.offset + size` bytes long.
354    #[inline(always)]
355    unsafe fn read_next_be_uint_full(&mut self, data: &[u8], size: usize) -> u64 {
356        let res = read_be_u64_fast(data.as_ptr().add(self.offset), size);
357        self.advance(size);
358        res
359    }
360
361    #[cfg(not(fuzzing))]
362    #[inline(always)]
363    unsafe fn check_crc(&self, data: &[u8]) -> bool {
364        let data_ptr = data.as_ptr();
365        let crc_start_ptr = data_ptr.add(self.offset);
366
367        let parsed_crc = u32::from_le_bytes(*(crc_start_ptr as *const [u8; 4]));
368        let real_crc = crc32c::crc32c(std::slice::from_raw_parts(data_ptr, self.offset));
369
370        parsed_crc == real_crc
371    }
372}
373
374impl Deref for BocReader {
375    type Target = usize;
376
377    #[inline(always)]
378    fn deref(&self) -> &Self::Target {
379        &self.offset
380    }
381}
382
383impl<'a> CellParts<'a> {
384    /// Reads cell parts from the raw cell slice.
385    ///
386    /// # Safety
387    ///
388    /// The following must be true:
389    /// - `bytes` must be a correct bytes representation of cell.
390    ///
391    /// NOTE: It is safe to use an unmodified output from `CellParts::read_raw_cell`.
392    pub unsafe fn from_raw_cell(
393        raw_cell: &'a [u8],
394        cells: &[Cell],
395        cell_count: u32,
396        ref_size: usize,
397    ) -> Result<Self, Error> {
398        let raw_cell_ptr = raw_cell.as_ptr();
399
400        let descriptor = CellDescriptor::new(*(raw_cell_ptr as *const [u8; 2]));
401        let data_len = descriptor.byte_len() as usize;
402
403        let mut data_ptr = raw_cell_ptr.add(2);
404        if unlikely(descriptor.store_hashes()) {
405            let level = descriptor.level_mask().level();
406            debug_assert!(!descriptor.cell_type().is_pruned_branch());
407            data_ptr = data_ptr.add((32 + 2) * (level as usize + 1));
408        }
409
410        let data = std::slice::from_raw_parts(data_ptr, data_len);
411        data_ptr = data_ptr.add(data_len);
412
413        let bit_len = if descriptor.is_aligned() {
414            (data_len * 8) as u16
415        } else if let Some(data) = data.last() {
416            data_len as u16 * 8 - data.trailing_zeros() as u16 - 1
417        } else {
418            0
419        };
420
421        let mut references = ArrayVec::<Cell, MAX_REF_COUNT>::default();
422        let mut children_mask = LevelMask::EMPTY;
423
424        #[cfg(feature = "stats")]
425        let mut stats = CellTreeStats {
426            bit_count: bit_len as u64,
427            cell_count: 1,
428        };
429
430        for _ in 0..descriptor.reference_count() {
431            let child_index = read_be_u32_fast(data_ptr, ref_size);
432            if child_index >= cell_count {
433                return Err(Error::InvalidRef);
434            }
435
436            let child = match cells.get((cell_count - child_index - 1) as usize) {
437                Some(child) => child.clone(),
438                None => return Err(Error::InvalidRefOrder),
439            };
440
441            {
442                let child = child.as_ref();
443                children_mask |= child.descriptor().level_mask();
444                #[cfg(feature = "stats")]
445                {
446                    stats += child.stats();
447                }
448            }
449            references.push(child);
450
451            data_ptr = data_ptr.add(ref_size);
452        }
453
454        Ok(CellParts {
455            #[cfg(feature = "stats")]
456            stats,
457            bit_len,
458            descriptor,
459            children_mask,
460            references,
461            data,
462        })
463    }
464
465    /// Reads a raw cell from the specified slice.
466    /// The returned slice is guaranteed to be a correct bytes representation of cell.
467    pub fn read_raw_cell<'b>(bytes: &mut &'b [u8], ref_size: usize) -> Result<&'b [u8], Error> {
468        let total_len = ok!(Self::read_raw_cell_from_ptr(
469            bytes.as_ptr(),
470            bytes.len(),
471            ref_size
472        ));
473        let (cell, rest) = bytes.split_at(total_len);
474        *bytes = rest;
475        Ok(cell)
476    }
477
478    fn read_raw_cell_from_ptr(
479        bytes_ptr: *const u8,
480        bytes_len: usize,
481        ref_size: usize,
482    ) -> Result<usize, Error> {
483        const _: () = assert!(std::mem::size_of::<CellDescriptor>() == 2);
484
485        if unlikely(bytes_len < 2) {
486            return Err(Error::UnexpectedEof);
487        }
488
489        // SAFETY: we have already checked the reader has 2 bytes
490        // and the `CellDescriptor` layout is fixed by `repr(C)`
491        let descriptor = CellDescriptor::new(unsafe { *(bytes_ptr as *const [u8; 2]) });
492
493        if unlikely(descriptor.is_absent()) {
494            return Err(Error::AbsentCellsNotSupported);
495        }
496
497        // 0b11111111 -> 0b01111111 + 1 = 0b10000000 = byte len 128, max bit len = 1023
498        // 0b11111110 -> 0b01111111 = byte len 127, bit len = 1016
499        let data_len = descriptor.byte_len() as usize;
500        let ref_count = descriptor.reference_count() as usize;
501        if unlikely(ref_count > MAX_REF_COUNT) {
502            return Err(Error::InvalidRef);
503        }
504
505        let mut data_offset = 0;
506        if unlikely(descriptor.store_hashes()) {
507            let level = descriptor.level_mask().level();
508            if descriptor.is_exotic() && ref_count == 0 && level > 0 {
509                // Pruned branch with `store_hashes` is invalid
510                return Err(Error::UnnormalizedCell);
511            }
512            data_offset = (32 + 2) * (level as usize + 1);
513        }
514
515        let total_len = 2 + data_offset + data_len + ref_count * ref_size;
516        if unlikely(bytes_len < total_len) {
517            return Err(Error::UnexpectedEof);
518        }
519
520        if data_len > 0 && !descriptor.is_aligned() {
521            // SAFETY: we have already requested 2+{data_len} bytes
522            let byte_with_tag = unsafe { *bytes_ptr.add(2 + data_offset + data_len - 1) };
523            if unlikely(byte_with_tag & 0x7f == 0) {
524                return Err(Error::UnnormalizedCell);
525            }
526        }
527
528        Ok(total_len)
529    }
530}
531
532const CELLS_ON_STACK: usize = 16;
533const ROOTS_ON_STACK: usize = 2;
534
535const MAX_ROOTS: usize = 32;
536
537/// Error type for BOC decoding related errors.
538#[derive(Debug, Copy, Clone, thiserror::Error)]
539pub enum Error {
540    /// EOF encountered during another operation.
541    #[error("unexpected EOF")]
542    UnexpectedEof,
543    /// Invalid magic bytes.
544    #[error("unknown BOC tag")]
545    UnknownBocTag,
546    /// Invalid BOC header.
547    #[error("invalid header")]
548    InvalidHeader,
549    /// References size is greater than 4.
550    #[error("ref index does not fit in `u32` type")]
551    InvalidRefSize,
552    /// Offset size is greater than 8.
553    #[error("cell offset does not fit in `usize` type")]
554    InvalidOffsetSize,
555    /// Root cell not found.
556    #[error("root cell not found")]
557    RootCellNotFound,
558    /// Specified BOC tag doesn't support multiple roots.
559    #[error("unexpected multiple roots")]
560    UnexpectedMultipleRoots,
561    /// The number of roots in BOC is greater than expected.
562    #[error("too many root cells")]
563    TooManyRootCells,
564    /// Absent cells are legacy therefore not supported.
565    #[error("absent cells are not supported")]
566    AbsentCellsNotSupported,
567    /// The number of roots in BOC is less than expected.
568    #[error("too few root cells")]
569    TooFewRootCells,
570    /// Total cells size mismatch.
571    #[error("invalid total cells size")]
572    InvalidTotalSize,
573    /// Invalid root cell index.
574    #[error("root index out of bounds")]
575    RootOutOfBounds,
576    /// Invalid child reference.
577    #[error("cell ref count not in range 0..=4")]
578    InvalidRef,
579    /// Suboptimal cells are treated as error.
580    #[error("unnormalized cell")]
581    UnnormalizedCell,
582    /// Possible graph loop detected.
583    #[error("invalid children order")]
584    InvalidRefOrder,
585    /// Failed to parse cell.
586    #[error("invalid cell")]
587    InvalidCell,
588    /// Crc mismatch.
589    #[error("invalid checksum")]
590    InvalidChecksum,
591}