everscale_types/boc/
ser.rs

1use std::collections::HashMap;
2use std::hash::BuildHasher;
3
4use super::BocTag;
5use crate::cell::{CellDescriptor, DynCell, HashBytes};
6
7/// Intermediate BOC serializer state.
8pub struct BocHeader<'a, S = ahash::RandomState> {
9    root_rev_indices: Vec<u32>,
10    rev_indices: HashMap<&'a HashBytes, u32, S>,
11    rev_cells: Vec<&'a DynCell>,
12    total_data_size: u64,
13    reference_count: u64,
14    cell_count: u32,
15    without_hashes: bool,
16    include_crc: bool,
17}
18
19impl<S: BuildHasher + Default> Default for BocHeader<'_, S> {
20    #[inline]
21    fn default() -> Self {
22        Self {
23            root_rev_indices: Default::default(),
24            rev_indices: Default::default(),
25            rev_cells: Default::default(),
26            total_data_size: 0,
27            reference_count: 0,
28            cell_count: 0,
29            without_hashes: false,
30            include_crc: false,
31        }
32    }
33}
34
35impl<'a, S> BocHeader<'a, S>
36where
37    S: BuildHasher + Default,
38{
39    /// Creates an intermediate BOC serializer state with a single root.
40    pub fn with_root(root: &'a DynCell) -> Self {
41        let mut res = Self::default();
42        res.add_root(root);
43        res
44    }
45
46    /// Creates an empty intermediate BOC serializer state.
47    /// Reserves space for the specified number of cells.
48    #[inline]
49    pub fn with_capacity(capacity: usize) -> Self {
50        Self {
51            root_rev_indices: Default::default(),
52            rev_indices: HashMap::with_capacity_and_hasher(capacity, S::default()),
53            rev_cells: Vec::with_capacity(capacity),
54            total_data_size: 0,
55            reference_count: 0,
56            cell_count: 0,
57            without_hashes: false,
58            include_crc: false,
59        }
60    }
61
62    /// Creates an intermediate BOC serializer state with a single root.
63    /// Reserves space for the specified number of cells.
64    pub fn with_capacity_and_root(capacity: usize, root: &'a DynCell) -> Self {
65        let mut res = Self::with_capacity(capacity);
66        res.add_root(root);
67        res
68    }
69}
70
71impl<'a, S> BocHeader<'a, S>
72where
73    S: BuildHasher,
74{
75    /// Clears the header, removing all cells. Keeps the allocated memory for reuse.
76    pub fn clear(&mut self) {
77        self.root_rev_indices.clear();
78        self.rev_indices.clear();
79        self.rev_cells.clear();
80        self.total_data_size = 0;
81        self.reference_count = 0;
82        self.cell_count = 0;
83    }
84
85    /// Adds an additional root to the state.
86    pub fn add_root(&mut self, root: &'a DynCell) {
87        let root_rev_index = self.fill(root);
88        self.root_rev_indices.push(root_rev_index);
89    }
90
91    /// Includes CRC bytes in the encoded BOC.
92    #[inline]
93    pub fn with_crc(mut self, include_ctc: bool) -> Self {
94        self.include_crc = include_ctc;
95        self
96    }
97
98    /// Prevents hashes from being stored in the encoded BOC.
99    ///
100    /// (overwrites descriptor flag `store_hashes` during serialization).
101    #[inline]
102    pub fn without_hashes(mut self, without_hashes: bool) -> Self {
103        self.without_hashes = without_hashes;
104        self
105    }
106
107    /// Encodes cell trees into bytes.
108    pub fn encode(&self, target: &mut Vec<u8>) {
109        let target_len_before = target.len();
110        let header = self.encode_header(target);
111        self.encode_cells_chunk(&self.rev_cells, header.ref_size, target);
112        self.encode_crc(target_len_before, target);
113
114        debug_assert_eq!(
115            target.len() as u64,
116            target_len_before as u64 + header.total_size
117        );
118    }
119
120    /// Writes cell trees into the writer.
121    ///
122    /// NOTE: Use [`BocHeader::encode`] when possible since it's faster.
123    pub fn encode_to_writer<W: std::io::Write>(&self, mut writer: W) -> std::io::Result<()> {
124        const CELLS_CHUNK_SIZE: usize = 1000;
125        const P95_CELL_SIZE: usize = 128;
126
127        let mut crc = self.include_crc.then_some(0u32);
128        let mut total_size = 0;
129
130        let mut reset_chunk = |chunk: &mut Vec<u8>| {
131            if let Some(crc) = &mut crc {
132                *crc = crc32c::crc32c_append(*crc, chunk);
133            }
134            total_size += chunk.len() as u64;
135            chunk.clear();
136        };
137
138        let mut chunk = Vec::new();
139
140        // Write header
141        let header = self.encode_header(&mut chunk);
142        ok!(writer.write_all(&chunk));
143        reset_chunk(&mut chunk);
144
145        // Write cells
146        for cells in self.rev_cells.rchunks(CELLS_CHUNK_SIZE) {
147            chunk.reserve(cells.len() * P95_CELL_SIZE);
148            self.encode_cells_chunk(cells, header.ref_size, &mut chunk);
149            ok!(writer.write_all(&chunk));
150            reset_chunk(&mut chunk);
151        }
152
153        debug_assert!(chunk.is_empty());
154
155        if let Some(crc) = crc {
156            ok!(writer.write_all(&crc.to_le_bytes()));
157        }
158
159        debug_assert_eq!(total_size, header.total_size);
160        Ok(())
161    }
162
163    /// Encodes cell trees into bytes.
164    /// Uses `rayon` under the hood.
165    #[cfg(feature = "rayon")]
166    pub fn encode_rayon(&self, target: &mut Vec<u8>)
167    where
168        S: Send + Sync,
169    {
170        use rayon::iter::{IndexedParallelIterator, ParallelIterator};
171        use rayon::slice::ParallelSlice;
172
173        const CELLS_CHUNK_SIZE: usize = 5_000;
174        const P95_CELL_SIZE: usize = 128;
175
176        let target_len_before = target.len();
177        let header = self.encode_header(target);
178
179        if self.rev_cells.len() < CELLS_CHUNK_SIZE * 2 {
180            self.encode_cells_chunk(&self.rev_cells, header.ref_size, target);
181        } else {
182            let mut chunks = Vec::new();
183            self.rev_cells
184                .par_rchunks(CELLS_CHUNK_SIZE)
185                .map(|chunk| {
186                    let mut target = Vec::with_capacity(chunk.len() * P95_CELL_SIZE);
187                    self.encode_cells_chunk(chunk, header.ref_size, &mut target);
188                    target
189                })
190                .collect_into_vec(&mut chunks);
191            for chunk in chunks {
192                target.extend_from_slice(&chunk);
193            }
194        }
195
196        self.encode_crc(target_len_before, target);
197
198        debug_assert_eq!(
199            target.len() as u64,
200            target_len_before as u64 + header.total_size
201        );
202    }
203
204    /// Computes the encoded BOC size and other stuff.
205    pub fn compute_stats(&self) -> BocHeaderStats {
206        let root_count = self.root_rev_indices.len();
207
208        let ref_size = number_of_bytes_to_fit(self.cell_count as u64);
209
210        let total_cells_size = self.total_data_size
211            + (self.cell_count as u64 * 2) // all descriptor bytes
212            + (ref_size as u64 * self.reference_count);
213        let offset_size = number_of_bytes_to_fit(total_cells_size);
214
215        // 4 bytes - BOC tag
216        // 1 byte - flags
217        // 1 byte - offset size
218        // {ref_size} - cell count
219        // {ref_size} - root count
220        // {ref_size} - absent cell count
221        // {offset_size} - total cells size
222        // root_count * {ref_size} - root indices
223        // {total_cells_size} - cells
224        // include_crc * 4 - optional CRC32
225        let total_size = 4
226            + 2
227            + (ref_size as u64) * (3 + root_count as u64)
228            + (offset_size as u64)
229            + total_cells_size
230            + u64::from(self.include_crc) * 4;
231
232        BocHeaderStats {
233            offset_size,
234            ref_size,
235            total_cells_size,
236            total_size,
237        }
238    }
239
240    #[inline]
241    fn encode_header(&self, target: &mut Vec<u8>) -> BocHeaderStats {
242        let stats = self.compute_stats();
243
244        let root_count = self.root_rev_indices.len();
245
246        // NOTE: `ref_size` will be in range 1..=4 because `self.cell_count`
247        // is `u32`, and there is at least one cell (see Self::new)
248        debug_assert!((1..=4).contains(&stats.ref_size));
249
250        // NOTE: `offset_size` will be in range 1..=8 because `self.cell_count`
251        // is at least 1, and `total_cells_size` is `u64`
252        debug_assert!((1..=8).contains(&stats.offset_size));
253
254        let flags = (stats.ref_size as u8) | (u8::from(self.include_crc) * 0b0100_0000);
255
256        target.reserve(stats.total_size as usize);
257
258        target.extend_from_slice(&BocTag::GENERIC);
259        target.extend_from_slice(&[flags, stats.offset_size as u8]);
260        target.extend_from_slice(&self.cell_count.to_be_bytes()[4 - stats.ref_size..]);
261        target.extend_from_slice(&(root_count as u32).to_be_bytes()[4 - stats.ref_size..]);
262        target.extend_from_slice(&[0; 4][4 - stats.ref_size..]);
263        target.extend_from_slice(&stats.total_cells_size.to_be_bytes()[8 - stats.offset_size..]);
264
265        for rev_index in &self.root_rev_indices {
266            let root_index = self.cell_count - rev_index - 1;
267            target.extend_from_slice(&root_index.to_be_bytes()[4 - stats.ref_size..]);
268        }
269
270        stats
271    }
272
273    #[inline]
274    fn encode_cells_chunk(&self, chunk: &[&DynCell], ref_size: usize, target: &mut Vec<u8>) {
275        let descriptor_mask = !(u8::from(self.without_hashes) * CellDescriptor::STORE_HASHES_MASK);
276
277        for cell in chunk.iter().rev() {
278            let mut descriptor = cell.descriptor();
279            descriptor.d1 &= descriptor_mask;
280            target.extend_from_slice(&[descriptor.d1, descriptor.d2]);
281            if descriptor.store_hashes() {
282                let level_mask = descriptor.level_mask();
283                for level in level_mask {
284                    target.extend_from_slice(cell.hash(level).as_ref());
285                }
286                for level in level_mask {
287                    target.extend_from_slice(&cell.depth(level).to_be_bytes());
288                }
289            }
290            target.extend_from_slice(cell.data());
291            for child in cell.references() {
292                if let Some(rev_index) = self.rev_indices.get(child.repr_hash()) {
293                    let rev_index = self.cell_count - *rev_index - 1;
294                    target.extend_from_slice(&rev_index.to_be_bytes()[4 - ref_size..]);
295                } else {
296                    debug_assert!(false, "child not found");
297                }
298            }
299        }
300    }
301
302    #[inline]
303    fn encode_crc(&self, target_len_before: usize, target: &mut Vec<u8>) {
304        if self.include_crc {
305            let target_len_after = target.len();
306            debug_assert!(target_len_before < target_len_after);
307
308            let crc = crc32c::crc32c(&target[target_len_before..target_len_after]);
309            target.extend_from_slice(&crc.to_le_bytes());
310        }
311    }
312
313    fn fill(&mut self, root: &'a DynCell) -> u32 {
314        const SAFE_DEPTH: u16 = 128;
315
316        if let Some(index) = self.rev_indices.get(root.repr_hash()) {
317            return *index;
318        }
319
320        let repr_depth = root.repr_depth();
321        if repr_depth <= SAFE_DEPTH {
322            self.fill_recursive(root);
323        } else {
324            self.fill_deep(root, repr_depth);
325        }
326
327        debug_assert!(self.cell_count > 0);
328        self.cell_count - 1
329    }
330
331    fn fill_recursive(&mut self, cell: &'a DynCell) {
332        for child in cell.references() {
333            if !self.rev_indices.contains_key(child.repr_hash()) {
334                self.fill_recursive(child);
335            }
336        }
337
338        self.rev_indices.insert(cell.repr_hash(), self.cell_count);
339        self.rev_cells.push(cell);
340
341        let descriptor = cell.descriptor();
342        self.total_data_size += descriptor.byte_len_full(self.without_hashes);
343        self.reference_count += descriptor.reference_count() as u64;
344        self.cell_count += 1;
345    }
346
347    #[cold]
348    fn fill_deep(&mut self, root: &'a DynCell, repr_depth: u16) {
349        const MAX_DEFAULT_CAPACITY: u16 = 256;
350
351        let mut stack = Vec::with_capacity(repr_depth.min(MAX_DEFAULT_CAPACITY) as usize);
352        stack.push(root.references());
353
354        while let Some(children) = stack.last_mut() {
355            if let Some(cell) = children.next() {
356                if !self.rev_indices.contains_key(cell.repr_hash()) {
357                    stack.push(cell.references());
358                }
359            } else {
360                let cell = children.cell();
361
362                self.rev_indices.insert(cell.repr_hash(), self.cell_count);
363                self.rev_cells.push(cell);
364
365                let descriptor = cell.descriptor();
366                self.total_data_size += descriptor.byte_len_full(self.without_hashes);
367                self.reference_count += descriptor.reference_count() as u64;
368                self.cell_count += 1;
369
370                stack.pop();
371            }
372        }
373    }
374}
375
376impl CellDescriptor {
377    fn byte_len_full(self, without_hashes: bool) -> u64 {
378        let mut byte_len = self.byte_len() as u64;
379        if !without_hashes && self.store_hashes() {
380            byte_len += (self.level_mask().level() + 1) as u64 * (32 + 2);
381        }
382        byte_len
383    }
384}
385
386/// An info about the encoded BOC.
387#[derive(Copy, Clone)]
388pub struct BocHeaderStats {
389    /// Size of the offset numbers in bytes.
390    pub offset_size: usize,
391    /// Size of the reference indices in bytes.
392    pub ref_size: usize,
393    /// The total size of cells part in the resulting BOC.
394    ///
395    /// NOTE: Use [`total_size`] for the full BOC size.
396    ///
397    /// [`total_size`]: Self::total_size
398    pub total_cells_size: u64,
399
400    /// Total size of the encoded BOC in bytes.
401    pub total_size: u64,
402}
403
404fn number_of_bytes_to_fit(l: u64) -> usize {
405    (8 - l.leading_zeros() / 8) as usize
406}