tycho_types/boc/ser.rs

use std::collections::HashMap;
use std::hash::BuildHasher;

use super::BocTag;
use crate::cell::{CellDescriptor, DynCell, HashBytes};

/// Preallocated BOC header indices cache.
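///
/// Keeps the `rev_indices`/`rev_cells` allocations alive between serializations
/// so they can be reused via [`BocHeader::with_root_and_cache`] and
/// [`BocHeader::into_cache`].
///
/// A minimal reuse sketch (illustrative; how the `roots` iterator of
/// `&DynCell` references is obtained is an assumption, not part of this module):
///
/// ```ignore
/// let mut cache = BocHeaderCache::<ahash::RandomState>::with_capacity(1024);
/// let mut buffer = Vec::new();
/// for root in roots {
///     let header = BocHeader::with_root_and_cache(root, cache);
///     buffer.clear();
///     header.encode(&mut buffer);
///     // ... use `buffer` ...
///     cache = header.into_cache();
/// }
/// ```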
pub struct BocHeaderCache<S> {
    rev_indices: HashMap<&'static HashBytes, u32, S>,
    rev_cells: Vec<&'static DynCell>,
}

impl<S: BuildHasher + Default> Default for BocHeaderCache<S> {
    #[inline]
    fn default() -> Self {
        Self {
            rev_indices: Default::default(),
            rev_cells: Default::default(),
        }
    }
}

impl<S: BuildHasher + Default> BocHeaderCache<S> {
    /// Creates an empty preallocated revs cache with the specified capacity.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            rev_indices: HashMap::with_capacity_and_hasher(capacity, Default::default()),
            rev_cells: Vec::with_capacity(capacity),
        }
    }

    /// Capacity of the underlying `rev_indices` cache.
    pub fn rev_indices_capacity(&self) -> usize {
        self.rev_indices.capacity()
    }

    /// Capacity of the underlying `rev_cells` cache.
    pub fn rev_cells_capacity(&self) -> usize {
        self.rev_cells.capacity()
    }
}

/// Intermediate BOC serializer state.
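///
/// Collects the unique cells reachable from one or more roots and encodes them
/// in the BOC ("bag of cells") binary format.
///
/// A minimal single-root sketch (illustrative; `root: &DynCell` is assumed to
/// be obtained elsewhere):
///
/// ```ignore
/// let mut target = Vec::new();
/// BocHeader::<ahash::RandomState>::with_root(root).encode(&mut target);
/// // `target` now holds the encoded BOC bytes.
/// ```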
pub struct BocHeader<'a, S = ahash::RandomState> {
    root_rev_indices: Vec<u32>,
    rev_indices: HashMap<&'a HashBytes, u32, S>,
    rev_cells: Vec<&'a DynCell>,
    total_data_size: u64,
    reference_count: u64,
    cell_count: u32,
    without_hashes: bool,
    include_crc: bool,
}

impl<S: BuildHasher + Default> Default for BocHeader<'_, S> {
    #[inline]
    fn default() -> Self {
        Self {
            root_rev_indices: Default::default(),
            rev_indices: Default::default(),
            rev_cells: Default::default(),
            total_data_size: 0,
            reference_count: 0,
            cell_count: 0,
            without_hashes: false,
            include_crc: false,
        }
    }
}

impl<'a, S> BocHeader<'a, S>
where
    S: BuildHasher + Default,
{
    /// Creates an intermediate BOC serializer state with a single root.
    pub fn with_root(root: &'a DynCell) -> Self {
        let mut res = Self::default();
        res.add_root(root);
        res
    }

    /// Creates an empty intermediate BOC serializer state.
    /// Reserves space for the specified number of cells.
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            root_rev_indices: Default::default(),
            rev_indices: HashMap::with_capacity_and_hasher(capacity, S::default()),
            rev_cells: Vec::with_capacity(capacity),
            total_data_size: 0,
            reference_count: 0,
            cell_count: 0,
            without_hashes: false,
            include_crc: false,
        }
    }

    /// Creates an intermediate BOC serializer state with a single root.
    /// Reserves space for the specified number of cells.
    pub fn with_capacity_and_root(capacity: usize, root: &'a DynCell) -> Self {
        let mut res = Self::with_capacity(capacity);
        res.add_root(root);
        res
    }
}

impl<'a, S> BocHeader<'a, S>
where
    S: BuildHasher,
{
    /// Creates an intermediate BOC serializer state with a single root and preallocated revs cache.
    pub fn with_root_and_cache(root: &'a DynCell, cache: BocHeaderCache<S>) -> Self {
        debug_assert!(cache.rev_cells.is_empty());
        debug_assert!(cache.rev_indices.is_empty());

        let mut res = BocHeader::<'_, S> {
            // SAFETY: `rev_indices` is guaranteed to be empty so that
            // there is no difference in a key type lifetime.
            rev_indices: unsafe {
                std::mem::transmute::<
                    HashMap<&'static HashBytes, u32, S>,
                    HashMap<&'a HashBytes, u32, S>,
                >(cache.rev_indices)
            },
            // SAFETY: `rev_cells` is guaranteed to be empty so that
            // there is no difference in a value type lifetime.
            rev_cells: unsafe {
                std::mem::transmute::<Vec<&'static DynCell>, Vec<&'a DynCell>>(cache.rev_cells)
            },
            root_rev_indices: Default::default(),
            total_data_size: 0,
            reference_count: 0,
            cell_count: 0,
            without_hashes: false,
            include_crc: false,
        };
        res.add_root(root);
        res
    }

    /// Transforms the `BocHeader` into a reusable revs cache.
    pub fn into_cache(mut self) -> BocHeaderCache<S> {
        self.rev_indices.clear();
        self.rev_cells.clear();

        BocHeaderCache {
            // SAFETY: `rev_indices` is guaranteed to be empty so that
            // there is no difference in a key type lifetime.
            rev_indices: unsafe {
                std::mem::transmute::<
                    HashMap<&'a HashBytes, u32, S>,
                    HashMap<&'static HashBytes, u32, S>,
                >(self.rev_indices)
            },
            // SAFETY: `rev_cells` is guaranteed to be empty so that
            // there is no difference in a value type lifetime.
            rev_cells: unsafe {
                std::mem::transmute::<Vec<&'a DynCell>, Vec<&'static DynCell>>(self.rev_cells)
            },
        }
    }

    /// Clears the header, removing all cells. Keeps the allocated memory for reuse.
    pub fn clear(&mut self) {
        self.root_rev_indices.clear();
        self.rev_indices.clear();
        self.rev_cells.clear();
        self.total_data_size = 0;
        self.reference_count = 0;
        self.cell_count = 0;
    }

    /// Adds an additional root to the state.
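    ///
    /// A multi-root sketch (illustrative; `first` and `second` are assumed
    /// `&DynCell` references):
    ///
    /// ```ignore
    /// let mut header = BocHeader::<ahash::RandomState>::with_root(first);
    /// header.add_root(second);
    ///
    /// let mut target = Vec::new();
    /// header.encode(&mut target);
    /// ```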
    pub fn add_root(&mut self, root: &'a DynCell) {
        let root_rev_index = self.fill(root);
        self.root_rev_indices.push(root_rev_index);
    }

    /// Includes CRC bytes in the encoded BOC.
    #[inline]
    pub fn with_crc(mut self, include_crc: bool) -> Self {
        self.include_crc = include_crc;
        self
    }

    /// Prevents hashes from being stored in the encoded BOC.
    ///
    /// (Overwrites the `store_hashes` descriptor flag during serialization.)
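    ///
    /// A builder-style sketch combining both flags (illustrative; `root` is
    /// assumed):
    ///
    /// ```ignore
    /// let mut target = Vec::new();
    /// BocHeader::<ahash::RandomState>::with_root(root)
    ///     .with_crc(true)
    ///     .without_hashes(true)
    ///     .encode(&mut target);
    /// ```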
    #[inline]
    pub fn without_hashes(mut self, without_hashes: bool) -> Self {
        self.without_hashes = without_hashes;
        self
    }

    /// Encodes cell trees into bytes.
    pub fn encode(&self, target: &mut Vec<u8>) {
        let target_len_before = target.len();
        let header = self.encode_header(target);
        self.encode_cells_chunk(&self.rev_cells, header.ref_size, target);
        self.encode_crc(target_len_before, target);

        debug_assert_eq!(
            target.len() as u64,
            target_len_before as u64 + header.total_size
        );
    }

    /// Writes cell trees into the writer.
    ///
    /// NOTE: Use [`BocHeader::encode`] when possible since it's faster.
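    ///
    /// A streaming sketch for any [`std::io::Write`] target (illustrative;
    /// the file name and `root` are assumptions):
    ///
    /// ```ignore
    /// let file = std::fs::File::create("state.boc").expect("failed to create file");
    /// let mut writer = std::io::BufWriter::new(file);
    /// BocHeader::<ahash::RandomState>::with_root(root)
    ///     .encode_to_writer(&mut writer)
    ///     .expect("failed to write BOC");
    /// ```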
    pub fn encode_to_writer<W: std::io::Write>(&self, mut writer: W) -> std::io::Result<()> {
        const CELLS_CHUNK_SIZE: usize = 1000;
        const P95_CELL_SIZE: usize = 128;

        let mut crc = self.include_crc.then_some(0u32);
        let mut total_size = 0;

        let mut reset_chunk = |chunk: &mut Vec<u8>| {
            if let Some(crc) = &mut crc {
                *crc = crc32c::crc32c_append(*crc, chunk);
            }
            total_size += chunk.len() as u64;
            chunk.clear();
        };

        let mut chunk = Vec::new();

        // Write header
        let header = self.encode_header(&mut chunk);
        ok!(writer.write_all(&chunk));
        reset_chunk(&mut chunk);

        // Write cells
        for cells in self.rev_cells.rchunks(CELLS_CHUNK_SIZE) {
            chunk.reserve(cells.len() * P95_CELL_SIZE);
            self.encode_cells_chunk(cells, header.ref_size, &mut chunk);
            ok!(writer.write_all(&chunk));
            reset_chunk(&mut chunk);
        }

        debug_assert!(chunk.is_empty());

        if let Some(crc) = crc {
            ok!(writer.write_all(&crc.to_le_bytes()));
        }

        debug_assert_eq!(total_size, header.total_size);
        Ok(())
    }

    /// Encodes cell trees into bytes.
    /// Uses `rayon` under the hood.
    #[cfg(feature = "rayon")]
    pub fn encode_rayon(&self, target: &mut Vec<u8>)
    where
        S: Send + Sync,
    {
        use rayon::iter::{IndexedParallelIterator, ParallelIterator};
        use rayon::slice::ParallelSlice;

        const CELLS_CHUNK_SIZE: usize = 5_000;
        const P95_CELL_SIZE: usize = 128;

        let target_len_before = target.len();
        let header = self.encode_header(target);

        if self.rev_cells.len() < CELLS_CHUNK_SIZE * 2 {
            self.encode_cells_chunk(&self.rev_cells, header.ref_size, target);
        } else {
            let mut chunks = Vec::new();
            self.rev_cells
                .par_rchunks(CELLS_CHUNK_SIZE)
                .map(|chunk| {
                    let mut target = Vec::with_capacity(chunk.len() * P95_CELL_SIZE);
                    self.encode_cells_chunk(chunk, header.ref_size, &mut target);
                    target
                })
                .collect_into_vec(&mut chunks);
            for chunk in chunks {
                target.extend_from_slice(&chunk);
            }
        }

        self.encode_crc(target_len_before, target);

        debug_assert_eq!(
            target.len() as u64,
            target_len_before as u64 + header.total_size
        );
    }

    /// Computes the size and layout parameters of the encoded BOC.
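    ///
    /// Can be used to preallocate the output buffer before encoding
    /// (illustrative sketch; `root` is assumed):
    ///
    /// ```ignore
    /// let header = BocHeader::<ahash::RandomState>::with_root(root);
    /// let stats = header.compute_stats();
    ///
    /// let mut target = Vec::with_capacity(stats.total_size as usize);
    /// header.encode(&mut target);
    /// assert_eq!(target.len() as u64, stats.total_size);
    /// ```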
    pub fn compute_stats(&self) -> BocHeaderStats {
        let root_count = self.root_rev_indices.len();

        let ref_size = number_of_bytes_to_fit(self.cell_count as u64);

        let total_cells_size = self.total_data_size
            + (self.cell_count as u64 * 2) // all descriptor bytes
            + (ref_size as u64 * self.reference_count);
        let offset_size = number_of_bytes_to_fit(total_cells_size);

        // 4 bytes - BOC tag
        // 1 byte - flags
        // 1 byte - offset size
        // {ref_size} - cell count
        // {ref_size} - root count
        // {ref_size} - absent cell count
        // {offset_size} - total cells size
        // root_count * {ref_size} - root indices
        // {total_cells_size} - cells
        // include_crc * 4 - optional CRC32
        let total_size = 4
            + 2
            + (ref_size as u64) * (3 + root_count as u64)
            + (offset_size as u64)
            + total_cells_size
            + u64::from(self.include_crc) * 4;

        BocHeaderStats {
            offset_size,
            ref_size,
            total_cells_size,
            total_size,
        }
    }

    #[inline]
    fn encode_header(&self, target: &mut Vec<u8>) -> BocHeaderStats {
        let stats = self.compute_stats();

        let root_count = self.root_rev_indices.len();

        // NOTE: `ref_size` will be in range 1..=4 because `self.cell_count`
        // is `u32`, and there is at least one cell (see `Self::with_root`)
        debug_assert!((1..=4).contains(&stats.ref_size));

        // NOTE: `offset_size` will be in range 1..=8 because `self.cell_count`
        // is at least 1, and `total_cells_size` is `u64`
        debug_assert!((1..=8).contains(&stats.offset_size));

        let flags = (stats.ref_size as u8) | (u8::from(self.include_crc) * 0b0100_0000);

        target.reserve(stats.total_size as usize);

        target.extend_from_slice(&BocTag::GENERIC);
        target.extend_from_slice(&[flags, stats.offset_size as u8]);
        target.extend_from_slice(&self.cell_count.to_be_bytes()[4 - stats.ref_size..]);
        target.extend_from_slice(&(root_count as u32).to_be_bytes()[4 - stats.ref_size..]);
        target.extend_from_slice(&[0; 4][4 - stats.ref_size..]);
        target.extend_from_slice(&stats.total_cells_size.to_be_bytes()[8 - stats.offset_size..]);

        for rev_index in &self.root_rev_indices {
            let root_index = self.cell_count - rev_index - 1;
            target.extend_from_slice(&root_index.to_be_bytes()[4 - stats.ref_size..]);
        }

        stats
    }

    #[inline]
    fn encode_cells_chunk(&self, chunk: &[&DynCell], ref_size: usize, target: &mut Vec<u8>) {
        let descriptor_mask = !(u8::from(self.without_hashes) * CellDescriptor::STORE_HASHES_MASK);

        for cell in chunk.iter().rev() {
            let mut descriptor = cell.descriptor();
            descriptor.d1 &= descriptor_mask;
            target.extend_from_slice(&[descriptor.d1, descriptor.d2]);
            if descriptor.store_hashes() {
                let level_mask = descriptor.level_mask();
                for level in level_mask {
                    target.extend_from_slice(cell.hash(level).as_ref());
                }
                for level in level_mask {
                    target.extend_from_slice(&cell.depth(level).to_be_bytes());
                }
            }
            target.extend_from_slice(cell.data());
            for child in cell.references() {
                if let Some(rev_index) = self.rev_indices.get(child.repr_hash()) {
                    let rev_index = self.cell_count - *rev_index - 1;
                    target.extend_from_slice(&rev_index.to_be_bytes()[4 - ref_size..]);
                } else {
                    debug_assert!(false, "child not found");
                }
            }
        }
    }

    #[inline]
    fn encode_crc(&self, target_len_before: usize, target: &mut Vec<u8>) {
        if self.include_crc {
            let target_len_after = target.len();
            debug_assert!(target_len_before < target_len_after);

            let crc = crc32c::crc32c(&target[target_len_before..target_len_after]);
            target.extend_from_slice(&crc.to_le_bytes());
        }
    }

    fn fill(&mut self, root: &'a DynCell) -> u32 {
        const SAFE_DEPTH: u16 = 128;

        if let Some(index) = self.rev_indices.get(root.repr_hash()) {
            return *index;
        }

        let repr_depth = root.repr_depth();
        if repr_depth <= SAFE_DEPTH {
            self.fill_recursive(root);
        } else {
            self.fill_deep(root, repr_depth);
        }

        debug_assert!(self.cell_count > 0);
        self.cell_count - 1
    }

    fn fill_recursive(&mut self, cell: &'a DynCell) {
        for child in cell.references() {
            if !self.rev_indices.contains_key(child.repr_hash()) {
                self.fill_recursive(child);
            }
        }

        self.rev_indices.insert(cell.repr_hash(), self.cell_count);
        self.rev_cells.push(cell);

        let descriptor = cell.descriptor();
        self.total_data_size += descriptor.byte_len_full(self.without_hashes);
        self.reference_count += descriptor.reference_count() as u64;
        self.cell_count += 1;
    }

    #[cold]
    fn fill_deep(&mut self, root: &'a DynCell, repr_depth: u16) {
        const MAX_DEFAULT_CAPACITY: u16 = 256;

        let mut stack = Vec::with_capacity(repr_depth.min(MAX_DEFAULT_CAPACITY) as usize);
        stack.push(root.references());

        while let Some(children) = stack.last_mut() {
            if let Some(cell) = children.next() {
                if !self.rev_indices.contains_key(cell.repr_hash()) {
                    stack.push(cell.references());
                }
            } else {
                let cell = children.cell();

                self.rev_indices.insert(cell.repr_hash(), self.cell_count);
                self.rev_cells.push(cell);

                let descriptor = cell.descriptor();
                self.total_data_size += descriptor.byte_len_full(self.without_hashes);
                self.reference_count += descriptor.reference_count() as u64;
                self.cell_count += 1;

                stack.pop();
            }
        }
    }
}

impl CellDescriptor {
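    /// Full serialized byte length of a cell with this descriptor: its data
    /// bytes plus, when hashes are stored and not suppressed, a 32-byte hash
    /// and a 2-byte depth per stored level.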
    fn byte_len_full(self, without_hashes: bool) -> u64 {
        let mut byte_len = self.byte_len() as u64;
        if !without_hashes && self.store_hashes() {
            byte_len += (self.level_mask().level() + 1) as u64 * (32 + 2);
        }
        byte_len
    }
}

/// Info about the encoded BOC.
#[derive(Copy, Clone)]
pub struct BocHeaderStats {
    /// Size of the offset numbers in bytes.
    pub offset_size: usize,
    /// Size of the reference indices in bytes.
    pub ref_size: usize,
    /// The total size of the cells part in the resulting BOC.
    ///
    /// NOTE: Use [`total_size`] for the full BOC size.
    ///
    /// [`total_size`]: Self::total_size
    pub total_cells_size: u64,

    /// Total size of the encoded BOC in bytes.
    pub total_size: u64,
}

/// Returns the minimal number of bytes needed to represent `l`
/// (e.g. 0 for 0, 1 for 1..=255, 2 for 256..=65535).
fn number_of_bytes_to_fit(l: u64) -> usize {
    (8 - l.leading_zeros() / 8) as usize
}