1use std::collections::HashMap;
2use std::hash::BuildHasher;
3
4use super::BocTag;
5use crate::cell::{CellDescriptor, DynCell, HashBytes};
6
/// Reusable allocations for [`BocHeader`].
///
/// Both collections are kept empty between uses — only their capacity is
/// preserved. The `'static` lifetime is a placeholder: the references are
/// relabeled when the cache is converted to/from a [`BocHeader`]
/// (see [`BocHeader::with_root_and_cache`] and [`BocHeader::into_cache`]).
pub struct BocHeaderCache<S> {
    // Hash -> reverse-index map (kept empty; capacity reused).
    rev_indices: HashMap<&'static HashBytes, u32, S>,
    // Reverse-ordered cell list (kept empty; capacity reused).
    rev_cells: Vec<&'static DynCell>,
}
12
13impl<S: BuildHasher + Default> Default for BocHeaderCache<S> {
14 #[inline]
15 fn default() -> Self {
16 Self {
17 rev_indices: Default::default(),
18 rev_cells: Default::default(),
19 }
20 }
21}
22
23impl<S: BuildHasher + Default> BocHeaderCache<S> {
24 pub fn with_capacity(capacity: usize) -> Self {
26 Self {
27 rev_indices: HashMap::with_capacity_and_hasher(capacity, Default::default()),
28 rev_cells: Vec::with_capacity(capacity),
29 }
30 }
31
32 pub fn rev_indices_capacity(&self) -> usize {
34 self.rev_indices.capacity()
35 }
36
37 pub fn rev_cells_capacity(&self) -> usize {
39 self.rev_cells.capacity()
40 }
41}
42
/// Intermediate BOC serialization state: the set of unique cells reachable
/// from the added roots, stored in reverse topological order.
pub struct BocHeader<'a, S = ahash::RandomState> {
    // Reverse indices (into `rev_cells`) of the added root cells.
    root_rev_indices: Vec<u32>,
    // Maps a cell's representation hash to its index in `rev_cells`.
    rev_indices: HashMap<&'a HashBytes, u32, S>,
    // Unique cells; children are stored before their parents.
    rev_cells: Vec<&'a DynCell>,
    // Sum of `CellDescriptor::byte_len_full` over all unique cells.
    total_data_size: u64,
    // Total number of child references over all unique cells.
    reference_count: u64,
    // Number of unique cells (tracks `rev_cells.len()`).
    cell_count: u32,
    // When set, the STORE_HASHES descriptor flag is cleared during encoding.
    without_hashes: bool,
    // When set, a little-endian CRC32-C is appended to the encoded data.
    include_crc: bool,
}
54
55impl<S: BuildHasher + Default> Default for BocHeader<'_, S> {
56 #[inline]
57 fn default() -> Self {
58 Self {
59 root_rev_indices: Default::default(),
60 rev_indices: Default::default(),
61 rev_cells: Default::default(),
62 total_data_size: 0,
63 reference_count: 0,
64 cell_count: 0,
65 without_hashes: false,
66 include_crc: false,
67 }
68 }
69}
70
71impl<'a, S> BocHeader<'a, S>
72where
73 S: BuildHasher + Default,
74{
75 pub fn with_root(root: &'a DynCell) -> Self {
77 let mut res = Self::default();
78 res.add_root(root);
79 res
80 }
81
82 #[inline]
85 pub fn with_capacity(capacity: usize) -> Self {
86 Self {
87 root_rev_indices: Default::default(),
88 rev_indices: HashMap::with_capacity_and_hasher(capacity, S::default()),
89 rev_cells: Vec::with_capacity(capacity),
90 total_data_size: 0,
91 reference_count: 0,
92 cell_count: 0,
93 without_hashes: false,
94 include_crc: false,
95 }
96 }
97
98 pub fn with_capacity_and_root(capacity: usize, root: &'a DynCell) -> Self {
101 let mut res = Self::with_capacity(capacity);
102 res.add_root(root);
103 res
104 }
105}
106
107impl<'a, S> BocHeader<'a, S>
108where
109 S: BuildHasher,
110{
    /// Creates a header with a single root cell, reusing the allocations
    /// held by a previously drained [`BocHeaderCache`].
    pub fn with_root_and_cache(root: &'a DynCell, cache: BocHeaderCache<S>) -> Self {
        // The cache collections must be empty — only their capacity is
        // reused. (Checked in debug builds only; `into_cache` clears them.)
        debug_assert!(cache.rev_cells.is_empty());
        debug_assert!(cache.rev_indices.is_empty());

        let mut res = BocHeader::<'_, S> {
            rev_indices: unsafe {
                // SAFETY: only the key lifetime parameter changes, so the
                // layout is identical; the map is expected to be empty
                // (asserted above), so no `&'static` key is ever observed.
                std::mem::transmute::<
                    HashMap<&'static HashBytes, u32, S>,
                    HashMap<&'a HashBytes, u32, S>,
                >(cache.rev_indices)
            },
            rev_cells: unsafe {
                // SAFETY: same as above — only the element reference
                // lifetime changes, not the layout, and the vector is
                // expected to be empty.
                std::mem::transmute::<Vec<&'static DynCell>, Vec<&'a DynCell>>(cache.rev_cells)
            },
            root_rev_indices: Default::default(),
            total_data_size: 0,
            reference_count: 0,
            cell_count: 0,
            without_hashes: false,

            include_crc: false,
        };
        res.add_root(root);
        res
    }
141
    /// Drains this header into a reusable [`BocHeaderCache`], preserving
    /// the allocated capacity of its collections.
    pub fn into_cache(mut self) -> BocHeaderCache<S> {
        // Clearing makes the lifetime extension below sound: no `&'a`
        // reference survives inside the returned cache.
        self.rev_indices.clear();
        self.rev_cells.clear();

        BocHeaderCache {
            rev_indices: unsafe {
                // SAFETY: the map was cleared above, so it holds no `&'a`
                // keys; relabeling the key lifetime as `'static` changes
                // nothing observable and the layout is identical.
                std::mem::transmute::<
                    HashMap<&'a HashBytes, u32, S>,
                    HashMap<&'static HashBytes, u32, S>,
                >(self.rev_indices)
            },
            rev_cells: unsafe {
                // SAFETY: same as above — the vector was cleared, so no
                // element with the shorter lifetime remains.
                std::mem::transmute::<Vec<&'a DynCell>, Vec<&'static DynCell>>(self.rev_cells)
            },
        }
    }
163
164 pub fn clear(&mut self) {
166 self.root_rev_indices.clear();
167 self.rev_indices.clear();
168 self.rev_cells.clear();
169 self.total_data_size = 0;
170 self.reference_count = 0;
171 self.cell_count = 0;
172 }
173
174 pub fn add_root(&mut self, root: &'a DynCell) {
176 let root_rev_index = self.fill(root);
177 self.root_rev_indices.push(root_rev_index);
178 }
179
180 #[inline]
182 pub fn with_crc(mut self, include_ctc: bool) -> Self {
183 self.include_crc = include_ctc;
184 self
185 }
186
187 #[inline]
191 pub fn without_hashes(mut self, without_hashes: bool) -> Self {
192 self.without_hashes = without_hashes;
193 self
194 }
195
196 pub fn encode(&self, target: &mut Vec<u8>) {
198 let target_len_before = target.len();
199 let header = self.encode_header(target);
200 self.encode_cells_chunk(&self.rev_cells, header.ref_size, target);
201 self.encode_crc(target_len_before, target);
202
203 debug_assert_eq!(
204 target.len() as u64,
205 target_len_before as u64 + header.total_size
206 );
207 }
208
209 pub fn encode_to_writer<W: std::io::Write>(&self, mut writer: W) -> std::io::Result<()> {
213 const CELLS_CHUNK_SIZE: usize = 1000;
214 const P95_CELL_SIZE: usize = 128;
215
216 let mut crc = self.include_crc.then_some(0u32);
217 let mut total_size = 0;
218
219 let mut reset_chunk = |chunk: &mut Vec<u8>| {
220 if let Some(crc) = &mut crc {
221 *crc = crc32c::crc32c_append(*crc, chunk);
222 }
223 total_size += chunk.len() as u64;
224 chunk.clear();
225 };
226
227 let mut chunk = Vec::new();
228
229 let header = self.encode_header(&mut chunk);
231 ok!(writer.write_all(&chunk));
232 reset_chunk(&mut chunk);
233
234 for cells in self.rev_cells.rchunks(CELLS_CHUNK_SIZE) {
236 chunk.reserve(cells.len() * P95_CELL_SIZE);
237 self.encode_cells_chunk(cells, header.ref_size, &mut chunk);
238 ok!(writer.write_all(&chunk));
239 reset_chunk(&mut chunk);
240 }
241
242 debug_assert!(chunk.is_empty());
243
244 if let Some(crc) = crc {
245 ok!(writer.write_all(&crc.to_le_bytes()));
246 }
247
248 debug_assert_eq!(total_size, header.total_size);
249 Ok(())
250 }
251
    /// Parallel variant of [`Self::encode`]: serializes cell chunks on the
    /// rayon thread pool when there are enough cells to pay for the
    /// scheduling overhead, then concatenates the per-chunk buffers.
    #[cfg(feature = "rayon")]
    pub fn encode_rayon(&self, target: &mut Vec<u8>)
    where
        S: Send + Sync,
    {
        use rayon::iter::{IndexedParallelIterator, ParallelIterator};
        use rayon::slice::ParallelSlice;

        // Number of cells serialized per parallel task.
        const CELLS_CHUNK_SIZE: usize = 5_000;
        // Rough per-cell size estimate used to presize each task buffer.
        const P95_CELL_SIZE: usize = 128;

        let target_len_before = target.len();
        let header = self.encode_header(target);

        if self.rev_cells.len() < CELLS_CHUNK_SIZE * 2 {
            // Too few cells for parallelism to pay off; encode inline.
            self.encode_cells_chunk(&self.rev_cells, header.ref_size, target);
        } else {
            // `par_rchunks` plus the per-chunk reversal inside
            // `encode_cells_chunk` preserves the parent-first cell order;
            // `collect_into_vec` keeps the chunks in iteration order.
            let mut chunks = Vec::new();
            self.rev_cells
                .par_rchunks(CELLS_CHUNK_SIZE)
                .map(|chunk| {
                    let mut target = Vec::with_capacity(chunk.len() * P95_CELL_SIZE);
                    self.encode_cells_chunk(chunk, header.ref_size, &mut target);
                    target
                })
                .collect_into_vec(&mut chunks);
            for chunk in chunks {
                target.extend_from_slice(&chunk);
            }
        }

        self.encode_crc(target_len_before, target);

        // The header precomputes the exact encoded size; verify it matched.
        debug_assert_eq!(
            target.len() as u64,
            target_len_before as u64 + header.total_size
        );
    }
292
293 pub fn compute_stats(&self) -> BocHeaderStats {
295 let root_count = self.root_rev_indices.len();
296
297 let ref_size = number_of_bytes_to_fit(self.cell_count as u64);
298
299 let total_cells_size = self.total_data_size
300 + (self.cell_count as u64 * 2) + (ref_size as u64 * self.reference_count);
302 let offset_size = number_of_bytes_to_fit(total_cells_size);
303
304 let total_size = 4
315 + 2
316 + (ref_size as u64) * (3 + root_count as u64)
317 + (offset_size as u64)
318 + total_cells_size
319 + u64::from(self.include_crc) * 4;
320
321 BocHeaderStats {
322 offset_size,
323 ref_size,
324 total_cells_size,
325 total_size,
326 }
327 }
328
    /// Writes the BOC header (magic tag, flags, counts, sizes, root
    /// indices) into `target` and returns the computed stats.
    #[inline]
    fn encode_header(&self, target: &mut Vec<u8>) -> BocHeaderStats {
        let stats = self.compute_stats();

        let root_count = self.root_rev_indices.len();

        // Cell indices must fit into 1..=4 bytes (cell count is a u32).
        debug_assert!((1..=4).contains(&stats.ref_size));

        // Cell-section offsets must fit into 1..=8 bytes.
        debug_assert!((1..=8).contains(&stats.offset_size));

        // Low bits carry the ref size; bit 6 is the "has CRC" flag.
        let flags = (stats.ref_size as u8) | (u8::from(self.include_crc) * 0b0100_0000);

        target.reserve(stats.total_size as usize);

        target.extend_from_slice(&BocTag::GENERIC);
        target.extend_from_slice(&[flags, stats.offset_size as u8]);
        // Big-endian values truncated to minimal width: slicing off the
        // leading bytes keeps only the `ref_size` (or `offset_size`)
        // least significant bytes.
        target.extend_from_slice(&self.cell_count.to_be_bytes()[4 - stats.ref_size..]);
        target.extend_from_slice(&(root_count as u32).to_be_bytes()[4 - stats.ref_size..]);
        // Absent-cell count — always zero here.
        target.extend_from_slice(&[0; 4][4 - stats.ref_size..]);
        target.extend_from_slice(&stats.total_cells_size.to_be_bytes()[8 - stats.offset_size..]);

        // Stored indices count from the start of the serialized cell list
        // while `root_rev_indices` count from its end; convert each.
        for rev_index in &self.root_rev_indices {
            let root_index = self.cell_count - rev_index - 1;
            target.extend_from_slice(&root_index.to_be_bytes()[4 - stats.ref_size..]);
        }

        stats
    }
361
    /// Serializes `chunk` (a slice of `rev_cells`) into `target`,
    /// iterating in reverse so parents are written before their children.
    #[inline]
    fn encode_cells_chunk(&self, chunk: &[&DynCell], ref_size: usize, target: &mut Vec<u8>) {
        // All-ones normally; clears STORE_HASHES when hashes are stripped.
        let descriptor_mask = !(u8::from(self.without_hashes) * CellDescriptor::STORE_HASHES_MASK);

        for cell in chunk.iter().rev() {
            let mut descriptor = cell.descriptor();
            descriptor.d1 &= descriptor_mask;
            target.extend_from_slice(&[descriptor.d1, descriptor.d2]);
            // When hashes are stored: one hash, then one big-endian depth,
            // per level in the level mask.
            if descriptor.store_hashes() {
                let level_mask = descriptor.level_mask();
                for level in level_mask {
                    target.extend_from_slice(cell.hash(level).as_ref());
                }
                for level in level_mask {
                    target.extend_from_slice(&cell.depth(level).to_be_bytes());
                }
            }
            target.extend_from_slice(cell.data());
            // Child references are written as `ref_size`-wide indices
            // counted from the start of the serialized cell list.
            for child in cell.references() {
                if let Some(rev_index) = self.rev_indices.get(child.repr_hash()) {
                    let rev_index = self.cell_count - *rev_index - 1;
                    target.extend_from_slice(&rev_index.to_be_bytes()[4 - ref_size..]);
                } else {
                    // Every child is interned during `fill`; a miss is a bug.
                    debug_assert!(false, "child not found");
                }
            }
        }
    }
390
391 #[inline]
392 fn encode_crc(&self, target_len_before: usize, target: &mut Vec<u8>) {
393 if self.include_crc {
394 let target_len_after = target.len();
395 debug_assert!(target_len_before < target_len_after);
396
397 let crc = crc32c::crc32c(&target[target_len_before..target_len_after]);
398 target.extend_from_slice(&crc.to_le_bytes());
399 }
400 }
401
402 fn fill(&mut self, root: &'a DynCell) -> u32 {
403 const SAFE_DEPTH: u16 = 128;
404
405 if let Some(index) = self.rev_indices.get(root.repr_hash()) {
406 return *index;
407 }
408
409 let repr_depth = root.repr_depth();
410 if repr_depth <= SAFE_DEPTH {
411 self.fill_recursive(root);
412 } else {
413 self.fill_deep(root, repr_depth);
414 }
415
416 debug_assert!(self.cell_count > 0);
417 self.cell_count - 1
418 }
419
    /// Recursively interns `cell` and its subtree in post-order: children
    /// are recorded before their parent, which keeps `rev_cells`
    /// reverse-topologically sorted.
    fn fill_recursive(&mut self, cell: &'a DynCell) {
        for child in cell.references() {
            if !self.rev_indices.contains_key(child.repr_hash()) {
                self.fill_recursive(child);
            }
        }

        // Post-order: at this point `cell_count` equals this cell's index.
        self.rev_indices.insert(cell.repr_hash(), self.cell_count);
        self.rev_cells.push(cell);

        // Accumulate the totals consumed later by `compute_stats`.
        let descriptor = cell.descriptor();
        self.total_data_size += descriptor.byte_len_full(self.without_hashes);
        self.reference_count += descriptor.reference_count() as u64;
        self.cell_count += 1;
    }
435
    /// Iterative variant of [`Self::fill_recursive`] for deep trees,
    /// using an explicit stack of child iterators instead of recursion.
    #[cold]
    fn fill_deep(&mut self, root: &'a DynCell, repr_depth: u16) {
        // Cap the initial stack allocation for pathologically deep trees.
        const MAX_DEFAULT_CAPACITY: u16 = 256;

        let mut stack = Vec::with_capacity(repr_depth.min(MAX_DEFAULT_CAPACITY) as usize);
        stack.push(root.references());

        while let Some(children) = stack.last_mut() {
            if let Some(cell) = children.next() {
                // Descend into unseen children first (post-order).
                if !self.rev_indices.contains_key(cell.repr_hash()) {
                    stack.push(cell.references());
                }
            } else {
                // All children handled: intern the iterator's owner itself
                // (for the bottom stack entry this is `root`).
                let cell = children.cell();

                self.rev_indices.insert(cell.repr_hash(), self.cell_count);
                self.rev_cells.push(cell);

                // Accumulate the totals consumed later by `compute_stats`.
                let descriptor = cell.descriptor();
                self.total_data_size += descriptor.byte_len_full(self.without_hashes);
                self.reference_count += descriptor.reference_count() as u64;
                self.cell_count += 1;

                stack.pop();
            }
        }
    }
463}
464
465impl CellDescriptor {
466 fn byte_len_full(self, without_hashes: bool) -> u64 {
467 let mut byte_len = self.byte_len() as u64;
468 if !without_hashes && self.store_hashes() {
469 byte_len += (self.level_mask().level() + 1) as u64 * (32 + 2);
470 }
471 byte_len
472 }
473}
474
/// Precomputed sizes of a serialized BOC.
#[derive(Copy, Clone)]
pub struct BocHeaderStats {
    /// Byte width used for offsets into the serialized cell section.
    pub offset_size: usize,
    /// Byte width used for cell indices.
    pub ref_size: usize,
    /// Size in bytes of the serialized cell section (descriptors, cell
    /// data, and child reference indices; excludes the header).
    pub total_cells_size: u64,

    /// Total size in bytes of the serialized BOC, including the header
    /// and the optional trailing CRC.
    pub total_size: u64,
}
492
/// Minimal number of bytes needed to represent `l` (0 when `l == 0`).
fn number_of_bytes_to_fit(l: u64) -> usize {
    // Equivalent to `8 - leading_zeros / 8`: count the significant bits,
    // then round up to whole bytes.
    let significant_bits = 64 - l.leading_zeros() as usize;
    (significant_bits + 7) / 8
}