use std::collections::HashMap;
use std::hash::BuildHasher;

use super::BocTag;
use crate::cell::{CellDescriptor, DynCell, HashBytes};

/// Intermediate BOC (Bag of Cells) serializer state.
/// Cells are stored in reverse topological order: children precede parents.
pub struct BocHeader<'a, S = ahash::RandomState> {
    /// Reversed indices of the root cells.
    root_rev_indices: Vec<u32>,
    /// Mapping from a cell representation hash to its reversed index.
    rev_indices: HashMap<&'a HashBytes, u32, S>,
    /// Unique cells in reverse topological order.
    rev_cells: Vec<&'a DynCell>,
    /// Total size of cell data and stored hashes in bytes.
    total_data_size: u64,
    /// Total number of cell references.
    reference_count: u64,
    /// Total number of unique cells.
    cell_count: u32,
    /// Whether to skip stored hashes during encoding.
    without_hashes: bool,
    /// Whether to append a CRC32C checksum to the encoded bytes.
    include_crc: bool,
}

impl<S: BuildHasher + Default> Default for BocHeader<'_, S> {
    #[inline]
    fn default() -> Self {
        Self {
            root_rev_indices: Default::default(),
            rev_indices: Default::default(),
            rev_cells: Default::default(),
            total_data_size: 0,
            reference_count: 0,
            cell_count: 0,
            without_hashes: false,
            include_crc: false,
        }
    }
}

impl<'a, S> BocHeader<'a, S>
where
    S: BuildHasher + Default,
{
    /// Creates an intermediate serializer state with a single root cell.
    pub fn with_root(root: &'a DynCell) -> Self {
        let mut res = Self::default();
        res.add_root(root);
        res
    }

    /// Creates an empty state with pre-allocated capacity for `capacity` cells.
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            root_rev_indices: Default::default(),
            rev_indices: HashMap::with_capacity_and_hasher(capacity, S::default()),
            rev_cells: Vec::with_capacity(capacity),
            total_data_size: 0,
            reference_count: 0,
            cell_count: 0,
            without_hashes: false,
            include_crc: false,
        }
    }

    /// Creates a state with a single root cell and pre-allocated capacity.
    pub fn with_capacity_and_root(capacity: usize, root: &'a DynCell) -> Self {
        let mut res = Self::with_capacity(capacity);
        res.add_root(root);
        res
    }
}
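
// Construction sketch (hypothetical `cell: &DynCell` obtained elsewhere);
// the capacity is a cell-count hint that pre-sizes the index map and the
// cell list so the traversal avoids rehashing:
//
//     let header = BocHeader::<ahash::RandomState>::with_capacity_and_root(1024, cell);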

impl<'a, S> BocHeader<'a, S>
where
    S: BuildHasher,
{
    /// Clears the state, removing all roots and cells.
    /// Keeps the allocated memory and the encoding flags.
    pub fn clear(&mut self) {
        self.root_rev_indices.clear();
        self.rev_indices.clear();
        self.rev_cells.clear();
        self.total_data_size = 0;
        self.reference_count = 0;
        self.cell_count = 0;
    }

    /// Adds an additional root cell, visiting all of its unique children.
    pub fn add_root(&mut self, root: &'a DynCell) {
        let root_rev_index = self.fill(root);
        self.root_rev_indices.push(root_rev_index);
    }
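
    // Multi-root sketch (hypothetical `a` and `b` of type `&DynCell`); all
    // roots share one index space, so subtrees reachable from both are
    // stored only once:
    //
    //     let mut header = BocHeader::<ahash::RandomState>::with_root(a);
    //     header.add_root(b);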

    /// Sets whether to append a CRC32C checksum to the encoded bytes.
    #[inline]
    pub fn with_crc(mut self, include_crc: bool) -> Self {
        self.include_crc = include_crc;
        self
    }

    /// Sets whether to strip the stored-hashes flag from cell descriptors,
    /// so hashes and depths are not written even for cells that store them.
    #[inline]
    pub fn without_hashes(mut self, without_hashes: bool) -> Self {
        self.without_hashes = without_hashes;
        self
    }

    /// Encodes the BOC into the given buffer, appending to its contents.
    pub fn encode(&self, target: &mut Vec<u8>) {
        let target_len_before = target.len();
        let header = self.encode_header(target);
        self.encode_cells_chunk(&self.rev_cells, header.ref_size, target);
        self.encode_crc(target_len_before, target);

        debug_assert_eq!(
            target.len() as u64,
            target_len_before as u64 + header.total_size
        );
    }
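
    // Encoding sketch (hypothetical `cell: &DynCell`); the CRC flag must be
    // set before calling `encode`:
    //
    //     let mut buffer = Vec::new();
    //     BocHeader::<ahash::RandomState>::with_root(cell)
    //         .with_crc(true)
    //         .encode(&mut buffer);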

    /// Encodes the BOC into a writer, streaming cells in fixed-size chunks
    /// and computing the optional CRC32C incrementally.
    pub fn encode_to_writer<W: std::io::Write>(&self, mut writer: W) -> std::io::Result<()> {
        const CELLS_CHUNK_SIZE: usize = 1000;
        const P95_CELL_SIZE: usize = 128;

        let mut crc = self.include_crc.then_some(0u32);
        let mut total_size = 0;

        // Feed the chunk into the running checksum and size, then reuse it.
        let mut reset_chunk = |chunk: &mut Vec<u8>| {
            if let Some(crc) = &mut crc {
                *crc = crc32c::crc32c_append(*crc, chunk);
            }
            total_size += chunk.len() as u64;
            chunk.clear();
        };

        let mut chunk = Vec::new();

        let header = self.encode_header(&mut chunk);
        ok!(writer.write_all(&chunk));
        reset_chunk(&mut chunk);

        // `rev_cells` is children-first, so walk its chunks back to front.
        for cells in self.rev_cells.rchunks(CELLS_CHUNK_SIZE) {
            chunk.reserve(cells.len() * P95_CELL_SIZE);
            self.encode_cells_chunk(cells, header.ref_size, &mut chunk);
            ok!(writer.write_all(&chunk));
            reset_chunk(&mut chunk);
        }

        debug_assert!(chunk.is_empty());

        if let Some(crc) = crc {
            ok!(writer.write_all(&crc.to_le_bytes()));
        }

        debug_assert_eq!(total_size, header.total_size);
        Ok(())
    }
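
    // Streaming sketch (hypothetical `cell: &DynCell`); any `std::io::Write`
    // sink works, a buffered file is shown for illustration:
    //
    //     let file = std::fs::File::create("cells.boc")?;
    //     BocHeader::<ahash::RandomState>::with_root(cell)
    //         .with_crc(true)
    //         .encode_to_writer(std::io::BufWriter::new(file))?;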

    /// Encodes the BOC into the given buffer, encoding cell chunks in
    /// parallel on the rayon thread pool when the cell count is large enough.
    #[cfg(feature = "rayon")]
    pub fn encode_rayon(&self, target: &mut Vec<u8>)
    where
        S: Send + Sync,
    {
        use rayon::iter::{IndexedParallelIterator, ParallelIterator};
        use rayon::slice::ParallelSlice;

        const CELLS_CHUNK_SIZE: usize = 5_000;
        const P95_CELL_SIZE: usize = 128;

        let target_len_before = target.len();
        let header = self.encode_header(target);

        if self.rev_cells.len() < CELLS_CHUNK_SIZE * 2 {
            // Too few cells for parallelism to pay off; encode in one pass.
            self.encode_cells_chunk(&self.rev_cells, header.ref_size, target);
        } else {
            let mut chunks = Vec::new();
            self.rev_cells
                .par_rchunks(CELLS_CHUNK_SIZE)
                .map(|chunk| {
                    let mut target = Vec::with_capacity(chunk.len() * P95_CELL_SIZE);
                    self.encode_cells_chunk(chunk, header.ref_size, &mut target);
                    target
                })
                .collect_into_vec(&mut chunks);
            for chunk in chunks {
                target.extend_from_slice(&chunk);
            }
        }

        self.encode_crc(target_len_before, target);

        debug_assert_eq!(
            target.len() as u64,
            target_len_before as u64 + header.total_size
        );
    }

    /// Computes the encoded size without encoding anything.
    pub fn compute_stats(&self) -> BocHeaderStats {
        let root_count = self.root_rev_indices.len();

        let ref_size = number_of_bytes_to_fit(self.cell_count as u64);

        // Data bytes, two descriptor bytes per cell, and reference indices.
        let total_cells_size = self.total_data_size
            + (self.cell_count as u64 * 2)
            + (ref_size as u64 * self.reference_count);
        let offset_size = number_of_bytes_to_fit(total_cells_size);

        let total_size = 4 // magic
            + 2 // flags and offset size
            + (ref_size as u64) * (3 + root_count as u64) // cell count, root count, absent count, root indices
            + (offset_size as u64) // total cells size
            + total_cells_size
            + u64::from(self.include_crc) * 4; // optional checksum

        BocHeaderStats {
            offset_size,
            ref_size,
            total_cells_size,
            total_size,
        }
    }
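
    // Worked example (illustrative): a single root cell with 8 data bytes,
    // no references, no stored hashes, and no CRC:
    //   ref_size         = number_of_bytes_to_fit(1)  = 1
    //   total_cells_size = 8 + 1 * 2 + 1 * 0          = 10
    //   offset_size      = number_of_bytes_to_fit(10) = 1
    //   total_size       = 4 + 2 + 1 * (3 + 1) + 1 + 10 + 0 = 21 bytes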

    #[inline]
    fn encode_header(&self, target: &mut Vec<u8>) -> BocHeaderStats {
        let stats = self.compute_stats();

        let root_count = self.root_rev_indices.len();

        // 4 bytes is enough to index all cells.
        debug_assert!((1..=4).contains(&stats.ref_size));

        // 8 bytes is enough to store any offset.
        debug_assert!((1..=8).contains(&stats.offset_size));

        // Low bits: ref size; bit 6: whether a CRC32C checksum is appended.
        let flags = (stats.ref_size as u8) | (u8::from(self.include_crc) * 0b0100_0000);

        target.reserve(stats.total_size as usize);

        target.extend_from_slice(&BocTag::GENERIC);
        target.extend_from_slice(&[flags, stats.offset_size as u8]);
        target.extend_from_slice(&self.cell_count.to_be_bytes()[4 - stats.ref_size..]);
        target.extend_from_slice(&(root_count as u32).to_be_bytes()[4 - stats.ref_size..]);
        target.extend_from_slice(&[0; 4][4 - stats.ref_size..]); // absent cell count (always 0)
        target.extend_from_slice(&stats.total_cells_size.to_be_bytes()[8 - stats.offset_size..]);

        for rev_index in &self.root_rev_indices {
            let root_index = self.cell_count - rev_index - 1;
            target.extend_from_slice(&root_index.to_be_bytes()[4 - stats.ref_size..]);
        }

        stats
    }

    #[inline]
    fn encode_cells_chunk(&self, chunk: &[&DynCell], ref_size: usize, target: &mut Vec<u8>) {
        // Clears the stored-hashes bit in descriptors when hashes are skipped.
        let descriptor_mask = !(u8::from(self.without_hashes) * CellDescriptor::STORE_HASHES_MASK);

        // Chunks come from `rev_cells`, so iterate in reverse to restore the
        // parents-first order required by the BOC format.
        for cell in chunk.iter().rev() {
            let mut descriptor = cell.descriptor();
            descriptor.d1 &= descriptor_mask;
            target.extend_from_slice(&[descriptor.d1, descriptor.d2]);
            if descriptor.store_hashes() {
                let level_mask = descriptor.level_mask();
                for level in level_mask {
                    target.extend_from_slice(cell.hash(level).as_ref());
                }
                for level in level_mask {
                    target.extend_from_slice(&cell.depth(level).to_be_bytes());
                }
            }
            target.extend_from_slice(cell.data());
            for child in cell.references() {
                if let Some(rev_index) = self.rev_indices.get(child.repr_hash()) {
                    let rev_index = self.cell_count - *rev_index - 1;
                    target.extend_from_slice(&rev_index.to_be_bytes()[4 - ref_size..]);
                } else {
                    debug_assert!(false, "child not found");
                }
            }
        }
    }

    #[inline]
    fn encode_crc(&self, target_len_before: usize, target: &mut Vec<u8>) {
        if self.include_crc {
            let target_len_after = target.len();
            debug_assert!(target_len_before < target_len_after);

            let crc = crc32c::crc32c(&target[target_len_before..target_len_after]);
            target.extend_from_slice(&crc.to_le_bytes());
        }
    }

    /// Visits `root` and all of its unique children, returning the reversed
    /// index of the root.
    fn fill(&mut self, root: &'a DynCell) -> u32 {
        const SAFE_DEPTH: u16 = 128;

        if let Some(index) = self.rev_indices.get(root.repr_hash()) {
            return *index;
        }

        // Use plain recursion for shallow trees; fall back to an explicit
        // stack for deep trees to avoid overflowing the call stack.
        let repr_depth = root.repr_depth();
        if repr_depth <= SAFE_DEPTH {
            self.fill_recursive(root);
        } else {
            self.fill_deep(root, repr_depth);
        }

        debug_assert!(self.cell_count > 0);
        self.cell_count - 1
    }

    fn fill_recursive(&mut self, cell: &'a DynCell) {
        // Visit children first so that they always get smaller indices.
        for child in cell.references() {
            if !self.rev_indices.contains_key(child.repr_hash()) {
                self.fill_recursive(child);
            }
        }

        self.rev_indices.insert(cell.repr_hash(), self.cell_count);
        self.rev_cells.push(cell);

        let descriptor = cell.descriptor();
        self.total_data_size += descriptor.byte_len_full(self.without_hashes);
        self.reference_count += descriptor.reference_count() as u64;
        self.cell_count += 1;
    }

    /// Iterative post-order traversal for trees too deep for recursion.
    #[cold]
    fn fill_deep(&mut self, root: &'a DynCell, repr_depth: u16) {
        const MAX_DEFAULT_CAPACITY: u16 = 256;

        let mut stack = Vec::with_capacity(repr_depth.min(MAX_DEFAULT_CAPACITY) as usize);
        stack.push(root.references());

        while let Some(children) = stack.last_mut() {
            if let Some(cell) = children.next() {
                // Descend into the next unvisited child.
                if !self.rev_indices.contains_key(cell.repr_hash()) {
                    stack.push(cell.references());
                }
            } else {
                // All children are visited; record the cell itself.
                let cell = children.cell();

                self.rev_indices.insert(cell.repr_hash(), self.cell_count);
                self.rev_cells.push(cell);

                let descriptor = cell.descriptor();
                self.total_data_size += descriptor.byte_len_full(self.without_hashes);
                self.reference_count += descriptor.reference_count() as u64;
                self.cell_count += 1;

                stack.pop();
            }
        }
    }
}

impl CellDescriptor {
    /// Returns the full encoded byte length of a cell: data bytes plus,
    /// when hashes are stored, 32 hash bytes and 2 depth bytes per level.
    fn byte_len_full(self, without_hashes: bool) -> u64 {
        let mut byte_len = self.byte_len() as u64;
        if !without_hashes && self.store_hashes() {
            byte_len += (self.level_mask().level() + 1) as u64 * (32 + 2);
        }
        byte_len
    }
}

/// Computed stats for the encoded BOC.
#[derive(Copy, Clone)]
pub struct BocHeaderStats {
    /// Byte size of the offset values.
    pub offset_size: usize,
    /// Byte size of cell indices.
    pub ref_size: usize,
    /// Total size of the serialized cells in bytes.
    pub total_cells_size: u64,
    /// Total size of the encoded BOC in bytes, header and checksum included.
    pub total_size: u64,
}

/// Returns the minimal number of big-endian bytes required to represent `l`.
fn number_of_bytes_to_fit(l: u64) -> usize {
    (8 - l.leading_zeros() / 8) as usize
}
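
// A minimal sanity-check sketch for the byte-width helper; the expected
// values follow directly from the definition above (module and test names
// are illustrative, not part of the original crate).
#[cfg(test)]
mod number_of_bytes_to_fit_tests {
    use super::*;

    #[test]
    fn computes_minimal_byte_widths() {
        assert_eq!(number_of_bytes_to_fit(0), 0);
        assert_eq!(number_of_bytes_to_fit(1), 1);
        assert_eq!(number_of_bytes_to_fit(255), 1);
        assert_eq!(number_of_bytes_to_fit(256), 2);
        assert_eq!(number_of_bytes_to_fit(u64::MAX), 8);
    }
}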