1use crate::checksum::jenkins_lookup3;
12use crate::error::{Error, Result};
13use crate::io::Cursor;
14use crate::storage::Storage;
15
16const BTHD_SIGNATURE: [u8; 4] = *b"BTHD";
21const BTIN_SIGNATURE: [u8; 4] = *b"BTIN";
22const BTLF_SIGNATURE: [u8; 4] = *b"BTLF";
23
24#[derive(Debug, Clone)]
30pub struct BTreeV2Header {
31 pub btree_type: u8,
33 pub node_size: u32,
35 pub record_size: u16,
37 pub depth: u16,
39 pub split_percent: u8,
41 pub merge_percent: u8,
43 pub root_node_address: u64,
45 pub num_records_in_root: u16,
47 pub total_records: u64,
49}
50
51impl BTreeV2Header {
52 pub fn parse(cursor: &mut Cursor, offset_size: u8, length_size: u8) -> Result<Self> {
68 let start = cursor.position();
69
70 let sig = cursor.read_bytes(4)?;
71 if sig != BTHD_SIGNATURE {
72 return Err(Error::InvalidBTreeV2Signature { context: "header" });
73 }
74
75 let version = cursor.read_u8()?;
76 if version != 0 {
77 return Err(Error::UnsupportedBTreeVersion(version));
78 }
79
80 let btree_type = cursor.read_u8()?;
81 let node_size = cursor.read_u32_le()?;
82 let record_size = cursor.read_u16_le()?;
83 let depth = cursor.read_u16_le()?;
84 let split_percent = cursor.read_u8()?;
85 let merge_percent = cursor.read_u8()?;
86 let root_node_address = cursor.read_offset(offset_size)?;
87 let num_records_in_root = cursor.read_u16_le()?;
88 let total_records = cursor.read_length(length_size)?;
89
90 let checksum_end = cursor.position();
92 let stored_checksum = cursor.read_u32_le()?;
93
94 let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_end as usize]);
95 if computed != stored_checksum {
96 return Err(Error::ChecksumMismatch {
97 expected: stored_checksum,
98 actual: computed,
99 });
100 }
101
102 Ok(BTreeV2Header {
103 btree_type,
104 node_size,
105 record_size,
106 depth,
107 split_percent,
108 merge_percent,
109 root_node_address,
110 num_records_in_root,
111 total_records,
112 })
113 }
114
115 pub fn parse_at_storage(
117 storage: &dyn Storage,
118 address: u64,
119 offset_size: u8,
120 length_size: u8,
121 ) -> Result<Self> {
122 let header_len = 4
123 + 1
124 + 1
125 + 4
126 + 2
127 + 2
128 + 1
129 + 1
130 + usize::from(offset_size)
131 + 2
132 + usize::from(length_size)
133 + 4;
134 let bytes = storage.read_range(address, header_len)?;
135 let mut cursor = Cursor::new(bytes.as_ref());
136 Self::parse(&mut cursor, offset_size, length_size)
137 }
138}
139
140#[derive(Debug, Clone)]
148pub enum BTreeV2Record {
149 HugeIndirectNonFiltered {
151 address: u64,
152 length: u64,
153 object_id: u64,
154 },
155 HugeIndirectFiltered {
157 address: u64,
158 filtered_length: u64,
159 filter_mask: u32,
160 memory_length: u64,
161 object_id: u64,
162 },
163 HugeDirectNonFiltered { address: u64, length: u64 },
165 HugeDirectFiltered {
167 address: u64,
168 filtered_length: u64,
169 filter_mask: u32,
170 memory_length: u64,
171 },
172 LinkNameHash { hash: u32, heap_id: Vec<u8> },
174 CreationOrder { order: u64, heap_id: Vec<u8> },
176 AttributeNameHash {
178 hash: u32,
179 flags: u8,
180 creation_order: u32,
181 heap_id: Vec<u8>,
182 },
183 AttributeCreationOrder { order: u32, heap_id: Vec<u8> },
185 ChunkedNonFiltered { address: u64, offsets: Vec<u64> },
187 ChunkedFiltered {
189 address: u64,
190 chunk_size: u64,
191 filter_mask: u32,
192 offsets: Vec<u64>,
193 },
194 SharedMessageHeap {
196 hash: u32,
197 reference_count: u32,
198 heap_id: Vec<u8>,
199 },
200 SharedMessageObjectHeader {
202 hash: u32,
203 message_type: u16,
204 object_header_index: u16,
205 object_header_address: u64,
206 },
207 Unknown { record_type: u8, data: Vec<u8> },
209}
210
211fn parse_record(
217 cursor: &mut Cursor,
218 btree_type: u8,
219 record_size: u16,
220 offset_size: u8,
221 length_size: u8,
222 _ndims: Option<u32>,
223 heap_id_len: usize,
224) -> Result<BTreeV2Record> {
225 let record_start = cursor.position();
226
227 let record = match btree_type {
228 1 => BTreeV2Record::HugeIndirectNonFiltered {
230 address: cursor.read_offset(offset_size)?,
231 length: cursor.read_length(length_size)?,
232 object_id: cursor.read_length(length_size)?,
233 },
234
235 2 => BTreeV2Record::HugeIndirectFiltered {
237 address: cursor.read_offset(offset_size)?,
238 filtered_length: cursor.read_length(length_size)?,
239 filter_mask: cursor.read_u32_le()?,
240 memory_length: cursor.read_length(length_size)?,
241 object_id: cursor.read_length(length_size)?,
242 },
243
244 3 => BTreeV2Record::HugeDirectNonFiltered {
246 address: cursor.read_offset(offset_size)?,
247 length: cursor.read_length(length_size)?,
248 },
249
250 4 => BTreeV2Record::HugeDirectFiltered {
252 address: cursor.read_offset(offset_size)?,
253 filtered_length: cursor.read_length(length_size)?,
254 filter_mask: cursor.read_u32_le()?,
255 memory_length: cursor.read_length(length_size)?,
256 },
257
258 5 => {
260 let hash = cursor.read_u32_le()?;
261 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
262 BTreeV2Record::LinkNameHash { hash, heap_id }
263 }
264
265 6 => {
267 let order = cursor.read_u64_le()?;
268 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
269 BTreeV2Record::CreationOrder { order, heap_id }
270 }
271
272 7 => {
274 let location = cursor.read_u8()?;
275 cursor.skip(3)?;
276 let hash = cursor.read_u32_le()?;
277 match location {
278 0 => {
279 let reference_count = cursor.read_u32_le()?;
280 let heap_id = cursor.read_bytes(8)?.to_vec();
281 BTreeV2Record::SharedMessageHeap {
282 hash,
283 reference_count,
284 heap_id,
285 }
286 }
287 1 => {
288 let _reserved = cursor.read_u8()?;
289 let message_type = u16::from(cursor.read_u8()?);
290 let object_header_index = cursor.read_u16_le()?;
291 let object_header_address = cursor.read_offset(offset_size)?;
292 BTreeV2Record::SharedMessageObjectHeader {
293 hash,
294 message_type,
295 object_header_index,
296 object_header_address,
297 }
298 }
299 other => {
300 return Err(Error::InvalidData(format!(
301 "unknown SOHM B-tree record location: {other}"
302 )));
303 }
304 }
305 }
306
307 8 => {
309 let hash = cursor.read_u32_le()?;
310 let flags = cursor.read_u8()?;
311 let creation_order = cursor.read_u32_le()?;
312 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
313 BTreeV2Record::AttributeNameHash {
314 hash,
315 flags,
316 creation_order,
317 heap_id,
318 }
319 }
320
321 9 => {
323 let order = cursor.read_u32_le()?;
324 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
325 BTreeV2Record::AttributeCreationOrder { order, heap_id }
326 }
327
328 10 => {
330 let address = cursor.read_offset(offset_size)?;
331 let offset_bytes_available = record_size as usize - offset_size as usize;
335 let num_offsets = offset_bytes_available / 8;
336 let mut offsets = Vec::with_capacity(num_offsets);
337 for _ in 0..num_offsets {
338 offsets.push(cursor.read_u64_le()?);
339 }
340 BTreeV2Record::ChunkedNonFiltered { address, offsets }
341 }
342
343 11 => {
345 let address = cursor.read_offset(offset_size)?;
346 let nbytes_size = length_size as usize;
348 let chunk_size = cursor.read_length(length_size)?;
349 let filter_mask = cursor.read_u32_le()?;
350 let remaining = record_size as usize - offset_size as usize - nbytes_size - 4; let num_offsets = remaining / 8;
352 let mut offsets = Vec::with_capacity(num_offsets);
353 for _ in 0..num_offsets {
354 offsets.push(cursor.read_u64_le()?);
355 }
356 BTreeV2Record::ChunkedFiltered {
357 address,
358 chunk_size,
359 filter_mask,
360 offsets,
361 }
362 }
363
364 _ => {
366 let data = cursor.read_bytes(record_size as usize)?.to_vec();
367 return Ok(BTreeV2Record::Unknown {
368 record_type: btree_type,
369 data,
370 });
371 }
372 };
373
374 let consumed = (cursor.position() - record_start) as usize;
376 if consumed < record_size as usize {
377 cursor.skip(record_size as usize - consumed)?;
378 }
379
380 Ok(record)
381}
382
383fn record_matches_chunk_bounds(
384 record: &BTreeV2Record,
385 chunk_dims: &[u32],
386 chunk_bounds: Option<(&[u64], &[u64])>,
387) -> bool {
388 let Some((first_chunk, last_chunk)) = chunk_bounds else {
389 return true;
390 };
391
392 let offsets = match record {
393 BTreeV2Record::ChunkedNonFiltered { offsets, .. }
394 | BTreeV2Record::ChunkedFiltered { offsets, .. } => offsets,
395 _ => return true,
396 };
397
398 offsets.iter().enumerate().all(|(dim, offset)| {
399 let chunk_index = *offset / u64::from(chunk_dims[dim]);
400 chunk_index >= first_chunk[dim] && chunk_index <= last_chunk[dim]
401 })
402}
403
404fn num_records_size(max_records: u64) -> usize {
411 if max_records <= 0xFF {
412 1
413 } else if max_records <= 0xFFFF {
414 2
415 } else if max_records <= 0xFFFF_FFFF {
416 4
417 } else {
418 8
419 }
420}
421
422fn max_leaf_records(node_size: u32, record_size: u16) -> u64 {
424 let overhead = 10u32;
426 if node_size <= overhead || record_size == 0 {
427 return 0;
428 }
429 ((node_size - overhead) / record_size as u32) as u64
430}
431
432fn max_internal_records(
437 node_size: u32,
438 record_size: u16,
439 offset_size: u8,
440 max_child_records: u64,
441) -> u64 {
442 let overhead = 10u32;
444 if node_size <= overhead || record_size == 0 {
445 return 0;
446 }
447 let available = (node_size - overhead) as u64;
448 let nrec_size = num_records_size(max_child_records) as u64;
450 let entry_size = record_size as u64 + offset_size as u64 + nrec_size;
451 let extra = offset_size as u64 + nrec_size;
455 if available <= extra {
456 return 0;
457 }
458 (available - extra) / entry_size
459}
460
461#[allow(clippy::too_many_arguments)]
463fn parse_leaf_node(
464 cursor: &mut Cursor,
465 header: &BTreeV2Header,
466 offset_size: u8,
467 length_size: u8,
468 ndims: Option<u32>,
469 chunk_dims: &[u32],
470 chunk_bounds: Option<(&[u64], &[u64])>,
471 num_records: u16,
472 heap_id_len: usize,
473 records: &mut Vec<BTreeV2Record>,
474) -> Result<()> {
475 let start = cursor.position();
476
477 let sig = cursor.read_bytes(4)?;
478 if sig != BTLF_SIGNATURE {
479 return Err(Error::InvalidBTreeV2Signature {
480 context: "leaf node",
481 });
482 }
483
484 let version = cursor.read_u8()?;
485 if version != 0 {
486 return Err(Error::UnsupportedBTreeVersion(version));
487 }
488
489 let node_type = cursor.read_u8()?;
490 if node_type != header.btree_type {
491 return Err(Error::InvalidData(format!(
492 "B-tree v2 leaf node type mismatch: header says {}, node says {}",
493 header.btree_type, node_type
494 )));
495 }
496
497 for _ in 0..num_records {
498 let record = parse_record(
499 cursor,
500 header.btree_type,
501 header.record_size,
502 offset_size,
503 length_size,
504 ndims,
505 heap_id_len,
506 )?;
507 if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
508 records.push(record);
509 }
510 }
511
512 let checksum_data_end = cursor.position();
514 let stored_checksum = cursor.read_u32_le()?;
515 let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_data_end as usize]);
516 if computed != stored_checksum {
517 return Err(Error::ChecksumMismatch {
518 expected: stored_checksum,
519 actual: computed,
520 });
521 }
522
523 Ok(())
524}
525
526#[allow(clippy::too_many_arguments)]
528fn parse_internal_node(
529 data: &[u8],
530 address: u64,
531 header: &BTreeV2Header,
532 offset_size: u8,
533 length_size: u8,
534 ndims: Option<u32>,
535 chunk_dims: &[u32],
536 chunk_bounds: Option<(&[u64], &[u64])>,
537 num_records: u16,
538 depth: u16,
539 heap_id_len: usize,
540 records: &mut Vec<BTreeV2Record>,
541) -> Result<()> {
542 let mut cursor = Cursor::new(data);
543 cursor.set_position(address);
544
545 let start = cursor.position();
546
547 let sig = cursor.read_bytes(4)?;
548 if sig != BTIN_SIGNATURE {
549 return Err(Error::InvalidBTreeV2Signature {
550 context: "internal node",
551 });
552 }
553
554 let version = cursor.read_u8()?;
555 if version != 0 {
556 return Err(Error::UnsupportedBTreeVersion(version));
557 }
558
559 let node_type = cursor.read_u8()?;
560 if node_type != header.btree_type {
561 return Err(Error::InvalidData(format!(
562 "B-tree v2 internal node type mismatch: header says {}, node says {}",
563 header.btree_type, node_type
564 )));
565 }
566
567 let max_child_records = if depth == 1 {
570 max_leaf_records(header.node_size, header.record_size)
571 } else {
572 let leaf_max = max_leaf_records(header.node_size, header.record_size);
574 let mut prev_max = leaf_max;
575 for _ in 1..depth {
576 prev_max =
577 max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
578 }
579 prev_max
580 };
581 let nrec_bytes = num_records_size(max_child_records);
582
583 let mut node_records = Vec::with_capacity(num_records as usize);
595 for _ in 0..num_records {
596 let record = parse_record(
597 &mut cursor,
598 header.btree_type,
599 header.record_size,
600 offset_size,
601 length_size,
602 ndims,
603 heap_id_len,
604 )?;
605 if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
606 node_records.push(record);
607 }
608 }
609
610 let num_children = num_records as usize + 1;
612
613 let has_total_records = depth > 1;
616 let total_nrec_bytes = if has_total_records {
618 length_size as usize
621 } else {
622 0
623 };
624
625 let mut child_addresses = Vec::with_capacity(num_children);
626 let mut child_nrecords = Vec::with_capacity(num_children);
627
628 for _ in 0..num_children {
629 let child_addr = cursor.read_offset(offset_size)?;
630 child_addresses.push(child_addr);
631 let nrec = cursor.read_uvar(nrec_bytes)?;
632 child_nrecords.push(nrec as u16);
633 if has_total_records {
634 cursor.read_uvar(total_nrec_bytes)?;
636 }
637 }
638
639 let checksum_data_end = cursor.position();
641 let stored_checksum = cursor.read_u32_le()?;
642 let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_data_end as usize]);
643 if computed != stored_checksum {
644 return Err(Error::ChecksumMismatch {
645 expected: stored_checksum,
646 actual: computed,
647 });
648 }
649
650 records.extend(node_records);
656
657 let child_depth = depth - 1;
659 for (i, &child_addr) in child_addresses.iter().enumerate() {
660 if Cursor::is_undefined_offset(child_addr, offset_size) {
661 continue;
662 }
663 let child_nrec = child_nrecords[i];
664 if child_depth == 0 {
665 let mut child_cursor = Cursor::new(data);
667 child_cursor.set_position(child_addr);
668 parse_leaf_node(
669 &mut child_cursor,
670 header,
671 offset_size,
672 length_size,
673 ndims,
674 chunk_dims,
675 chunk_bounds,
676 child_nrec,
677 heap_id_len,
678 records,
679 )?;
680 } else {
681 parse_internal_node(
683 data,
684 child_addr,
685 header,
686 offset_size,
687 length_size,
688 ndims,
689 chunk_dims,
690 chunk_bounds,
691 child_nrec,
692 child_depth,
693 heap_id_len,
694 records,
695 )?;
696 }
697 }
698
699 Ok(())
700}
701
702pub fn collect_btree_v2_records(
711 data: &[u8],
712 header: &BTreeV2Header,
713 offset_size: u8,
714 length_size: u8,
715 ndims: Option<u32>,
716 chunk_dims: &[u32],
717 chunk_bounds: Option<(&[u64], &[u64])>,
718) -> Result<Vec<BTreeV2Record>> {
719 if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
720 return Ok(Vec::new());
721 }
722
723 if header.total_records == 0 || header.num_records_in_root == 0 {
724 return Ok(Vec::new());
725 }
726
727 let heap_id_len = compute_heap_id_len(header);
729
730 let mut records = Vec::new();
731
732 if header.depth == 0 {
733 let mut cursor = Cursor::new(data);
735 cursor.set_position(header.root_node_address);
736 parse_leaf_node(
737 &mut cursor,
738 header,
739 offset_size,
740 length_size,
741 ndims,
742 chunk_dims,
743 chunk_bounds,
744 header.num_records_in_root,
745 heap_id_len,
746 &mut records,
747 )?;
748 } else {
749 parse_internal_node(
751 data,
752 header.root_node_address,
753 header,
754 offset_size,
755 length_size,
756 ndims,
757 chunk_dims,
758 chunk_bounds,
759 header.num_records_in_root,
760 header.depth,
761 heap_id_len,
762 &mut records,
763 )?;
764 }
765
766 Ok(records)
767}
768
769pub fn collect_btree_v2_records_storage(
771 storage: &dyn Storage,
772 header: &BTreeV2Header,
773 offset_size: u8,
774 length_size: u8,
775 ndims: Option<u32>,
776 chunk_dims: &[u32],
777 chunk_bounds: Option<(&[u64], &[u64])>,
778) -> Result<Vec<BTreeV2Record>> {
779 if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
780 return Ok(Vec::new());
781 }
782
783 if header.total_records == 0 || header.num_records_in_root == 0 {
784 return Ok(Vec::new());
785 }
786
787 let heap_id_len = compute_heap_id_len(header);
788 let mut records = Vec::new();
789
790 if header.depth == 0 {
791 parse_leaf_node_storage(
792 storage,
793 header.root_node_address,
794 header,
795 offset_size,
796 length_size,
797 ndims,
798 chunk_dims,
799 chunk_bounds,
800 header.num_records_in_root,
801 heap_id_len,
802 &mut records,
803 )?;
804 } else {
805 parse_internal_node_storage(
806 storage,
807 header.root_node_address,
808 header,
809 offset_size,
810 length_size,
811 ndims,
812 chunk_dims,
813 chunk_bounds,
814 header.num_records_in_root,
815 header.depth,
816 heap_id_len,
817 &mut records,
818 )?;
819 }
820
821 Ok(records)
822}
823
824#[allow(clippy::too_many_arguments)]
825fn parse_leaf_node_storage(
826 storage: &dyn Storage,
827 address: u64,
828 header: &BTreeV2Header,
829 offset_size: u8,
830 length_size: u8,
831 ndims: Option<u32>,
832 chunk_dims: &[u32],
833 chunk_bounds: Option<(&[u64], &[u64])>,
834 num_records: u16,
835 heap_id_len: usize,
836 records: &mut Vec<BTreeV2Record>,
837) -> Result<()> {
838 let _node_len = usize::try_from(header.node_size).map_err(|_| {
839 Error::InvalidData("B-tree v2 node size exceeds platform usize capacity".into())
840 })?;
841 let read_len = usize::try_from(storage.len().saturating_sub(address)).map_err(|_| {
842 Error::InvalidData("B-tree v2 node read exceeds platform usize capacity".into())
843 })?;
844 let node_bytes = storage.read_range(address, read_len)?;
845 let mut cursor = Cursor::new(node_bytes.as_ref());
846 parse_leaf_node(
847 &mut cursor,
848 header,
849 offset_size,
850 length_size,
851 ndims,
852 chunk_dims,
853 chunk_bounds,
854 num_records,
855 heap_id_len,
856 records,
857 )
858}
859
860#[allow(clippy::too_many_arguments)]
861fn parse_internal_node_storage(
862 storage: &dyn Storage,
863 address: u64,
864 header: &BTreeV2Header,
865 offset_size: u8,
866 length_size: u8,
867 ndims: Option<u32>,
868 chunk_dims: &[u32],
869 chunk_bounds: Option<(&[u64], &[u64])>,
870 num_records: u16,
871 depth: u16,
872 heap_id_len: usize,
873 records: &mut Vec<BTreeV2Record>,
874) -> Result<()> {
875 let _node_len = usize::try_from(header.node_size).map_err(|_| {
876 Error::InvalidData("B-tree v2 node size exceeds platform usize capacity".into())
877 })?;
878 let read_len = usize::try_from(storage.len().saturating_sub(address)).map_err(|_| {
879 Error::InvalidData("B-tree v2 node read exceeds platform usize capacity".into())
880 })?;
881 let node_bytes = storage.read_range(address, read_len)?;
882 let mut cursor = Cursor::new(node_bytes.as_ref());
883 let start = cursor.position();
884
885 let sig = cursor.read_bytes(4)?;
886 if sig != BTIN_SIGNATURE {
887 return Err(Error::InvalidBTreeV2Signature {
888 context: "internal node",
889 });
890 }
891
892 let version = cursor.read_u8()?;
893 if version != 0 {
894 return Err(Error::UnsupportedBTreeVersion(version));
895 }
896
897 let node_type = cursor.read_u8()?;
898 if node_type != header.btree_type {
899 return Err(Error::InvalidData(format!(
900 "B-tree v2 internal node type mismatch: header says {}, node says {}",
901 header.btree_type, node_type
902 )));
903 }
904
905 let max_child_records = if depth == 1 {
906 max_leaf_records(header.node_size, header.record_size)
907 } else {
908 let leaf_max = max_leaf_records(header.node_size, header.record_size);
909 let mut prev_max = leaf_max;
910 for _ in 1..depth {
911 prev_max =
912 max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
913 }
914 prev_max
915 };
916 let nrec_bytes = num_records_size(max_child_records);
917
918 let mut node_records = Vec::with_capacity(num_records as usize);
919 for _ in 0..num_records {
920 let record = parse_record(
921 &mut cursor,
922 header.btree_type,
923 header.record_size,
924 offset_size,
925 length_size,
926 ndims,
927 heap_id_len,
928 )?;
929 if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
930 node_records.push(record);
931 }
932 }
933
934 let num_children = usize::from(num_records) + 1;
935 let has_total_records = depth > 1;
936 let total_nrec_bytes = if has_total_records {
937 usize::from(length_size)
938 } else {
939 0
940 };
941
942 let mut child_addresses = Vec::with_capacity(num_children);
943 let mut child_nrecords = Vec::with_capacity(num_children);
944 for _ in 0..num_children {
945 child_addresses.push(cursor.read_offset(offset_size)?);
946 child_nrecords.push(cursor.read_uvar(nrec_bytes)? as u16);
947 if has_total_records {
948 cursor.read_uvar(total_nrec_bytes)?;
949 }
950 }
951
952 let checksum_data_end = cursor.position();
953 let stored_checksum = cursor.read_u32_le()?;
954 let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_data_end as usize]);
955 if computed != stored_checksum {
956 return Err(Error::ChecksumMismatch {
957 expected: stored_checksum,
958 actual: computed,
959 });
960 }
961
962 records.extend(node_records);
963
964 let child_depth = depth - 1;
965 for (i, &child_addr) in child_addresses.iter().enumerate() {
966 if Cursor::is_undefined_offset(child_addr, offset_size) {
967 continue;
968 }
969 let child_nrec = child_nrecords[i];
970 if child_depth == 0 {
971 parse_leaf_node_storage(
972 storage,
973 child_addr,
974 header,
975 offset_size,
976 length_size,
977 ndims,
978 chunk_dims,
979 chunk_bounds,
980 child_nrec,
981 heap_id_len,
982 records,
983 )?;
984 } else {
985 parse_internal_node_storage(
986 storage,
987 child_addr,
988 header,
989 offset_size,
990 length_size,
991 ndims,
992 chunk_dims,
993 chunk_bounds,
994 child_nrec,
995 child_depth,
996 heap_id_len,
997 records,
998 )?;
999 }
1000 }
1001
1002 Ok(())
1003}
1004
1005fn compute_heap_id_len(header: &BTreeV2Header) -> usize {
1011 let rs = header.record_size as usize;
1012 match header.btree_type {
1013 5 => rs.saturating_sub(4), 6 => rs.saturating_sub(8), 8 => rs.saturating_sub(4 + 1 + 4), 9 => rs.saturating_sub(4), _ => 0,
1018 }
1019}
1020
1021#[cfg(test)]
1022mod tests {
1023 use super::*;
1024
1025 #[allow(clippy::too_many_arguments)]
1027 fn build_header(
1028 btree_type: u8,
1029 node_size: u32,
1030 record_size: u16,
1031 depth: u16,
1032 root_node_address: u64,
1033 num_records_in_root: u16,
1034 total_records: u64,
1035 offset_size: u8,
1036 length_size: u8,
1037 ) -> Vec<u8> {
1038 let mut buf = Vec::new();
1039 buf.extend_from_slice(b"BTHD");
1040 buf.push(0); buf.push(btree_type);
1042 buf.extend_from_slice(&node_size.to_le_bytes());
1043 buf.extend_from_slice(&record_size.to_le_bytes());
1044 buf.extend_from_slice(&depth.to_le_bytes());
1045 buf.push(75); buf.push(40); match offset_size {
1048 4 => buf.extend_from_slice(&(root_node_address as u32).to_le_bytes()),
1049 8 => buf.extend_from_slice(&root_node_address.to_le_bytes()),
1050 _ => panic!("unsupported"),
1051 }
1052 buf.extend_from_slice(&num_records_in_root.to_le_bytes());
1053 match length_size {
1054 4 => buf.extend_from_slice(&(total_records as u32).to_le_bytes()),
1055 8 => buf.extend_from_slice(&total_records.to_le_bytes()),
1056 _ => panic!("unsupported"),
1057 }
1058 let checksum = jenkins_lookup3(&buf);
1060 buf.extend_from_slice(&checksum.to_le_bytes());
1061 buf
1062 }
1063
1064 #[test]
1065 fn test_parse_header() {
1066 let data = build_header(5, 4096, 12, 0, 0x1000, 3, 3, 8, 8);
1067 let mut cursor = Cursor::new(&data);
1068 let hdr = BTreeV2Header::parse(&mut cursor, 8, 8).unwrap();
1069
1070 assert_eq!(hdr.btree_type, 5);
1071 assert_eq!(hdr.node_size, 4096);
1072 assert_eq!(hdr.record_size, 12);
1073 assert_eq!(hdr.depth, 0);
1074 assert_eq!(hdr.split_percent, 75);
1075 assert_eq!(hdr.merge_percent, 40);
1076 assert_eq!(hdr.root_node_address, 0x1000);
1077 assert_eq!(hdr.num_records_in_root, 3);
1078 assert_eq!(hdr.total_records, 3);
1079 }
1080
1081 #[test]
1082 fn test_bad_signature() {
1083 let mut data = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
1084 data[0] = b'X';
1085 let mut cursor = Cursor::new(&data);
1086 assert!(matches!(
1087 BTreeV2Header::parse(&mut cursor, 8, 8),
1088 Err(Error::InvalidBTreeV2Signature { .. })
1089 ));
1090 }
1091
1092 #[test]
1093 fn test_bad_checksum() {
1094 let mut data = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
1095 data[6] = 0xFF;
1097 let mut cursor = Cursor::new(&data);
1098 assert!(matches!(
1099 BTreeV2Header::parse(&mut cursor, 8, 8),
1100 Err(Error::ChecksumMismatch { .. })
1101 ));
1102 }
1103
1104 #[test]
1105 fn test_collect_empty_tree() {
1106 let header = BTreeV2Header {
1107 btree_type: 5,
1108 node_size: 4096,
1109 record_size: 12,
1110 depth: 0,
1111 split_percent: 75,
1112 merge_percent: 40,
1113 root_node_address: u64::MAX,
1114 num_records_in_root: 0,
1115 total_records: 0,
1116 };
1117 let data = vec![0u8; 100];
1118 let records = collect_btree_v2_records(&data, &header, 8, 8, None, &[], None).unwrap();
1119 assert!(records.is_empty());
1120 }
1121
1122 #[test]
1123 fn test_compute_heap_id_len() {
1124 let h5 = BTreeV2Header {
1126 btree_type: 5,
1127 record_size: 12,
1128 node_size: 0,
1129 depth: 0,
1130 split_percent: 0,
1131 merge_percent: 0,
1132 root_node_address: 0,
1133 num_records_in_root: 0,
1134 total_records: 0,
1135 };
1136 assert_eq!(compute_heap_id_len(&h5), 8);
1137
1138 let h8 = BTreeV2Header {
1140 btree_type: 8,
1141 record_size: 17,
1142 ..h5
1143 };
1144 assert_eq!(compute_heap_id_len(&h8), 8);
1145 }
1146
1147 #[test]
1148 fn test_max_leaf_records() {
1149 assert_eq!(max_leaf_records(4096, 12), 340);
1152 }
1153
1154 #[test]
1155 fn test_num_records_size() {
1156 assert_eq!(num_records_size(0), 1);
1157 assert_eq!(num_records_size(255), 1);
1158 assert_eq!(num_records_size(256), 2);
1159 assert_eq!(num_records_size(65535), 2);
1160 assert_eq!(num_records_size(65536), 4);
1161 }
1162
1163 #[test]
1164 fn test_parse_huge_indirect_record() {
1165 let mut data = Vec::new();
1166 data.extend_from_slice(&0x1234u64.to_le_bytes());
1167 data.extend_from_slice(&99u64.to_le_bytes());
1168 data.extend_from_slice(&7u64.to_le_bytes());
1169 let mut cursor = Cursor::new(&data);
1170
1171 let record = parse_record(&mut cursor, 1, data.len() as u16, 8, 8, None, 0).unwrap();
1172 match record {
1173 BTreeV2Record::HugeIndirectNonFiltered {
1174 address,
1175 length,
1176 object_id,
1177 } => {
1178 assert_eq!(address, 0x1234);
1179 assert_eq!(length, 99);
1180 assert_eq!(object_id, 7);
1181 }
1182 other => panic!("expected huge record, got {:?}", other),
1183 }
1184 }
1185
1186 #[test]
1187 fn test_parse_shared_heap_record() {
1188 let mut data = Vec::new();
1189 data.push(0);
1190 data.extend_from_slice(&[0, 0, 0]);
1191 data.extend_from_slice(&0xAABB_CCDDu32.to_le_bytes());
1192 data.extend_from_slice(&3u32.to_le_bytes());
1193 data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
1194 let mut cursor = Cursor::new(&data);
1195
1196 let record = parse_record(&mut cursor, 7, data.len() as u16, 8, 8, None, 0).unwrap();
1197 match record {
1198 BTreeV2Record::SharedMessageHeap {
1199 hash,
1200 reference_count,
1201 heap_id,
1202 } => {
1203 assert_eq!(hash, 0xAABB_CCDD);
1204 assert_eq!(reference_count, 3);
1205 assert_eq!(heap_id, vec![1, 2, 3, 4, 5, 6, 7, 8]);
1206 }
1207 other => panic!("expected shared heap record, got {:?}", other),
1208 }
1209 }
1210
1211 #[test]
1212 fn test_parse_leaf_with_type5_records() {
1213 let record_size: u16 = 12;
1216 let node_size: u32 = 4096;
1217
1218 let header = BTreeV2Header {
1219 btree_type: 5,
1220 node_size,
1221 record_size,
1222 depth: 0,
1223 split_percent: 75,
1224 merge_percent: 40,
1225 root_node_address: 0, num_records_in_root: 2,
1227 total_records: 2,
1228 };
1229
1230 let mut leaf = Vec::new();
1232 leaf.extend_from_slice(b"BTLF"); leaf.push(0); leaf.push(5); leaf.extend_from_slice(&0xAABBCCDDu32.to_le_bytes());
1238 leaf.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
1239
1240 leaf.extend_from_slice(&0x11223344u32.to_le_bytes());
1242 leaf.extend_from_slice(&[9, 10, 11, 12, 13, 14, 15, 16]);
1243
1244 let checksum = jenkins_lookup3(&leaf);
1246 leaf.extend_from_slice(&checksum.to_le_bytes());
1247
1248 leaf.resize(node_size as usize, 0);
1250
1251 let mut records = Vec::new();
1252 let mut cursor = Cursor::new(&leaf);
1253 parse_leaf_node(
1254 &mut cursor,
1255 &header,
1256 8,
1257 8,
1258 None,
1259 &[],
1260 None,
1261 2,
1262 8, &mut records,
1264 )
1265 .unwrap();
1266
1267 assert_eq!(records.len(), 2);
1268 match &records[0] {
1269 BTreeV2Record::LinkNameHash { hash, heap_id } => {
1270 assert_eq!(*hash, 0xAABBCCDD);
1271 assert_eq!(heap_id, &[1, 2, 3, 4, 5, 6, 7, 8]);
1272 }
1273 _ => panic!("expected LinkNameHash"),
1274 }
1275 match &records[1] {
1276 BTreeV2Record::LinkNameHash { hash, heap_id } => {
1277 assert_eq!(*hash, 0x11223344);
1278 assert_eq!(heap_id, &[9, 10, 11, 12, 13, 14, 15, 16]);
1279 }
1280 _ => panic!("expected LinkNameHash"),
1281 }
1282 }
1283}