1use crate::checksum::jenkins_lookup3;
12use crate::error::{Error, Result};
13use crate::io::Cursor;
14use crate::storage::Storage;
15
16const BTHD_SIGNATURE: [u8; 4] = *b"BTHD";
21const BTIN_SIGNATURE: [u8; 4] = *b"BTIN";
22const BTLF_SIGNATURE: [u8; 4] = *b"BTLF";
23
24#[derive(Debug, Clone)]
30pub struct BTreeV2Header {
31 pub btree_type: u8,
33 pub node_size: u32,
35 pub record_size: u16,
37 pub depth: u16,
39 pub split_percent: u8,
41 pub merge_percent: u8,
43 pub root_node_address: u64,
45 pub num_records_in_root: u16,
47 pub total_records: u64,
49}
50
51impl BTreeV2Header {
52 pub fn parse(cursor: &mut Cursor, offset_size: u8, length_size: u8) -> Result<Self> {
68 let start = cursor.position();
69
70 let sig = cursor.read_bytes(4)?;
71 if sig != BTHD_SIGNATURE {
72 return Err(Error::InvalidBTreeV2Signature { context: "header" });
73 }
74
75 let version = cursor.read_u8()?;
76 if version != 0 {
77 return Err(Error::UnsupportedBTreeVersion(version));
78 }
79
80 let btree_type = cursor.read_u8()?;
81 let node_size = cursor.read_u32_le()?;
82 let record_size = cursor.read_u16_le()?;
83 let depth = cursor.read_u16_le()?;
84 let split_percent = cursor.read_u8()?;
85 let merge_percent = cursor.read_u8()?;
86 let root_node_address = cursor.read_offset(offset_size)?;
87 let num_records_in_root = cursor.read_u16_le()?;
88 let total_records = cursor.read_length(length_size)?;
89
90 let checksum_end = cursor.position();
92 let stored_checksum = cursor.read_u32_le()?;
93
94 let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_end as usize]);
95 if computed != stored_checksum {
96 return Err(Error::ChecksumMismatch {
97 expected: stored_checksum,
98 actual: computed,
99 });
100 }
101
102 Ok(BTreeV2Header {
103 btree_type,
104 node_size,
105 record_size,
106 depth,
107 split_percent,
108 merge_percent,
109 root_node_address,
110 num_records_in_root,
111 total_records,
112 })
113 }
114
115 pub fn parse_at_storage(
117 storage: &dyn Storage,
118 address: u64,
119 offset_size: u8,
120 length_size: u8,
121 ) -> Result<Self> {
122 let header_len = 4
123 + 1
124 + 1
125 + 4
126 + 2
127 + 2
128 + 1
129 + 1
130 + usize::from(offset_size)
131 + 2
132 + usize::from(length_size)
133 + 4;
134 let bytes = storage.read_range(address, header_len)?;
135 let mut cursor = Cursor::new(bytes.as_ref());
136 Self::parse(&mut cursor, offset_size, length_size)
137 }
138}
139
140#[derive(Debug, Clone)]
148pub enum BTreeV2Record {
149 HugeIndirectNonFiltered {
151 address: u64,
152 length: u64,
153 object_id: u64,
154 },
155 HugeIndirectFiltered {
157 address: u64,
158 filtered_length: u64,
159 filter_mask: u32,
160 memory_length: u64,
161 object_id: u64,
162 },
163 HugeDirectNonFiltered { address: u64, length: u64 },
165 HugeDirectFiltered {
167 address: u64,
168 filtered_length: u64,
169 filter_mask: u32,
170 memory_length: u64,
171 },
172 LinkNameHash { hash: u32, heap_id: Vec<u8> },
174 CreationOrder { order: u64, heap_id: Vec<u8> },
176 AttributeNameHash {
178 hash: u32,
179 flags: u8,
180 creation_order: u32,
181 heap_id: Vec<u8>,
182 },
183 AttributeCreationOrder { order: u32, heap_id: Vec<u8> },
185 ChunkedNonFiltered { address: u64, offsets: Vec<u64> },
187 ChunkedFiltered {
189 address: u64,
190 chunk_size: u64,
191 filter_mask: u32,
192 offsets: Vec<u64>,
193 },
194 SharedMessageHeap {
196 hash: u32,
197 reference_count: u32,
198 heap_id: Vec<u8>,
199 },
200 SharedMessageObjectHeader {
202 hash: u32,
203 message_type: u16,
204 object_header_index: u16,
205 object_header_address: u64,
206 },
207 Unknown { record_type: u8, data: Vec<u8> },
209}
210
211fn parse_record(
217 cursor: &mut Cursor,
218 btree_type: u8,
219 record_size: u16,
220 offset_size: u8,
221 length_size: u8,
222 _ndims: Option<u32>,
223 heap_id_len: usize,
224) -> Result<BTreeV2Record> {
225 let record_start = cursor.position();
226
227 let record = match btree_type {
228 1 => BTreeV2Record::HugeIndirectNonFiltered {
230 address: cursor.read_offset(offset_size)?,
231 length: cursor.read_length(length_size)?,
232 object_id: cursor.read_length(length_size)?,
233 },
234
235 2 => BTreeV2Record::HugeIndirectFiltered {
237 address: cursor.read_offset(offset_size)?,
238 filtered_length: cursor.read_length(length_size)?,
239 filter_mask: cursor.read_u32_le()?,
240 memory_length: cursor.read_length(length_size)?,
241 object_id: cursor.read_length(length_size)?,
242 },
243
244 3 => BTreeV2Record::HugeDirectNonFiltered {
246 address: cursor.read_offset(offset_size)?,
247 length: cursor.read_length(length_size)?,
248 },
249
250 4 => BTreeV2Record::HugeDirectFiltered {
252 address: cursor.read_offset(offset_size)?,
253 filtered_length: cursor.read_length(length_size)?,
254 filter_mask: cursor.read_u32_le()?,
255 memory_length: cursor.read_length(length_size)?,
256 },
257
258 5 => {
260 let hash = cursor.read_u32_le()?;
261 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
262 BTreeV2Record::LinkNameHash { hash, heap_id }
263 }
264
265 6 => {
267 let order = cursor.read_u64_le()?;
268 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
269 BTreeV2Record::CreationOrder { order, heap_id }
270 }
271
272 7 => {
274 let location = cursor.read_u8()?;
275 cursor.skip(3)?;
276 let hash = cursor.read_u32_le()?;
277 match location {
278 0 => {
279 let reference_count = cursor.read_u32_le()?;
280 let heap_id = cursor.read_bytes(8)?.to_vec();
281 BTreeV2Record::SharedMessageHeap {
282 hash,
283 reference_count,
284 heap_id,
285 }
286 }
287 1 => {
288 let _reserved = cursor.read_u8()?;
289 let message_type = u16::from(cursor.read_u8()?);
290 let object_header_index = cursor.read_u16_le()?;
291 let object_header_address = cursor.read_offset(offset_size)?;
292 BTreeV2Record::SharedMessageObjectHeader {
293 hash,
294 message_type,
295 object_header_index,
296 object_header_address,
297 }
298 }
299 other => {
300 return Err(Error::InvalidData(format!(
301 "unknown SOHM B-tree record location: {other}"
302 )));
303 }
304 }
305 }
306
307 8 => {
309 let hash = cursor.read_u32_le()?;
310 let flags = cursor.read_u8()?;
311 let creation_order = cursor.read_u32_le()?;
312 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
313 BTreeV2Record::AttributeNameHash {
314 hash,
315 flags,
316 creation_order,
317 heap_id,
318 }
319 }
320
321 9 => {
323 let order = cursor.read_u32_le()?;
324 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
325 BTreeV2Record::AttributeCreationOrder { order, heap_id }
326 }
327
328 10 => {
330 let address = cursor.read_offset(offset_size)?;
331 let offset_bytes_available = record_size as usize - offset_size as usize;
335 let num_offsets = offset_bytes_available / 8;
336 let mut offsets = Vec::with_capacity(num_offsets);
337 for _ in 0..num_offsets {
338 offsets.push(cursor.read_u64_le()?);
339 }
340 BTreeV2Record::ChunkedNonFiltered { address, offsets }
341 }
342
343 11 => {
345 let address = cursor.read_offset(offset_size)?;
346 let nbytes_size = length_size as usize;
348 let chunk_size = cursor.read_length(length_size)?;
349 let filter_mask = cursor.read_u32_le()?;
350 let remaining = record_size as usize - offset_size as usize - nbytes_size - 4; let num_offsets = remaining / 8;
352 let mut offsets = Vec::with_capacity(num_offsets);
353 for _ in 0..num_offsets {
354 offsets.push(cursor.read_u64_le()?);
355 }
356 BTreeV2Record::ChunkedFiltered {
357 address,
358 chunk_size,
359 filter_mask,
360 offsets,
361 }
362 }
363
364 _ => {
366 let data = cursor.read_bytes(record_size as usize)?.to_vec();
367 return Ok(BTreeV2Record::Unknown {
368 record_type: btree_type,
369 data,
370 });
371 }
372 };
373
374 let consumed = (cursor.position() - record_start) as usize;
376 if consumed < record_size as usize {
377 cursor.skip(record_size as usize - consumed)?;
378 }
379
380 Ok(record)
381}
382
383fn record_matches_chunk_bounds(
384 record: &BTreeV2Record,
385 chunk_dims: &[u32],
386 chunk_bounds: Option<(&[u64], &[u64])>,
387) -> bool {
388 let Some((first_chunk, last_chunk)) = chunk_bounds else {
389 return true;
390 };
391
392 let offsets = match record {
393 BTreeV2Record::ChunkedNonFiltered { offsets, .. }
394 | BTreeV2Record::ChunkedFiltered { offsets, .. } => offsets,
395 _ => return true,
396 };
397
398 offsets.iter().enumerate().all(|(dim, offset)| {
399 let chunk_index = *offset / u64::from(chunk_dims[dim]);
400 chunk_index >= first_chunk[dim] && chunk_index <= last_chunk[dim]
401 })
402}
403
404fn num_records_size(max_records: u64) -> usize {
411 if max_records <= 0xFF {
412 1
413 } else if max_records <= 0xFFFF {
414 2
415 } else if max_records <= 0xFFFF_FFFF {
416 4
417 } else {
418 8
419 }
420}
421
422fn max_leaf_records(node_size: u32, record_size: u16) -> u64 {
424 let overhead = 10u32;
426 if node_size <= overhead || record_size == 0 {
427 return 0;
428 }
429 ((node_size - overhead) / record_size as u32) as u64
430}
431
432fn max_internal_records(
437 node_size: u32,
438 record_size: u16,
439 offset_size: u8,
440 max_child_records: u64,
441) -> u64 {
442 let overhead = 10u32;
444 if node_size <= overhead || record_size == 0 {
445 return 0;
446 }
447 let available = (node_size - overhead) as u64;
448 let nrec_size = num_records_size(max_child_records) as u64;
450 let entry_size = record_size as u64 + offset_size as u64 + nrec_size;
451 let extra = offset_size as u64 + nrec_size;
455 if available <= extra {
456 return 0;
457 }
458 (available - extra) / entry_size
459}
460
461fn node_checksum_bounds(
462 cursor: &Cursor<'_>,
463 start: u64,
464 node_size: u32,
465 compact_end: u64,
466 context: &'static str,
467) -> Result<usize> {
468 let start = usize::try_from(start)
469 .map_err(|_| Error::InvalidData(format!("B-tree v2 {context} offset is too large")))?;
470 let node_size = usize::try_from(node_size)
471 .map_err(|_| Error::InvalidData(format!("B-tree v2 {context} size is too large")))?;
472 if node_size < 10 {
473 return Err(Error::InvalidData(format!(
474 "B-tree v2 {context} node is too small: {node_size} bytes"
475 )));
476 }
477
478 let checksum_pos = usize::try_from(compact_end)
479 .map_err(|_| Error::InvalidData(format!("B-tree v2 {context} checksum is too large")))?;
480 if checksum_pos < start {
481 return Err(Error::InvalidData(format!(
482 "B-tree v2 {context} payload starts before node"
483 )));
484 }
485 let node_end = start
486 .checked_add(node_size)
487 .ok_or_else(|| Error::InvalidData(format!("B-tree v2 {context} size overflow")))?;
488
489 if node_end > cursor.data().len() {
490 return Err(Error::UnexpectedEof {
491 offset: start as u64,
492 needed: node_size as u64,
493 available: cursor.data().len().saturating_sub(start) as u64,
494 });
495 }
496 let checksum_end = checksum_pos
497 .checked_add(4)
498 .ok_or_else(|| Error::InvalidData(format!("B-tree v2 {context} checksum overflow")))?;
499 if checksum_end > node_end {
500 return Err(Error::InvalidData(format!(
501 "B-tree v2 {context} contents exceed node size"
502 )));
503 }
504
505 Ok(checksum_pos)
506}
507
508fn verify_node_checksum(
509 cursor: &mut Cursor<'_>,
510 start: u64,
511 node_size: u32,
512 compact_end: u64,
513 context: &'static str,
514) -> Result<()> {
515 let checksum_pos = node_checksum_bounds(cursor, start, node_size, compact_end, context)?;
516 cursor.set_position(checksum_pos as u64);
517 let stored_checksum = cursor.read_u32_le()?;
518 let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_pos]);
519 if computed != stored_checksum {
520 return Err(Error::ChecksumMismatch {
521 expected: stored_checksum,
522 actual: computed,
523 });
524 }
525 Ok(())
526}
527
528#[allow(clippy::too_many_arguments)]
530fn parse_leaf_node(
531 cursor: &mut Cursor,
532 header: &BTreeV2Header,
533 offset_size: u8,
534 length_size: u8,
535 ndims: Option<u32>,
536 chunk_dims: &[u32],
537 chunk_bounds: Option<(&[u64], &[u64])>,
538 num_records: u16,
539 heap_id_len: usize,
540 records: &mut Vec<BTreeV2Record>,
541) -> Result<()> {
542 let start = cursor.position();
543
544 let sig = cursor.read_bytes(4)?;
545 if sig != BTLF_SIGNATURE {
546 return Err(Error::InvalidBTreeV2Signature {
547 context: "leaf node",
548 });
549 }
550
551 let version = cursor.read_u8()?;
552 if version != 0 {
553 return Err(Error::UnsupportedBTreeVersion(version));
554 }
555
556 let node_type = cursor.read_u8()?;
557 if node_type != header.btree_type {
558 return Err(Error::InvalidData(format!(
559 "B-tree v2 leaf node type mismatch: header says {}, node says {}",
560 header.btree_type, node_type
561 )));
562 }
563
564 for _ in 0..num_records {
565 let record = parse_record(
566 cursor,
567 header.btree_type,
568 header.record_size,
569 offset_size,
570 length_size,
571 ndims,
572 heap_id_len,
573 )?;
574 if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
575 records.push(record);
576 }
577 }
578
579 verify_node_checksum(
580 cursor,
581 start,
582 header.node_size,
583 cursor.position(),
584 "leaf node",
585 )?;
586
587 Ok(())
588}
589
590#[allow(clippy::too_many_arguments)]
592fn parse_internal_node(
593 data: &[u8],
594 address: u64,
595 header: &BTreeV2Header,
596 offset_size: u8,
597 length_size: u8,
598 ndims: Option<u32>,
599 chunk_dims: &[u32],
600 chunk_bounds: Option<(&[u64], &[u64])>,
601 num_records: u16,
602 depth: u16,
603 heap_id_len: usize,
604 records: &mut Vec<BTreeV2Record>,
605) -> Result<()> {
606 let mut cursor = Cursor::new(data);
607 cursor.set_position(address);
608
609 let start = cursor.position();
610
611 let sig = cursor.read_bytes(4)?;
612 if sig != BTIN_SIGNATURE {
613 return Err(Error::InvalidBTreeV2Signature {
614 context: "internal node",
615 });
616 }
617
618 let version = cursor.read_u8()?;
619 if version != 0 {
620 return Err(Error::UnsupportedBTreeVersion(version));
621 }
622
623 let node_type = cursor.read_u8()?;
624 if node_type != header.btree_type {
625 return Err(Error::InvalidData(format!(
626 "B-tree v2 internal node type mismatch: header says {}, node says {}",
627 header.btree_type, node_type
628 )));
629 }
630
631 let max_child_records = if depth == 1 {
634 max_leaf_records(header.node_size, header.record_size)
635 } else {
636 let leaf_max = max_leaf_records(header.node_size, header.record_size);
638 let mut prev_max = leaf_max;
639 for _ in 1..depth {
640 prev_max =
641 max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
642 }
643 prev_max
644 };
645 let nrec_bytes = num_records_size(max_child_records);
646
647 let mut node_records = Vec::with_capacity(num_records as usize);
659 for _ in 0..num_records {
660 let record = parse_record(
661 &mut cursor,
662 header.btree_type,
663 header.record_size,
664 offset_size,
665 length_size,
666 ndims,
667 heap_id_len,
668 )?;
669 if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
670 node_records.push(record);
671 }
672 }
673
674 let num_children = num_records as usize + 1;
676
677 let has_total_records = depth > 1;
680 let total_nrec_bytes = if has_total_records {
682 length_size as usize
685 } else {
686 0
687 };
688
689 let mut child_addresses = Vec::with_capacity(num_children);
690 let mut child_nrecords = Vec::with_capacity(num_children);
691
692 for _ in 0..num_children {
693 let child_addr = cursor.read_offset(offset_size)?;
694 child_addresses.push(child_addr);
695 let nrec = cursor.read_uvar(nrec_bytes)?;
696 child_nrecords.push(nrec as u16);
697 if has_total_records {
698 cursor.read_uvar(total_nrec_bytes)?;
700 }
701 }
702
703 let compact_end = cursor.position();
704 verify_node_checksum(
705 &mut cursor,
706 start,
707 header.node_size,
708 compact_end,
709 "internal node",
710 )?;
711
712 records.extend(node_records);
718
719 let child_depth = depth - 1;
721 for (i, &child_addr) in child_addresses.iter().enumerate() {
722 if Cursor::is_undefined_offset(child_addr, offset_size) {
723 continue;
724 }
725 let child_nrec = child_nrecords[i];
726 if child_depth == 0 {
727 let mut child_cursor = Cursor::new(data);
729 child_cursor.set_position(child_addr);
730 parse_leaf_node(
731 &mut child_cursor,
732 header,
733 offset_size,
734 length_size,
735 ndims,
736 chunk_dims,
737 chunk_bounds,
738 child_nrec,
739 heap_id_len,
740 records,
741 )?;
742 } else {
743 parse_internal_node(
745 data,
746 child_addr,
747 header,
748 offset_size,
749 length_size,
750 ndims,
751 chunk_dims,
752 chunk_bounds,
753 child_nrec,
754 child_depth,
755 heap_id_len,
756 records,
757 )?;
758 }
759 }
760
761 Ok(())
762}
763
764pub fn collect_btree_v2_records(
773 data: &[u8],
774 header: &BTreeV2Header,
775 offset_size: u8,
776 length_size: u8,
777 ndims: Option<u32>,
778 chunk_dims: &[u32],
779 chunk_bounds: Option<(&[u64], &[u64])>,
780) -> Result<Vec<BTreeV2Record>> {
781 if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
782 return Ok(Vec::new());
783 }
784
785 if header.total_records == 0 || header.num_records_in_root == 0 {
786 return Ok(Vec::new());
787 }
788
789 let heap_id_len = compute_heap_id_len(header);
791
792 let mut records = Vec::new();
793
794 if header.depth == 0 {
795 let mut cursor = Cursor::new(data);
797 cursor.set_position(header.root_node_address);
798 parse_leaf_node(
799 &mut cursor,
800 header,
801 offset_size,
802 length_size,
803 ndims,
804 chunk_dims,
805 chunk_bounds,
806 header.num_records_in_root,
807 heap_id_len,
808 &mut records,
809 )?;
810 } else {
811 parse_internal_node(
813 data,
814 header.root_node_address,
815 header,
816 offset_size,
817 length_size,
818 ndims,
819 chunk_dims,
820 chunk_bounds,
821 header.num_records_in_root,
822 header.depth,
823 heap_id_len,
824 &mut records,
825 )?;
826 }
827
828 Ok(records)
829}
830
831pub fn collect_btree_v2_records_storage(
833 storage: &dyn Storage,
834 header: &BTreeV2Header,
835 offset_size: u8,
836 length_size: u8,
837 ndims: Option<u32>,
838 chunk_dims: &[u32],
839 chunk_bounds: Option<(&[u64], &[u64])>,
840) -> Result<Vec<BTreeV2Record>> {
841 if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
842 return Ok(Vec::new());
843 }
844
845 if header.total_records == 0 || header.num_records_in_root == 0 {
846 return Ok(Vec::new());
847 }
848
849 let heap_id_len = compute_heap_id_len(header);
850 let mut records = Vec::new();
851
852 if header.depth == 0 {
853 parse_leaf_node_storage(
854 storage,
855 header.root_node_address,
856 header,
857 offset_size,
858 length_size,
859 ndims,
860 chunk_dims,
861 chunk_bounds,
862 header.num_records_in_root,
863 heap_id_len,
864 &mut records,
865 )?;
866 } else {
867 parse_internal_node_storage(
868 storage,
869 header.root_node_address,
870 header,
871 offset_size,
872 length_size,
873 ndims,
874 chunk_dims,
875 chunk_bounds,
876 header.num_records_in_root,
877 header.depth,
878 heap_id_len,
879 &mut records,
880 )?;
881 }
882
883 Ok(records)
884}
885
886#[allow(clippy::too_many_arguments)]
887fn parse_leaf_node_storage(
888 storage: &dyn Storage,
889 address: u64,
890 header: &BTreeV2Header,
891 offset_size: u8,
892 length_size: u8,
893 ndims: Option<u32>,
894 chunk_dims: &[u32],
895 chunk_bounds: Option<(&[u64], &[u64])>,
896 num_records: u16,
897 heap_id_len: usize,
898 records: &mut Vec<BTreeV2Record>,
899) -> Result<()> {
900 let node_len = usize::try_from(header.node_size).map_err(|_| {
901 Error::InvalidData("B-tree v2 node size exceeds platform usize capacity".into())
902 })?;
903 let node_bytes = storage.read_range(address, node_len)?;
904 let mut cursor = Cursor::new(node_bytes.as_ref());
905 parse_leaf_node(
906 &mut cursor,
907 header,
908 offset_size,
909 length_size,
910 ndims,
911 chunk_dims,
912 chunk_bounds,
913 num_records,
914 heap_id_len,
915 records,
916 )
917}
918
919#[allow(clippy::too_many_arguments)]
920fn parse_internal_node_storage(
921 storage: &dyn Storage,
922 address: u64,
923 header: &BTreeV2Header,
924 offset_size: u8,
925 length_size: u8,
926 ndims: Option<u32>,
927 chunk_dims: &[u32],
928 chunk_bounds: Option<(&[u64], &[u64])>,
929 num_records: u16,
930 depth: u16,
931 heap_id_len: usize,
932 records: &mut Vec<BTreeV2Record>,
933) -> Result<()> {
934 let node_len = usize::try_from(header.node_size).map_err(|_| {
935 Error::InvalidData("B-tree v2 node size exceeds platform usize capacity".into())
936 })?;
937 let node_bytes = storage.read_range(address, node_len)?;
938 let mut cursor = Cursor::new(node_bytes.as_ref());
939 let start = cursor.position();
940
941 let sig = cursor.read_bytes(4)?;
942 if sig != BTIN_SIGNATURE {
943 return Err(Error::InvalidBTreeV2Signature {
944 context: "internal node",
945 });
946 }
947
948 let version = cursor.read_u8()?;
949 if version != 0 {
950 return Err(Error::UnsupportedBTreeVersion(version));
951 }
952
953 let node_type = cursor.read_u8()?;
954 if node_type != header.btree_type {
955 return Err(Error::InvalidData(format!(
956 "B-tree v2 internal node type mismatch: header says {}, node says {}",
957 header.btree_type, node_type
958 )));
959 }
960
961 let max_child_records = if depth == 1 {
962 max_leaf_records(header.node_size, header.record_size)
963 } else {
964 let leaf_max = max_leaf_records(header.node_size, header.record_size);
965 let mut prev_max = leaf_max;
966 for _ in 1..depth {
967 prev_max =
968 max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
969 }
970 prev_max
971 };
972 let nrec_bytes = num_records_size(max_child_records);
973
974 let mut node_records = Vec::with_capacity(num_records as usize);
975 for _ in 0..num_records {
976 let record = parse_record(
977 &mut cursor,
978 header.btree_type,
979 header.record_size,
980 offset_size,
981 length_size,
982 ndims,
983 heap_id_len,
984 )?;
985 if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
986 node_records.push(record);
987 }
988 }
989
990 let num_children = usize::from(num_records) + 1;
991 let has_total_records = depth > 1;
992 let total_nrec_bytes = if has_total_records {
993 usize::from(length_size)
994 } else {
995 0
996 };
997
998 let mut child_addresses = Vec::with_capacity(num_children);
999 let mut child_nrecords = Vec::with_capacity(num_children);
1000 for _ in 0..num_children {
1001 child_addresses.push(cursor.read_offset(offset_size)?);
1002 child_nrecords.push(cursor.read_uvar(nrec_bytes)? as u16);
1003 if has_total_records {
1004 cursor.read_uvar(total_nrec_bytes)?;
1005 }
1006 }
1007
1008 let compact_end = cursor.position();
1009 verify_node_checksum(
1010 &mut cursor,
1011 start,
1012 header.node_size,
1013 compact_end,
1014 "internal node",
1015 )?;
1016
1017 records.extend(node_records);
1018
1019 let child_depth = depth - 1;
1020 for (i, &child_addr) in child_addresses.iter().enumerate() {
1021 if Cursor::is_undefined_offset(child_addr, offset_size) {
1022 continue;
1023 }
1024 let child_nrec = child_nrecords[i];
1025 if child_depth == 0 {
1026 parse_leaf_node_storage(
1027 storage,
1028 child_addr,
1029 header,
1030 offset_size,
1031 length_size,
1032 ndims,
1033 chunk_dims,
1034 chunk_bounds,
1035 child_nrec,
1036 heap_id_len,
1037 records,
1038 )?;
1039 } else {
1040 parse_internal_node_storage(
1041 storage,
1042 child_addr,
1043 header,
1044 offset_size,
1045 length_size,
1046 ndims,
1047 chunk_dims,
1048 chunk_bounds,
1049 child_nrec,
1050 child_depth,
1051 heap_id_len,
1052 records,
1053 )?;
1054 }
1055 }
1056
1057 Ok(())
1058}
1059
1060fn compute_heap_id_len(header: &BTreeV2Header) -> usize {
1066 let rs = header.record_size as usize;
1067 match header.btree_type {
1068 5 => rs.saturating_sub(4), 6 => rs.saturating_sub(8), 8 => rs.saturating_sub(4 + 1 + 4), 9 => rs.saturating_sub(4), _ => 0,
1073 }
1074}
1075
1076#[cfg(test)]
1077mod tests {
1078 use super::*;
1079
1080 #[allow(clippy::too_many_arguments)]
1082 fn build_header(
1083 btree_type: u8,
1084 node_size: u32,
1085 record_size: u16,
1086 depth: u16,
1087 root_node_address: u64,
1088 num_records_in_root: u16,
1089 total_records: u64,
1090 offset_size: u8,
1091 length_size: u8,
1092 ) -> Vec<u8> {
1093 let mut buf = Vec::new();
1094 buf.extend_from_slice(b"BTHD");
1095 buf.push(0); buf.push(btree_type);
1097 buf.extend_from_slice(&node_size.to_le_bytes());
1098 buf.extend_from_slice(&record_size.to_le_bytes());
1099 buf.extend_from_slice(&depth.to_le_bytes());
1100 buf.push(75); buf.push(40); match offset_size {
1103 4 => buf.extend_from_slice(&(root_node_address as u32).to_le_bytes()),
1104 8 => buf.extend_from_slice(&root_node_address.to_le_bytes()),
1105 _ => panic!("unsupported"),
1106 }
1107 buf.extend_from_slice(&num_records_in_root.to_le_bytes());
1108 match length_size {
1109 4 => buf.extend_from_slice(&(total_records as u32).to_le_bytes()),
1110 8 => buf.extend_from_slice(&total_records.to_le_bytes()),
1111 _ => panic!("unsupported"),
1112 }
1113 let checksum = jenkins_lookup3(&buf);
1115 buf.extend_from_slice(&checksum.to_le_bytes());
1116 buf
1117 }
1118
1119 #[test]
1120 fn test_parse_header() {
1121 let data = build_header(5, 4096, 12, 0, 0x1000, 3, 3, 8, 8);
1122 let mut cursor = Cursor::new(&data);
1123 let hdr = BTreeV2Header::parse(&mut cursor, 8, 8).unwrap();
1124
1125 assert_eq!(hdr.btree_type, 5);
1126 assert_eq!(hdr.node_size, 4096);
1127 assert_eq!(hdr.record_size, 12);
1128 assert_eq!(hdr.depth, 0);
1129 assert_eq!(hdr.split_percent, 75);
1130 assert_eq!(hdr.merge_percent, 40);
1131 assert_eq!(hdr.root_node_address, 0x1000);
1132 assert_eq!(hdr.num_records_in_root, 3);
1133 assert_eq!(hdr.total_records, 3);
1134 }
1135
1136 #[test]
1137 fn test_bad_signature() {
1138 let mut data = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
1139 data[0] = b'X';
1140 let mut cursor = Cursor::new(&data);
1141 assert!(matches!(
1142 BTreeV2Header::parse(&mut cursor, 8, 8),
1143 Err(Error::InvalidBTreeV2Signature { .. })
1144 ));
1145 }
1146
1147 #[test]
1148 fn test_bad_checksum() {
1149 let mut data = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
1150 data[6] = 0xFF;
1152 let mut cursor = Cursor::new(&data);
1153 assert!(matches!(
1154 BTreeV2Header::parse(&mut cursor, 8, 8),
1155 Err(Error::ChecksumMismatch { .. })
1156 ));
1157 }
1158
1159 #[test]
1160 fn test_collect_empty_tree() {
1161 let header = BTreeV2Header {
1162 btree_type: 5,
1163 node_size: 4096,
1164 record_size: 12,
1165 depth: 0,
1166 split_percent: 75,
1167 merge_percent: 40,
1168 root_node_address: u64::MAX,
1169 num_records_in_root: 0,
1170 total_records: 0,
1171 };
1172 let data = vec![0u8; 100];
1173 let records = collect_btree_v2_records(&data, &header, 8, 8, None, &[], None).unwrap();
1174 assert!(records.is_empty());
1175 }
1176
1177 #[test]
1178 fn test_compute_heap_id_len() {
1179 let h5 = BTreeV2Header {
1181 btree_type: 5,
1182 record_size: 12,
1183 node_size: 0,
1184 depth: 0,
1185 split_percent: 0,
1186 merge_percent: 0,
1187 root_node_address: 0,
1188 num_records_in_root: 0,
1189 total_records: 0,
1190 };
1191 assert_eq!(compute_heap_id_len(&h5), 8);
1192
1193 let h8 = BTreeV2Header {
1195 btree_type: 8,
1196 record_size: 17,
1197 ..h5
1198 };
1199 assert_eq!(compute_heap_id_len(&h8), 8);
1200 }
1201
1202 #[test]
1203 fn test_max_leaf_records() {
1204 assert_eq!(max_leaf_records(4096, 12), 340);
1207 }
1208
1209 #[test]
1210 fn test_num_records_size() {
1211 assert_eq!(num_records_size(0), 1);
1212 assert_eq!(num_records_size(255), 1);
1213 assert_eq!(num_records_size(256), 2);
1214 assert_eq!(num_records_size(65535), 2);
1215 assert_eq!(num_records_size(65536), 4);
1216 }
1217
1218 #[test]
1219 fn test_parse_huge_indirect_record() {
1220 let mut data = Vec::new();
1221 data.extend_from_slice(&0x1234u64.to_le_bytes());
1222 data.extend_from_slice(&99u64.to_le_bytes());
1223 data.extend_from_slice(&7u64.to_le_bytes());
1224 let mut cursor = Cursor::new(&data);
1225
1226 let record = parse_record(&mut cursor, 1, data.len() as u16, 8, 8, None, 0).unwrap();
1227 match record {
1228 BTreeV2Record::HugeIndirectNonFiltered {
1229 address,
1230 length,
1231 object_id,
1232 } => {
1233 assert_eq!(address, 0x1234);
1234 assert_eq!(length, 99);
1235 assert_eq!(object_id, 7);
1236 }
1237 other => panic!("expected huge record, got {:?}", other),
1238 }
1239 }
1240
1241 #[test]
1242 fn test_parse_shared_heap_record() {
1243 let mut data = Vec::new();
1244 data.push(0);
1245 data.extend_from_slice(&[0, 0, 0]);
1246 data.extend_from_slice(&0xAABB_CCDDu32.to_le_bytes());
1247 data.extend_from_slice(&3u32.to_le_bytes());
1248 data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
1249 let mut cursor = Cursor::new(&data);
1250
1251 let record = parse_record(&mut cursor, 7, data.len() as u16, 8, 8, None, 0).unwrap();
1252 match record {
1253 BTreeV2Record::SharedMessageHeap {
1254 hash,
1255 reference_count,
1256 heap_id,
1257 } => {
1258 assert_eq!(hash, 0xAABB_CCDD);
1259 assert_eq!(reference_count, 3);
1260 assert_eq!(heap_id, vec![1, 2, 3, 4, 5, 6, 7, 8]);
1261 }
1262 other => panic!("expected shared heap record, got {:?}", other),
1263 }
1264 }
1265
1266 #[test]
1267 fn test_parse_leaf_with_type5_records() {
1268 let record_size: u16 = 12;
1271 let node_size: u32 = 4096;
1272
1273 let header = BTreeV2Header {
1274 btree_type: 5,
1275 node_size,
1276 record_size,
1277 depth: 0,
1278 split_percent: 75,
1279 merge_percent: 40,
1280 root_node_address: 0, num_records_in_root: 2,
1282 total_records: 2,
1283 };
1284
1285 let mut leaf = Vec::new();
1287 leaf.extend_from_slice(b"BTLF"); leaf.push(0); leaf.push(5); leaf.extend_from_slice(&0xAABBCCDDu32.to_le_bytes());
1293 leaf.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
1294
1295 leaf.extend_from_slice(&0x11223344u32.to_le_bytes());
1297 leaf.extend_from_slice(&[9, 10, 11, 12, 13, 14, 15, 16]);
1298
1299 let checksum = jenkins_lookup3(&leaf);
1302 leaf.extend_from_slice(&checksum.to_le_bytes());
1303 leaf.resize(node_size as usize, 0);
1304
1305 let mut records = Vec::new();
1306 let mut cursor = Cursor::new(&leaf);
1307 parse_leaf_node(
1308 &mut cursor,
1309 &header,
1310 8,
1311 8,
1312 None,
1313 &[],
1314 None,
1315 2,
1316 8, &mut records,
1318 )
1319 .unwrap();
1320
1321 assert_eq!(records.len(), 2);
1322 match &records[0] {
1323 BTreeV2Record::LinkNameHash { hash, heap_id } => {
1324 assert_eq!(*hash, 0xAABBCCDD);
1325 assert_eq!(heap_id, &[1, 2, 3, 4, 5, 6, 7, 8]);
1326 }
1327 _ => panic!("expected LinkNameHash"),
1328 }
1329 match &records[1] {
1330 BTreeV2Record::LinkNameHash { hash, heap_id } => {
1331 assert_eq!(*hash, 0x11223344);
1332 assert_eq!(heap_id, &[9, 10, 11, 12, 13, 14, 15, 16]);
1333 }
1334 _ => panic!("expected LinkNameHash"),
1335 }
1336 }
1337}