1use std::collections::HashSet;
12
13use crate::checksum::jenkins_lookup3;
14use crate::error::{Error, Result};
15use crate::io::Cursor;
16use crate::storage::Storage;
17
18const BTHD_SIGNATURE: [u8; 4] = *b"BTHD";
23const BTIN_SIGNATURE: [u8; 4] = *b"BTIN";
24const BTLF_SIGNATURE: [u8; 4] = *b"BTLF";
25const MAX_BTREE_V2_DEPTH: u16 = 64;
26
27#[derive(Debug, Clone)]
33pub struct BTreeV2Header {
34 pub btree_type: u8,
36 pub node_size: u32,
38 pub record_size: u16,
40 pub depth: u16,
42 pub split_percent: u8,
44 pub merge_percent: u8,
46 pub root_node_address: u64,
48 pub num_records_in_root: u16,
50 pub total_records: u64,
52}
53
54impl BTreeV2Header {
55 pub fn parse(cursor: &mut Cursor, offset_size: u8, length_size: u8) -> Result<Self> {
71 let start = cursor.position();
72
73 let sig = cursor.read_bytes(4)?;
74 if sig != BTHD_SIGNATURE {
75 return Err(Error::InvalidBTreeV2Signature { context: "header" });
76 }
77
78 let version = cursor.read_u8()?;
79 if version != 0 {
80 return Err(Error::UnsupportedBTreeVersion(version));
81 }
82
83 let btree_type = cursor.read_u8()?;
84 let node_size = cursor.read_u32_le()?;
85 let record_size = cursor.read_u16_le()?;
86 let depth = cursor.read_u16_le()?;
87 let split_percent = cursor.read_u8()?;
88 let merge_percent = cursor.read_u8()?;
89 let root_node_address = cursor.read_offset(offset_size)?;
90 let num_records_in_root = cursor.read_u16_le()?;
91 let total_records = cursor.read_length(length_size)?;
92
93 let checksum_end = cursor.position();
95 let stored_checksum = cursor.read_u32_le()?;
96
97 let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_end as usize]);
98 if computed != stored_checksum {
99 return Err(Error::ChecksumMismatch {
100 expected: stored_checksum,
101 actual: computed,
102 });
103 }
104
105 Ok(BTreeV2Header {
106 btree_type,
107 node_size,
108 record_size,
109 depth,
110 split_percent,
111 merge_percent,
112 root_node_address,
113 num_records_in_root,
114 total_records,
115 })
116 }
117
118 pub fn parse_at_storage(
120 storage: &dyn Storage,
121 address: u64,
122 offset_size: u8,
123 length_size: u8,
124 ) -> Result<Self> {
125 let header_len = 4
126 + 1
127 + 1
128 + 4
129 + 2
130 + 2
131 + 1
132 + 1
133 + usize::from(offset_size)
134 + 2
135 + usize::from(length_size)
136 + 4;
137 let bytes = storage.read_range(address, header_len)?;
138 let mut cursor = Cursor::new(bytes.as_ref());
139 Self::parse(&mut cursor, offset_size, length_size)
140 }
141}
142
143#[derive(Debug, Clone)]
151pub enum BTreeV2Record {
152 HugeIndirectNonFiltered {
154 address: u64,
155 length: u64,
156 object_id: u64,
157 },
158 HugeIndirectFiltered {
160 address: u64,
161 filtered_length: u64,
162 filter_mask: u32,
163 memory_length: u64,
164 object_id: u64,
165 },
166 HugeDirectNonFiltered { address: u64, length: u64 },
168 HugeDirectFiltered {
170 address: u64,
171 filtered_length: u64,
172 filter_mask: u32,
173 memory_length: u64,
174 },
175 LinkNameHash { hash: u32, heap_id: Vec<u8> },
177 CreationOrder { order: u64, heap_id: Vec<u8> },
179 AttributeNameHash {
181 hash: u32,
182 flags: u8,
183 creation_order: u32,
184 heap_id: Vec<u8>,
185 },
186 AttributeCreationOrder { order: u32, heap_id: Vec<u8> },
188 ChunkedNonFiltered { address: u64, offsets: Vec<u64> },
190 ChunkedFiltered {
192 address: u64,
193 chunk_size: u64,
194 filter_mask: u32,
195 offsets: Vec<u64>,
196 },
197 SharedMessageHeap {
199 hash: u32,
200 reference_count: u32,
201 heap_id: Vec<u8>,
202 },
203 SharedMessageObjectHeader {
205 hash: u32,
206 message_type: u16,
207 object_header_index: u16,
208 object_header_address: u64,
209 },
210 Unknown { record_type: u8, data: Vec<u8> },
212}
213
214#[allow(clippy::too_many_arguments)]
220fn parse_record(
221 cursor: &mut Cursor,
222 btree_type: u8,
223 record_size: u16,
224 offset_size: u8,
225 length_size: u8,
226 ndims: Option<u32>,
227 chunk_dims: &[u32],
228 heap_id_len: usize,
229) -> Result<BTreeV2Record> {
230 let record_start = cursor.position();
231
232 let record = match btree_type {
233 1 => BTreeV2Record::HugeIndirectNonFiltered {
235 address: cursor.read_offset(offset_size)?,
236 length: cursor.read_length(length_size)?,
237 object_id: cursor.read_length(length_size)?,
238 },
239
240 2 => BTreeV2Record::HugeIndirectFiltered {
242 address: cursor.read_offset(offset_size)?,
243 filtered_length: cursor.read_length(length_size)?,
244 filter_mask: cursor.read_u32_le()?,
245 memory_length: cursor.read_length(length_size)?,
246 object_id: cursor.read_length(length_size)?,
247 },
248
249 3 => BTreeV2Record::HugeDirectNonFiltered {
251 address: cursor.read_offset(offset_size)?,
252 length: cursor.read_length(length_size)?,
253 },
254
255 4 => BTreeV2Record::HugeDirectFiltered {
257 address: cursor.read_offset(offset_size)?,
258 filtered_length: cursor.read_length(length_size)?,
259 filter_mask: cursor.read_u32_le()?,
260 memory_length: cursor.read_length(length_size)?,
261 },
262
263 5 => {
265 let hash = cursor.read_u32_le()?;
266 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
267 BTreeV2Record::LinkNameHash { hash, heap_id }
268 }
269
270 6 => {
272 let order = cursor.read_u64_le()?;
273 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
274 BTreeV2Record::CreationOrder { order, heap_id }
275 }
276
277 7 => {
279 let location = cursor.read_u8()?;
280 cursor.skip(3)?;
281 let hash = cursor.read_u32_le()?;
282 match location {
283 0 => {
284 let reference_count = cursor.read_u32_le()?;
285 let heap_id = cursor.read_bytes(8)?.to_vec();
286 BTreeV2Record::SharedMessageHeap {
287 hash,
288 reference_count,
289 heap_id,
290 }
291 }
292 1 => {
293 let _reserved = cursor.read_u8()?;
294 let message_type = u16::from(cursor.read_u8()?);
295 let object_header_index = cursor.read_u16_le()?;
296 let object_header_address = cursor.read_offset(offset_size)?;
297 BTreeV2Record::SharedMessageObjectHeader {
298 hash,
299 message_type,
300 object_header_index,
301 object_header_address,
302 }
303 }
304 other => {
305 return Err(Error::InvalidData(format!(
306 "unknown SOHM B-tree record location: {other}"
307 )));
308 }
309 }
310 }
311
312 8 => {
314 let hash = cursor.read_u32_le()?;
315 let flags = cursor.read_u8()?;
316 let creation_order = cursor.read_u32_le()?;
317 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
318 BTreeV2Record::AttributeNameHash {
319 hash,
320 flags,
321 creation_order,
322 heap_id,
323 }
324 }
325
326 9 => {
328 let order = cursor.read_u32_le()?;
329 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
330 BTreeV2Record::AttributeCreationOrder { order, heap_id }
331 }
332
333 10 => {
335 let offset_bytes_available = record_payload_len(
339 record_size,
340 offset_size as usize,
341 "B-tree v2 type-10 chunk record is shorter than its address",
342 )?;
343 let address = cursor.read_offset(offset_size)?;
344 let num_offsets = offset_bytes_available / 8;
345 let offsets = read_scaled_chunk_offsets(cursor, num_offsets, ndims, chunk_dims)?;
346 BTreeV2Record::ChunkedNonFiltered { address, offsets }
347 }
348
349 11 => {
351 let nbytes_size = length_size as usize;
353 let fixed_size = offset_size as usize + nbytes_size + 4; let remaining = record_payload_len(
355 record_size,
356 fixed_size,
357 "B-tree v2 type-11 chunk record is shorter than its fixed fields",
358 )?;
359 let address = cursor.read_offset(offset_size)?;
360 let chunk_size = cursor.read_length(length_size)?;
361 let filter_mask = cursor.read_u32_le()?;
362 let num_offsets = remaining / 8;
363 let offsets = read_scaled_chunk_offsets(cursor, num_offsets, ndims, chunk_dims)?;
364 BTreeV2Record::ChunkedFiltered {
365 address,
366 chunk_size,
367 filter_mask,
368 offsets,
369 }
370 }
371
372 _ => {
374 let data = cursor.read_bytes(record_size as usize)?.to_vec();
375 return Ok(BTreeV2Record::Unknown {
376 record_type: btree_type,
377 data,
378 });
379 }
380 };
381
382 let consumed = cursor.position() - record_start;
384 let record_size = u64::from(record_size);
385 if consumed > record_size {
386 return Err(Error::InvalidData(format!(
387 "B-tree v2 type-{btree_type} record consumed {consumed} bytes but record size is {record_size}"
388 )));
389 }
390 if consumed < record_size {
391 cursor.skip((record_size - consumed) as usize)?;
392 }
393
394 Ok(record)
395}
396
397fn record_payload_len(record_size: u16, fixed_size: usize, error_message: &str) -> Result<usize> {
398 (record_size as usize)
399 .checked_sub(fixed_size)
400 .ok_or_else(|| Error::InvalidData(error_message.into()))
401}
402
403fn read_scaled_chunk_offsets(
404 cursor: &mut Cursor,
405 num_offsets: usize,
406 ndims: Option<u32>,
407 chunk_dims: &[u32],
408) -> Result<Vec<u64>> {
409 if let Some(ndims) = ndims {
410 let ndims = usize::try_from(ndims)
411 .map_err(|_| Error::InvalidData("B-tree v2 chunk rank exceeds usize".into()))?;
412 if num_offsets != ndims {
413 return Err(Error::InvalidData(format!(
414 "B-tree v2 chunk record has {num_offsets} offsets for {ndims} dimensions"
415 )));
416 }
417 }
418
419 if !chunk_dims.is_empty() && num_offsets != chunk_dims.len() {
420 return Err(Error::InvalidData(format!(
421 "B-tree v2 chunk record has {num_offsets} offsets but {} chunk dimensions",
422 chunk_dims.len()
423 )));
424 }
425
426 let mut offsets = Vec::with_capacity(num_offsets);
427 for dim in 0..num_offsets {
428 let scaled = cursor.read_u64_le()?;
429 let chunk_extent = chunk_dims.get(dim).copied().unwrap_or(1);
430 let offset = scaled
431 .checked_mul(u64::from(chunk_extent))
432 .ok_or_else(|| Error::InvalidData("B-tree v2 chunk offset overflows u64".into()))?;
433 offsets.push(offset);
434 }
435 Ok(offsets)
436}
437
438fn record_matches_chunk_bounds(
439 record: &BTreeV2Record,
440 chunk_dims: &[u32],
441 chunk_bounds: Option<(&[u64], &[u64])>,
442) -> bool {
443 let Some((first_chunk, last_chunk)) = chunk_bounds else {
444 return true;
445 };
446
447 let offsets = match record {
448 BTreeV2Record::ChunkedNonFiltered { offsets, .. }
449 | BTreeV2Record::ChunkedFiltered { offsets, .. } => offsets,
450 _ => return true,
451 };
452
453 offsets.iter().enumerate().all(|(dim, offset)| {
454 let chunk_index = *offset / u64::from(chunk_dims[dim]);
455 chunk_index >= first_chunk[dim] && chunk_index <= last_chunk[dim]
456 })
457}
458
459fn record_matches_link_name_hash(record: &BTreeV2Record, target_hash: Option<u32>) -> bool {
460 match target_hash {
461 Some(target_hash) => {
462 matches!(record, BTreeV2Record::LinkNameHash { hash, .. } if *hash == target_hash)
463 }
464 None => true,
465 }
466}
467
468fn record_matches_query(
469 record: &BTreeV2Record,
470 chunk_dims: &[u32],
471 chunk_bounds: Option<(&[u64], &[u64])>,
472 link_name_hash: Option<u32>,
473) -> bool {
474 record_matches_link_name_hash(record, link_name_hash)
475 && record_matches_chunk_bounds(record, chunk_dims, chunk_bounds)
476}
477
478fn link_name_hash(record: &BTreeV2Record) -> Option<u32> {
479 match record {
480 BTreeV2Record::LinkNameHash { hash, .. } => Some(*hash),
481 _ => None,
482 }
483}
484
485fn child_may_match_link_name_hash(
486 records: &[BTreeV2Record],
487 child_index: usize,
488 target_hash: Option<u32>,
489) -> bool {
490 let Some(target_hash) = target_hash else {
491 return true;
492 };
493
494 let lower_matches = child_index == 0
495 || link_name_hash(&records[child_index - 1]).map_or(true, |hash| target_hash >= hash);
496 let upper_matches = child_index == records.len()
497 || link_name_hash(&records[child_index]).map_or(true, |hash| target_hash <= hash);
498 lower_matches && upper_matches
499}
500
501fn validate_btree_v2_depth(depth: u16) -> Result<()> {
502 if depth > MAX_BTREE_V2_DEPTH {
503 return Err(Error::InvalidData(format!(
504 "B-tree v2 depth {depth} exceeds traversal limit {MAX_BTREE_V2_DEPTH}"
505 )));
506 }
507 Ok(())
508}
509
510fn enter_btree_v2_node(visited: &mut HashSet<u64>, address: u64) -> Result<()> {
511 if !visited.insert(address) {
512 return Err(Error::InvalidData(format!(
513 "B-tree v2 traversal revisits node at offset {address:#x}"
514 )));
515 }
516 Ok(())
517}
518
519fn num_records_size(max_records: u64) -> usize {
526 if max_records <= 0xFF {
527 1
528 } else if max_records <= 0xFFFF {
529 2
530 } else if max_records <= 0xFFFF_FFFF {
531 4
532 } else {
533 8
534 }
535}
536
537fn max_leaf_records(node_size: u32, record_size: u16) -> u64 {
539 let overhead = 10u32;
541 if node_size <= overhead || record_size == 0 {
542 return 0;
543 }
544 ((node_size - overhead) / record_size as u32) as u64
545}
546
547fn max_internal_records(
552 node_size: u32,
553 record_size: u16,
554 offset_size: u8,
555 max_child_records: u64,
556) -> u64 {
557 let overhead = 10u32;
559 if node_size <= overhead || record_size == 0 {
560 return 0;
561 }
562 let available = (node_size - overhead) as u64;
563 let nrec_size = num_records_size(max_child_records) as u64;
565 let entry_size = record_size as u64 + offset_size as u64 + nrec_size;
566 let extra = offset_size as u64 + nrec_size;
570 if available <= extra {
571 return 0;
572 }
573 (available - extra) / entry_size
574}
575
576fn node_checksum_bounds(
577 cursor: &Cursor<'_>,
578 start: u64,
579 node_size: u32,
580 compact_end: u64,
581 context: &'static str,
582) -> Result<usize> {
583 let start = usize::try_from(start)
584 .map_err(|_| Error::InvalidData(format!("B-tree v2 {context} offset is too large")))?;
585 let node_size = usize::try_from(node_size)
586 .map_err(|_| Error::InvalidData(format!("B-tree v2 {context} size is too large")))?;
587 if node_size < 10 {
588 return Err(Error::InvalidData(format!(
589 "B-tree v2 {context} node is too small: {node_size} bytes"
590 )));
591 }
592
593 let checksum_pos = usize::try_from(compact_end)
594 .map_err(|_| Error::InvalidData(format!("B-tree v2 {context} checksum is too large")))?;
595 if checksum_pos < start {
596 return Err(Error::InvalidData(format!(
597 "B-tree v2 {context} payload starts before node"
598 )));
599 }
600 let node_end = start
601 .checked_add(node_size)
602 .ok_or_else(|| Error::InvalidData(format!("B-tree v2 {context} size overflow")))?;
603
604 if node_end > cursor.data().len() {
605 return Err(Error::UnexpectedEof {
606 offset: start as u64,
607 needed: node_size as u64,
608 available: cursor.data().len().saturating_sub(start) as u64,
609 });
610 }
611 let checksum_end = checksum_pos
612 .checked_add(4)
613 .ok_or_else(|| Error::InvalidData(format!("B-tree v2 {context} checksum overflow")))?;
614 if checksum_end > node_end {
615 return Err(Error::InvalidData(format!(
616 "B-tree v2 {context} contents exceed node size"
617 )));
618 }
619
620 Ok(checksum_pos)
621}
622
623fn verify_node_checksum(
624 cursor: &mut Cursor<'_>,
625 start: u64,
626 node_size: u32,
627 compact_end: u64,
628 context: &'static str,
629) -> Result<()> {
630 let checksum_pos = node_checksum_bounds(cursor, start, node_size, compact_end, context)?;
631 cursor.set_position(checksum_pos as u64);
632 let stored_checksum = cursor.read_u32_le()?;
633 let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_pos]);
634 if computed != stored_checksum {
635 return Err(Error::ChecksumMismatch {
636 expected: stored_checksum,
637 actual: computed,
638 });
639 }
640 Ok(())
641}
642
643#[allow(clippy::too_many_arguments)]
645fn parse_leaf_node(
646 cursor: &mut Cursor,
647 header: &BTreeV2Header,
648 offset_size: u8,
649 length_size: u8,
650 ndims: Option<u32>,
651 chunk_dims: &[u32],
652 chunk_bounds: Option<(&[u64], &[u64])>,
653 link_name_hash: Option<u32>,
654 num_records: u16,
655 heap_id_len: usize,
656 records: &mut Vec<BTreeV2Record>,
657) -> Result<()> {
658 let start = cursor.position();
659
660 let sig = cursor.read_bytes(4)?;
661 if sig != BTLF_SIGNATURE {
662 return Err(Error::InvalidBTreeV2Signature {
663 context: "leaf node",
664 });
665 }
666
667 let version = cursor.read_u8()?;
668 if version != 0 {
669 return Err(Error::UnsupportedBTreeVersion(version));
670 }
671
672 let node_type = cursor.read_u8()?;
673 if node_type != header.btree_type {
674 return Err(Error::InvalidData(format!(
675 "B-tree v2 leaf node type mismatch: header says {}, node says {}",
676 header.btree_type, node_type
677 )));
678 }
679
680 for _ in 0..num_records {
681 let record = parse_record(
682 cursor,
683 header.btree_type,
684 header.record_size,
685 offset_size,
686 length_size,
687 ndims,
688 chunk_dims,
689 heap_id_len,
690 )?;
691 if record_matches_query(&record, chunk_dims, chunk_bounds, link_name_hash) {
692 records.push(record);
693 }
694 }
695
696 verify_node_checksum(
697 cursor,
698 start,
699 header.node_size,
700 cursor.position(),
701 "leaf node",
702 )?;
703
704 Ok(())
705}
706
707#[allow(clippy::too_many_arguments)]
709fn parse_internal_node(
710 data: &[u8],
711 address: u64,
712 header: &BTreeV2Header,
713 offset_size: u8,
714 length_size: u8,
715 ndims: Option<u32>,
716 chunk_dims: &[u32],
717 chunk_bounds: Option<(&[u64], &[u64])>,
718 link_name_hash: Option<u32>,
719 num_records: u16,
720 depth: u16,
721 heap_id_len: usize,
722 visited: &mut HashSet<u64>,
723 records: &mut Vec<BTreeV2Record>,
724) -> Result<()> {
725 if depth == 0 {
726 return Err(Error::InvalidData(
727 "B-tree v2 internal node traversal reached depth zero".into(),
728 ));
729 }
730 enter_btree_v2_node(visited, address)?;
731
732 let mut cursor = Cursor::new(data);
733 cursor.set_position(address);
734
735 let start = cursor.position();
736
737 let sig = cursor.read_bytes(4)?;
738 if sig != BTIN_SIGNATURE {
739 return Err(Error::InvalidBTreeV2Signature {
740 context: "internal node",
741 });
742 }
743
744 let version = cursor.read_u8()?;
745 if version != 0 {
746 return Err(Error::UnsupportedBTreeVersion(version));
747 }
748
749 let node_type = cursor.read_u8()?;
750 if node_type != header.btree_type {
751 return Err(Error::InvalidData(format!(
752 "B-tree v2 internal node type mismatch: header says {}, node says {}",
753 header.btree_type, node_type
754 )));
755 }
756
757 let max_child_records = if depth == 1 {
760 max_leaf_records(header.node_size, header.record_size)
761 } else {
762 let leaf_max = max_leaf_records(header.node_size, header.record_size);
764 let mut prev_max = leaf_max;
765 for _ in 1..depth {
766 prev_max =
767 max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
768 }
769 prev_max
770 };
771 let nrec_bytes = num_records_size(max_child_records);
772
773 let mut node_records = Vec::with_capacity(num_records as usize);
785 for _ in 0..num_records {
786 let record = parse_record(
787 &mut cursor,
788 header.btree_type,
789 header.record_size,
790 offset_size,
791 length_size,
792 ndims,
793 chunk_dims,
794 heap_id_len,
795 )?;
796 node_records.push(record);
797 }
798
799 let num_children = num_records as usize + 1;
801
802 let has_total_records = depth > 1;
805 let total_nrec_bytes = if has_total_records {
807 length_size as usize
810 } else {
811 0
812 };
813
814 let mut child_addresses = Vec::with_capacity(num_children);
815 let mut child_nrecords = Vec::with_capacity(num_children);
816
817 for _ in 0..num_children {
818 let child_addr = cursor.read_offset(offset_size)?;
819 child_addresses.push(child_addr);
820 let nrec = cursor.read_uvar(nrec_bytes)?;
821 child_nrecords.push(nrec as u16);
822 if has_total_records {
823 cursor.read_uvar(total_nrec_bytes)?;
825 }
826 }
827
828 let compact_end = cursor.position();
829 verify_node_checksum(
830 &mut cursor,
831 start,
832 header.node_size,
833 compact_end,
834 "internal node",
835 )?;
836
837 let child_depth = depth - 1;
840 for (i, record) in node_records.iter().enumerate() {
841 let child_addr = child_addresses[i];
842 if !Cursor::is_undefined_offset(child_addr, offset_size)
843 && child_may_match_link_name_hash(&node_records, i, link_name_hash)
844 {
845 let child_nrec = child_nrecords[i];
846 if child_depth == 0 {
847 enter_btree_v2_node(visited, child_addr)?;
848 let mut child_cursor = Cursor::new(data);
849 child_cursor.set_position(child_addr);
850 parse_leaf_node(
851 &mut child_cursor,
852 header,
853 offset_size,
854 length_size,
855 ndims,
856 chunk_dims,
857 chunk_bounds,
858 link_name_hash,
859 child_nrec,
860 heap_id_len,
861 records,
862 )?;
863 } else {
864 parse_internal_node(
865 data,
866 child_addr,
867 header,
868 offset_size,
869 length_size,
870 ndims,
871 chunk_dims,
872 chunk_bounds,
873 link_name_hash,
874 child_nrec,
875 child_depth,
876 heap_id_len,
877 visited,
878 records,
879 )?;
880 }
881 }
882
883 if record_matches_query(record, chunk_dims, chunk_bounds, link_name_hash) {
884 records.push(record.clone());
885 }
886 }
887
888 let final_child_index = node_records.len();
889 let final_child_addr = child_addresses[final_child_index];
890 if !Cursor::is_undefined_offset(final_child_addr, offset_size)
891 && child_may_match_link_name_hash(&node_records, final_child_index, link_name_hash)
892 {
893 let child_nrec = child_nrecords[final_child_index];
894 if child_depth == 0 {
895 enter_btree_v2_node(visited, final_child_addr)?;
896 let mut child_cursor = Cursor::new(data);
897 child_cursor.set_position(final_child_addr);
898 parse_leaf_node(
899 &mut child_cursor,
900 header,
901 offset_size,
902 length_size,
903 ndims,
904 chunk_dims,
905 chunk_bounds,
906 link_name_hash,
907 child_nrec,
908 heap_id_len,
909 records,
910 )?;
911 } else {
912 parse_internal_node(
913 data,
914 final_child_addr,
915 header,
916 offset_size,
917 length_size,
918 ndims,
919 chunk_dims,
920 chunk_bounds,
921 link_name_hash,
922 child_nrec,
923 child_depth,
924 heap_id_len,
925 visited,
926 records,
927 )?;
928 }
929 }
930
931 Ok(())
932}
933
934pub fn collect_btree_v2_records(
943 data: &[u8],
944 header: &BTreeV2Header,
945 offset_size: u8,
946 length_size: u8,
947 ndims: Option<u32>,
948 chunk_dims: &[u32],
949 chunk_bounds: Option<(&[u64], &[u64])>,
950) -> Result<Vec<BTreeV2Record>> {
951 collect_btree_v2_records_with_link_hash(
952 data,
953 header,
954 offset_size,
955 length_size,
956 ndims,
957 chunk_dims,
958 chunk_bounds,
959 None,
960 )
961}
962
963#[allow(clippy::too_many_arguments)]
964fn collect_btree_v2_records_with_link_hash(
965 data: &[u8],
966 header: &BTreeV2Header,
967 offset_size: u8,
968 length_size: u8,
969 ndims: Option<u32>,
970 chunk_dims: &[u32],
971 chunk_bounds: Option<(&[u64], &[u64])>,
972 link_name_hash: Option<u32>,
973) -> Result<Vec<BTreeV2Record>> {
974 if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
975 return Ok(Vec::new());
976 }
977
978 if header.total_records == 0 || header.num_records_in_root == 0 {
979 return Ok(Vec::new());
980 }
981 validate_btree_v2_depth(header.depth)?;
982
983 let heap_id_len = compute_heap_id_len(header);
985
986 let mut records = Vec::new();
987 let mut visited = HashSet::new();
988
989 if header.depth == 0 {
990 enter_btree_v2_node(&mut visited, header.root_node_address)?;
992 let mut cursor = Cursor::new(data);
993 cursor.set_position(header.root_node_address);
994 parse_leaf_node(
995 &mut cursor,
996 header,
997 offset_size,
998 length_size,
999 ndims,
1000 chunk_dims,
1001 chunk_bounds,
1002 link_name_hash,
1003 header.num_records_in_root,
1004 heap_id_len,
1005 &mut records,
1006 )?;
1007 } else {
1008 parse_internal_node(
1010 data,
1011 header.root_node_address,
1012 header,
1013 offset_size,
1014 length_size,
1015 ndims,
1016 chunk_dims,
1017 chunk_bounds,
1018 link_name_hash,
1019 header.num_records_in_root,
1020 header.depth,
1021 heap_id_len,
1022 &mut visited,
1023 &mut records,
1024 )?;
1025 }
1026
1027 Ok(records)
1028}
1029
1030#[cfg(test)]
1032pub(crate) fn collect_btree_v2_link_name_hash_records(
1033 data: &[u8],
1034 header: &BTreeV2Header,
1035 offset_size: u8,
1036 length_size: u8,
1037 target_hash: u32,
1038) -> Result<Vec<BTreeV2Record>> {
1039 collect_btree_v2_records_with_link_hash(
1040 data,
1041 header,
1042 offset_size,
1043 length_size,
1044 None,
1045 &[],
1046 None,
1047 Some(target_hash),
1048 )
1049}
1050
1051pub fn collect_btree_v2_records_storage(
1053 storage: &dyn Storage,
1054 header: &BTreeV2Header,
1055 offset_size: u8,
1056 length_size: u8,
1057 ndims: Option<u32>,
1058 chunk_dims: &[u32],
1059 chunk_bounds: Option<(&[u64], &[u64])>,
1060) -> Result<Vec<BTreeV2Record>> {
1061 collect_btree_v2_records_storage_with_link_hash(
1062 storage,
1063 header,
1064 offset_size,
1065 length_size,
1066 ndims,
1067 chunk_dims,
1068 chunk_bounds,
1069 None,
1070 )
1071}
1072
1073#[allow(clippy::too_many_arguments)]
1074fn collect_btree_v2_records_storage_with_link_hash(
1075 storage: &dyn Storage,
1076 header: &BTreeV2Header,
1077 offset_size: u8,
1078 length_size: u8,
1079 ndims: Option<u32>,
1080 chunk_dims: &[u32],
1081 chunk_bounds: Option<(&[u64], &[u64])>,
1082 link_name_hash: Option<u32>,
1083) -> Result<Vec<BTreeV2Record>> {
1084 if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
1085 return Ok(Vec::new());
1086 }
1087
1088 if header.total_records == 0 || header.num_records_in_root == 0 {
1089 return Ok(Vec::new());
1090 }
1091 validate_btree_v2_depth(header.depth)?;
1092
1093 let heap_id_len = compute_heap_id_len(header);
1094 let mut records = Vec::new();
1095 let mut visited = HashSet::new();
1096
1097 if header.depth == 0 {
1098 enter_btree_v2_node(&mut visited, header.root_node_address)?;
1099 parse_leaf_node_storage(
1100 storage,
1101 header.root_node_address,
1102 header,
1103 offset_size,
1104 length_size,
1105 ndims,
1106 chunk_dims,
1107 chunk_bounds,
1108 link_name_hash,
1109 header.num_records_in_root,
1110 heap_id_len,
1111 &mut records,
1112 )?;
1113 } else {
1114 parse_internal_node_storage(
1115 storage,
1116 header.root_node_address,
1117 header,
1118 offset_size,
1119 length_size,
1120 ndims,
1121 chunk_dims,
1122 chunk_bounds,
1123 link_name_hash,
1124 header.num_records_in_root,
1125 header.depth,
1126 heap_id_len,
1127 &mut visited,
1128 &mut records,
1129 )?;
1130 }
1131
1132 Ok(records)
1133}
1134
1135pub(crate) fn collect_btree_v2_link_name_hash_records_storage(
1137 storage: &dyn Storage,
1138 header: &BTreeV2Header,
1139 offset_size: u8,
1140 length_size: u8,
1141 target_hash: u32,
1142) -> Result<Vec<BTreeV2Record>> {
1143 collect_btree_v2_records_storage_with_link_hash(
1144 storage,
1145 header,
1146 offset_size,
1147 length_size,
1148 None,
1149 &[],
1150 None,
1151 Some(target_hash),
1152 )
1153}
1154
1155#[allow(clippy::too_many_arguments)]
1156fn parse_leaf_node_storage(
1157 storage: &dyn Storage,
1158 address: u64,
1159 header: &BTreeV2Header,
1160 offset_size: u8,
1161 length_size: u8,
1162 ndims: Option<u32>,
1163 chunk_dims: &[u32],
1164 chunk_bounds: Option<(&[u64], &[u64])>,
1165 link_name_hash: Option<u32>,
1166 num_records: u16,
1167 heap_id_len: usize,
1168 records: &mut Vec<BTreeV2Record>,
1169) -> Result<()> {
1170 let node_len = usize::try_from(header.node_size).map_err(|_| {
1171 Error::InvalidData("B-tree v2 node size exceeds platform usize capacity".into())
1172 })?;
1173 let node_bytes = storage.read_range(address, node_len)?;
1174 let mut cursor = Cursor::new(node_bytes.as_ref());
1175 parse_leaf_node(
1176 &mut cursor,
1177 header,
1178 offset_size,
1179 length_size,
1180 ndims,
1181 chunk_dims,
1182 chunk_bounds,
1183 link_name_hash,
1184 num_records,
1185 heap_id_len,
1186 records,
1187 )
1188}
1189
1190#[allow(clippy::too_many_arguments)]
1191fn parse_internal_node_storage(
1192 storage: &dyn Storage,
1193 address: u64,
1194 header: &BTreeV2Header,
1195 offset_size: u8,
1196 length_size: u8,
1197 ndims: Option<u32>,
1198 chunk_dims: &[u32],
1199 chunk_bounds: Option<(&[u64], &[u64])>,
1200 link_name_hash: Option<u32>,
1201 num_records: u16,
1202 depth: u16,
1203 heap_id_len: usize,
1204 visited: &mut HashSet<u64>,
1205 records: &mut Vec<BTreeV2Record>,
1206) -> Result<()> {
1207 if depth == 0 {
1208 return Err(Error::InvalidData(
1209 "B-tree v2 internal node traversal reached depth zero".into(),
1210 ));
1211 }
1212 enter_btree_v2_node(visited, address)?;
1213
1214 let node_len = usize::try_from(header.node_size).map_err(|_| {
1215 Error::InvalidData("B-tree v2 node size exceeds platform usize capacity".into())
1216 })?;
1217 let node_bytes = storage.read_range(address, node_len)?;
1218 let mut cursor = Cursor::new(node_bytes.as_ref());
1219 let start = cursor.position();
1220
1221 let sig = cursor.read_bytes(4)?;
1222 if sig != BTIN_SIGNATURE {
1223 return Err(Error::InvalidBTreeV2Signature {
1224 context: "internal node",
1225 });
1226 }
1227
1228 let version = cursor.read_u8()?;
1229 if version != 0 {
1230 return Err(Error::UnsupportedBTreeVersion(version));
1231 }
1232
1233 let node_type = cursor.read_u8()?;
1234 if node_type != header.btree_type {
1235 return Err(Error::InvalidData(format!(
1236 "B-tree v2 internal node type mismatch: header says {}, node says {}",
1237 header.btree_type, node_type
1238 )));
1239 }
1240
1241 let max_child_records = if depth == 1 {
1242 max_leaf_records(header.node_size, header.record_size)
1243 } else {
1244 let leaf_max = max_leaf_records(header.node_size, header.record_size);
1245 let mut prev_max = leaf_max;
1246 for _ in 1..depth {
1247 prev_max =
1248 max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
1249 }
1250 prev_max
1251 };
1252 let nrec_bytes = num_records_size(max_child_records);
1253
1254 let mut node_records = Vec::with_capacity(num_records as usize);
1255 for _ in 0..num_records {
1256 let record = parse_record(
1257 &mut cursor,
1258 header.btree_type,
1259 header.record_size,
1260 offset_size,
1261 length_size,
1262 ndims,
1263 chunk_dims,
1264 heap_id_len,
1265 )?;
1266 node_records.push(record);
1267 }
1268
1269 let num_children = usize::from(num_records) + 1;
1270 let has_total_records = depth > 1;
1271 let total_nrec_bytes = if has_total_records {
1272 usize::from(length_size)
1273 } else {
1274 0
1275 };
1276
1277 let mut child_addresses = Vec::with_capacity(num_children);
1278 let mut child_nrecords = Vec::with_capacity(num_children);
1279 for _ in 0..num_children {
1280 child_addresses.push(cursor.read_offset(offset_size)?);
1281 child_nrecords.push(cursor.read_uvar(nrec_bytes)? as u16);
1282 if has_total_records {
1283 cursor.read_uvar(total_nrec_bytes)?;
1284 }
1285 }
1286
1287 let compact_end = cursor.position();
1288 verify_node_checksum(
1289 &mut cursor,
1290 start,
1291 header.node_size,
1292 compact_end,
1293 "internal node",
1294 )?;
1295
1296 let child_depth = depth - 1;
1297 for (i, record) in node_records.iter().enumerate() {
1298 let child_addr = child_addresses[i];
1299 if !Cursor::is_undefined_offset(child_addr, offset_size)
1300 && child_may_match_link_name_hash(&node_records, i, link_name_hash)
1301 {
1302 let child_nrec = child_nrecords[i];
1303 if child_depth == 0 {
1304 enter_btree_v2_node(visited, child_addr)?;
1305 parse_leaf_node_storage(
1306 storage,
1307 child_addr,
1308 header,
1309 offset_size,
1310 length_size,
1311 ndims,
1312 chunk_dims,
1313 chunk_bounds,
1314 link_name_hash,
1315 child_nrec,
1316 heap_id_len,
1317 records,
1318 )?;
1319 } else {
1320 parse_internal_node_storage(
1321 storage,
1322 child_addr,
1323 header,
1324 offset_size,
1325 length_size,
1326 ndims,
1327 chunk_dims,
1328 chunk_bounds,
1329 link_name_hash,
1330 child_nrec,
1331 child_depth,
1332 heap_id_len,
1333 visited,
1334 records,
1335 )?;
1336 }
1337 }
1338
1339 if record_matches_query(record, chunk_dims, chunk_bounds, link_name_hash) {
1340 records.push(record.clone());
1341 }
1342 }
1343
1344 let final_child_index = node_records.len();
1345 let final_child_addr = child_addresses[final_child_index];
1346 if !Cursor::is_undefined_offset(final_child_addr, offset_size)
1347 && child_may_match_link_name_hash(&node_records, final_child_index, link_name_hash)
1348 {
1349 let child_nrec = child_nrecords[final_child_index];
1350 if child_depth == 0 {
1351 enter_btree_v2_node(visited, final_child_addr)?;
1352 parse_leaf_node_storage(
1353 storage,
1354 final_child_addr,
1355 header,
1356 offset_size,
1357 length_size,
1358 ndims,
1359 chunk_dims,
1360 chunk_bounds,
1361 link_name_hash,
1362 child_nrec,
1363 heap_id_len,
1364 records,
1365 )?;
1366 } else {
1367 parse_internal_node_storage(
1368 storage,
1369 final_child_addr,
1370 header,
1371 offset_size,
1372 length_size,
1373 ndims,
1374 chunk_dims,
1375 chunk_bounds,
1376 link_name_hash,
1377 child_nrec,
1378 child_depth,
1379 heap_id_len,
1380 visited,
1381 records,
1382 )?;
1383 }
1384 }
1385
1386 Ok(())
1387}
1388
1389fn compute_heap_id_len(header: &BTreeV2Header) -> usize {
1395 let rs = header.record_size as usize;
1396 match header.btree_type {
1397 5 => rs.saturating_sub(4), 6 => rs.saturating_sub(8), 8 => rs.saturating_sub(4 + 1 + 4), 9 => rs.saturating_sub(4), _ => 0,
1402 }
1403}
1404
1405#[cfg(test)]
1406mod tests {
1407 use super::*;
1408
1409 #[allow(clippy::too_many_arguments)]
1411 fn build_header(
1412 btree_type: u8,
1413 node_size: u32,
1414 record_size: u16,
1415 depth: u16,
1416 root_node_address: u64,
1417 num_records_in_root: u16,
1418 total_records: u64,
1419 offset_size: u8,
1420 length_size: u8,
1421 ) -> Vec<u8> {
1422 let mut buf = Vec::new();
1423 buf.extend_from_slice(b"BTHD");
1424 buf.push(0); buf.push(btree_type);
1426 buf.extend_from_slice(&node_size.to_le_bytes());
1427 buf.extend_from_slice(&record_size.to_le_bytes());
1428 buf.extend_from_slice(&depth.to_le_bytes());
1429 buf.push(75); buf.push(40); match offset_size {
1432 4 => buf.extend_from_slice(&(root_node_address as u32).to_le_bytes()),
1433 8 => buf.extend_from_slice(&root_node_address.to_le_bytes()),
1434 _ => panic!("unsupported"),
1435 }
1436 buf.extend_from_slice(&num_records_in_root.to_le_bytes());
1437 match length_size {
1438 4 => buf.extend_from_slice(&(total_records as u32).to_le_bytes()),
1439 8 => buf.extend_from_slice(&total_records.to_le_bytes()),
1440 _ => panic!("unsupported"),
1441 }
1442 let checksum = jenkins_lookup3(&buf);
1444 buf.extend_from_slice(&checksum.to_le_bytes());
1445 buf
1446 }
1447
1448 #[test]
1449 fn parse_header() {
1450 let data = build_header(5, 4096, 12, 0, 0x1000, 3, 3, 8, 8);
1451 let mut cursor = Cursor::new(&data);
1452 let hdr = BTreeV2Header::parse(&mut cursor, 8, 8).unwrap();
1453
1454 assert_eq!(hdr.btree_type, 5);
1455 assert_eq!(hdr.node_size, 4096);
1456 assert_eq!(hdr.record_size, 12);
1457 assert_eq!(hdr.depth, 0);
1458 assert_eq!(hdr.split_percent, 75);
1459 assert_eq!(hdr.merge_percent, 40);
1460 assert_eq!(hdr.root_node_address, 0x1000);
1461 assert_eq!(hdr.num_records_in_root, 3);
1462 assert_eq!(hdr.total_records, 3);
1463 }
1464
1465 #[test]
1466 fn bad_signature() {
1467 let mut data = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
1468 data[0] = b'X';
1469 let mut cursor = Cursor::new(&data);
1470 assert!(matches!(
1471 BTreeV2Header::parse(&mut cursor, 8, 8),
1472 Err(Error::InvalidBTreeV2Signature { .. })
1473 ));
1474 }
1475
1476 #[test]
1477 fn bad_checksum() {
1478 let mut data = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
1479 data[6] = 0xFF;
1481 let mut cursor = Cursor::new(&data);
1482 assert!(matches!(
1483 BTreeV2Header::parse(&mut cursor, 8, 8),
1484 Err(Error::ChecksumMismatch { .. })
1485 ));
1486 }
1487
1488 #[test]
1489 fn collect_empty_tree() {
1490 let header = BTreeV2Header {
1491 btree_type: 5,
1492 node_size: 4096,
1493 record_size: 12,
1494 depth: 0,
1495 split_percent: 75,
1496 merge_percent: 40,
1497 root_node_address: u64::MAX,
1498 num_records_in_root: 0,
1499 total_records: 0,
1500 };
1501 let data = vec![0u8; 100];
1502 let records = collect_btree_v2_records(&data, &header, 8, 8, None, &[], None).unwrap();
1503 assert!(records.is_empty());
1504 }
1505
1506 #[test]
1507 fn heap_id_len_uses_record_type_sizes() {
1508 let h5 = BTreeV2Header {
1510 btree_type: 5,
1511 record_size: 12,
1512 node_size: 0,
1513 depth: 0,
1514 split_percent: 0,
1515 merge_percent: 0,
1516 root_node_address: 0,
1517 num_records_in_root: 0,
1518 total_records: 0,
1519 };
1520 assert_eq!(compute_heap_id_len(&h5), 8);
1521
1522 let h8 = BTreeV2Header {
1524 btree_type: 8,
1525 record_size: 17,
1526 ..h5
1527 };
1528 assert_eq!(compute_heap_id_len(&h8), 8);
1529 }
1530
1531 #[test]
1532 fn max_leaf_records_accounts_for_node_overhead() {
1533 assert_eq!(max_leaf_records(4096, 12), 340);
1536 }
1537
1538 #[test]
1539 fn record_count_size_scales_at_integer_boundaries() {
1540 assert_eq!(num_records_size(0), 1);
1541 assert_eq!(num_records_size(255), 1);
1542 assert_eq!(num_records_size(256), 2);
1543 assert_eq!(num_records_size(65535), 2);
1544 assert_eq!(num_records_size(65536), 4);
1545 }
1546
1547 #[test]
1548 fn parse_huge_indirect_record() {
1549 let mut data = Vec::new();
1550 data.extend_from_slice(&0x1234u64.to_le_bytes());
1551 data.extend_from_slice(&99u64.to_le_bytes());
1552 data.extend_from_slice(&7u64.to_le_bytes());
1553 let mut cursor = Cursor::new(&data);
1554
1555 let record = parse_record(&mut cursor, 1, data.len() as u16, 8, 8, None, &[], 0).unwrap();
1556 match record {
1557 BTreeV2Record::HugeIndirectNonFiltered {
1558 address,
1559 length,
1560 object_id,
1561 } => {
1562 assert_eq!(address, 0x1234);
1563 assert_eq!(length, 99);
1564 assert_eq!(object_id, 7);
1565 }
1566 other => panic!("expected huge record, got {:?}", other),
1567 }
1568 }
1569
1570 #[test]
1571 fn parse_shared_heap_record() {
1572 let mut data = Vec::new();
1573 data.push(0);
1574 data.extend_from_slice(&[0, 0, 0]);
1575 data.extend_from_slice(&0xAABB_CCDDu32.to_le_bytes());
1576 data.extend_from_slice(&3u32.to_le_bytes());
1577 data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
1578 let mut cursor = Cursor::new(&data);
1579
1580 let record = parse_record(&mut cursor, 7, data.len() as u16, 8, 8, None, &[], 0).unwrap();
1581 match record {
1582 BTreeV2Record::SharedMessageHeap {
1583 hash,
1584 reference_count,
1585 heap_id,
1586 } => {
1587 assert_eq!(hash, 0xAABB_CCDD);
1588 assert_eq!(reference_count, 3);
1589 assert_eq!(heap_id, vec![1, 2, 3, 4, 5, 6, 7, 8]);
1590 }
1591 other => panic!("expected shared heap record, got {:?}", other),
1592 }
1593 }
1594
1595 #[test]
1596 fn parse_record_rejects_known_record_that_exceeds_record_size() {
1597 let mut data = Vec::new();
1598 data.extend_from_slice(&0x1234u64.to_le_bytes());
1599 data.extend_from_slice(&99u64.to_le_bytes());
1600 data.extend_from_slice(&7u64.to_le_bytes());
1601 let mut cursor = Cursor::new(&data);
1602
1603 let err = parse_record(&mut cursor, 1, 16, 8, 8, None, &[], 0).unwrap_err();
1604 assert!(
1605 matches!(err, Error::InvalidData(message) if message.contains("consumed 24 bytes but record size is 16"))
1606 );
1607 }
1608
1609 #[test]
1610 fn parse_chunk_record_rejects_size_shorter_than_address() {
1611 let mut data = Vec::new();
1612 data.extend_from_slice(&0x1234u64.to_le_bytes());
1613 let mut cursor = Cursor::new(&data);
1614
1615 let err = parse_record(&mut cursor, 10, 4, 8, 8, None, &[], 0).unwrap_err();
1616 assert!(
1617 matches!(err, Error::InvalidData(message) if message.contains("shorter than its address"))
1618 );
1619 assert_eq!(cursor.position(), 0);
1620 }
1621
1622 #[test]
1623 fn parse_filtered_chunk_record_rejects_size_shorter_than_fixed_fields() {
1624 let mut data = Vec::new();
1625 data.extend_from_slice(&0x1234u64.to_le_bytes());
1626 data.extend_from_slice(&99u64.to_le_bytes());
1627 data.extend_from_slice(&0xAu32.to_le_bytes());
1628 let mut cursor = Cursor::new(&data);
1629
1630 let err = parse_record(&mut cursor, 11, 16, 8, 8, None, &[], 0).unwrap_err();
1631 assert!(
1632 matches!(err, Error::InvalidData(message) if message.contains("shorter than its fixed fields"))
1633 );
1634 assert_eq!(cursor.position(), 0);
1635 }
1636
1637 #[test]
1638 fn parse_chunk_record_scales_offsets_by_chunk_dimensions() {
1639 let mut data = Vec::new();
1640 data.extend_from_slice(&0x1234u64.to_le_bytes());
1641 data.extend_from_slice(&2u64.to_le_bytes());
1642 data.extend_from_slice(&1u64.to_le_bytes());
1643 let mut cursor = Cursor::new(&data);
1644
1645 let record = parse_record(
1646 &mut cursor,
1647 10,
1648 data.len() as u16,
1649 8,
1650 8,
1651 Some(2),
1652 &[5, 7],
1653 0,
1654 )
1655 .unwrap();
1656 match record {
1657 BTreeV2Record::ChunkedNonFiltered { address, offsets } => {
1658 assert_eq!(address, 0x1234);
1659 assert_eq!(offsets, vec![10, 7]);
1660 }
1661 other => panic!("expected non-filtered chunk record, got {:?}", other),
1662 }
1663 }
1664
1665 #[test]
1666 fn parse_filtered_chunk_record_scales_offsets_by_chunk_dimensions() {
1667 let mut data = Vec::new();
1668 data.extend_from_slice(&0x1234u64.to_le_bytes());
1669 data.extend_from_slice(&99u64.to_le_bytes());
1670 data.extend_from_slice(&0xAu32.to_le_bytes());
1671 data.extend_from_slice(&3u64.to_le_bytes());
1672 data.extend_from_slice(&4u64.to_le_bytes());
1673 let mut cursor = Cursor::new(&data);
1674
1675 let record = parse_record(
1676 &mut cursor,
1677 11,
1678 data.len() as u16,
1679 8,
1680 8,
1681 Some(2),
1682 &[5, 7],
1683 0,
1684 )
1685 .unwrap();
1686 match record {
1687 BTreeV2Record::ChunkedFiltered {
1688 address,
1689 chunk_size,
1690 filter_mask,
1691 offsets,
1692 } => {
1693 assert_eq!(address, 0x1234);
1694 assert_eq!(chunk_size, 99);
1695 assert_eq!(filter_mask, 0xA);
1696 assert_eq!(offsets, vec![15, 28]);
1697 }
1698 other => panic!("expected filtered chunk record, got {:?}", other),
1699 }
1700 }
1701
1702 #[test]
1703 fn parse_leaf_with_type5_records() {
1704 let record_size: u16 = 12;
1707 let node_size: u32 = 4096;
1708
1709 let header = BTreeV2Header {
1710 btree_type: 5,
1711 node_size,
1712 record_size,
1713 depth: 0,
1714 split_percent: 75,
1715 merge_percent: 40,
1716 root_node_address: 0, num_records_in_root: 2,
1718 total_records: 2,
1719 };
1720
1721 let mut leaf = Vec::new();
1723 leaf.extend_from_slice(b"BTLF"); leaf.push(0); leaf.push(5); leaf.extend_from_slice(&0xAABBCCDDu32.to_le_bytes());
1729 leaf.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
1730
1731 leaf.extend_from_slice(&0x11223344u32.to_le_bytes());
1733 leaf.extend_from_slice(&[9, 10, 11, 12, 13, 14, 15, 16]);
1734
1735 let checksum = jenkins_lookup3(&leaf);
1738 leaf.extend_from_slice(&checksum.to_le_bytes());
1739 leaf.resize(node_size as usize, 0);
1740
1741 let mut records = Vec::new();
1742 let mut cursor = Cursor::new(&leaf);
1743 parse_leaf_node(
1744 &mut cursor,
1745 &header,
1746 8,
1747 8,
1748 None,
1749 &[],
1750 None,
1751 None,
1752 2,
1753 8, &mut records,
1755 )
1756 .unwrap();
1757
1758 assert_eq!(records.len(), 2);
1759 match &records[0] {
1760 BTreeV2Record::LinkNameHash { hash, heap_id } => {
1761 assert_eq!(*hash, 0xAABBCCDD);
1762 assert_eq!(heap_id, &[1, 2, 3, 4, 5, 6, 7, 8]);
1763 }
1764 _ => panic!("expected LinkNameHash"),
1765 }
1766 match &records[1] {
1767 BTreeV2Record::LinkNameHash { hash, heap_id } => {
1768 assert_eq!(*hash, 0x11223344);
1769 assert_eq!(heap_id, &[9, 10, 11, 12, 13, 14, 15, 16]);
1770 }
1771 _ => panic!("expected LinkNameHash"),
1772 }
1773 }
1774
1775 fn build_type5_record(hash: u32, heap_id: [u8; 8]) -> Vec<u8> {
1776 let mut record = Vec::new();
1777 record.extend_from_slice(&hash.to_le_bytes());
1778 record.extend_from_slice(&heap_id);
1779 record
1780 }
1781
1782 fn build_type5_leaf(records: &[(u32, [u8; 8])], node_size: usize) -> Vec<u8> {
1783 let mut leaf = Vec::new();
1784 leaf.extend_from_slice(b"BTLF");
1785 leaf.push(0);
1786 leaf.push(5);
1787 for &(hash, heap_id) in records {
1788 leaf.extend_from_slice(&build_type5_record(hash, heap_id));
1789 }
1790 let checksum = jenkins_lookup3(&leaf);
1791 leaf.extend_from_slice(&checksum.to_le_bytes());
1792 leaf.resize(node_size, 0);
1793 leaf
1794 }
1795
1796 fn build_type5_internal(
1797 records: &[(u32, [u8; 8])],
1798 children: &[(u64, u8)],
1799 node_size: usize,
1800 ) -> Vec<u8> {
1801 assert_eq!(children.len(), records.len() + 1);
1802
1803 let mut node = Vec::new();
1804 node.extend_from_slice(b"BTIN");
1805 node.push(0);
1806 node.push(5);
1807 for &(hash, heap_id) in records {
1808 node.extend_from_slice(&build_type5_record(hash, heap_id));
1809 }
1810 for &(address, child_records) in children {
1811 node.extend_from_slice(&address.to_le_bytes());
1812 node.push(child_records);
1813 }
1814 let checksum = jenkins_lookup3(&node);
1815 node.extend_from_slice(&checksum.to_le_bytes());
1816 node.resize(node_size, 0);
1817 node
1818 }
1819
1820 fn link_hashes(records: &[BTreeV2Record]) -> Vec<u32> {
1821 records
1822 .iter()
1823 .map(|record| match record {
1824 BTreeV2Record::LinkNameHash { hash, .. } => *hash,
1825 other => panic!("expected LinkNameHash, got {other:?}"),
1826 })
1827 .collect()
1828 }
1829
1830 #[test]
1831 fn collect_depth_one_tree_includes_internal_records_in_order() {
1832 let node_size = 128usize;
1836 let left_addr = node_size as u64;
1837 let right_addr = (node_size * 2) as u64;
1838 let header = BTreeV2Header {
1839 btree_type: 5,
1840 node_size: node_size as u32,
1841 record_size: 12,
1842 depth: 1,
1843 split_percent: 75,
1844 merge_percent: 40,
1845 root_node_address: 0,
1846 num_records_in_root: 1,
1847 total_records: 3,
1848 };
1849
1850 let root = build_type5_internal(
1851 &[(20, [20; 8])],
1852 &[(left_addr, 1), (right_addr, 1)],
1853 node_size,
1854 );
1855 let left_leaf = build_type5_leaf(&[(10, [10; 8])], node_size);
1856 let right_leaf = build_type5_leaf(&[(30, [30; 8])], node_size);
1857
1858 let mut data = vec![0; node_size * 3];
1859 data[0..node_size].copy_from_slice(&root);
1860 data[node_size..node_size * 2].copy_from_slice(&left_leaf);
1861 data[node_size * 2..node_size * 3].copy_from_slice(&right_leaf);
1862
1863 let records = collect_btree_v2_records(&data, &header, 8, 8, None, &[], None).unwrap();
1864 assert_eq!(link_hashes(&records), vec![10, 20, 30]);
1865
1866 let storage = crate::storage::BytesStorage::new(data);
1867 let records =
1868 collect_btree_v2_records_storage(&storage, &header, 8, 8, None, &[], None).unwrap();
1869 assert_eq!(link_hashes(&records), vec![10, 20, 30]);
1870 }
1871
1872 #[test]
1873 fn collect_rejects_depth_above_traversal_limit() {
1874 let header = BTreeV2Header {
1875 btree_type: 5,
1876 node_size: 128,
1877 record_size: 12,
1878 depth: MAX_BTREE_V2_DEPTH + 1,
1879 split_percent: 75,
1880 merge_percent: 40,
1881 root_node_address: 0,
1882 num_records_in_root: 1,
1883 total_records: 1,
1884 };
1885 let data = vec![0; 128];
1886
1887 let err = collect_btree_v2_records(&data, &header, 8, 8, None, &[], None).unwrap_err();
1888 assert!(
1889 matches!(err, Error::InvalidData(message) if message.contains("exceeds traversal limit"))
1890 );
1891 }
1892
1893 #[test]
1894 fn collect_rejects_revisited_leaf_node() {
1895 let node_size = 128usize;
1896 let leaf_addr = node_size as u64;
1897 let header = BTreeV2Header {
1898 btree_type: 5,
1899 node_size: node_size as u32,
1900 record_size: 12,
1901 depth: 1,
1902 split_percent: 75,
1903 merge_percent: 40,
1904 root_node_address: 0,
1905 num_records_in_root: 1,
1906 total_records: 3,
1907 };
1908
1909 let root = build_type5_internal(
1910 &[(20, [20; 8])],
1911 &[(leaf_addr, 1), (leaf_addr, 1)],
1912 node_size,
1913 );
1914 let leaf = build_type5_leaf(&[(10, [10; 8])], node_size);
1915
1916 let mut data = vec![0; node_size * 2];
1917 data[0..node_size].copy_from_slice(&root);
1918 data[node_size..node_size * 2].copy_from_slice(&leaf);
1919
1920 let err = collect_btree_v2_records(&data, &header, 8, 8, None, &[], None).unwrap_err();
1921 assert!(matches!(err, Error::InvalidData(message) if message.contains("revisits node")));
1922
1923 let storage = crate::storage::BytesStorage::new(data);
1924 let err =
1925 collect_btree_v2_records_storage(&storage, &header, 8, 8, None, &[], None).unwrap_err();
1926 assert!(matches!(err, Error::InvalidData(message) if message.contains("revisits node")));
1927 }
1928
1929 #[test]
1930 fn collect_link_name_hash_records_filters_leaf_records() {
1931 let record_size: u16 = 12;
1932 let node_size: u32 = 4096;
1933 let header = BTreeV2Header {
1934 btree_type: 5,
1935 node_size,
1936 record_size,
1937 depth: 0,
1938 split_percent: 75,
1939 merge_percent: 40,
1940 root_node_address: 0,
1941 num_records_in_root: 2,
1942 total_records: 2,
1943 };
1944
1945 let mut leaf = Vec::new();
1946 leaf.extend_from_slice(b"BTLF");
1947 leaf.push(0);
1948 leaf.push(5);
1949 leaf.extend_from_slice(&0x1111_1111u32.to_le_bytes());
1950 leaf.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
1951 leaf.extend_from_slice(&0x2222_2222u32.to_le_bytes());
1952 leaf.extend_from_slice(&[9, 10, 11, 12, 13, 14, 15, 16]);
1953 let checksum = jenkins_lookup3(&leaf);
1954 leaf.extend_from_slice(&checksum.to_le_bytes());
1955 leaf.resize(node_size as usize, 0);
1956
1957 let records =
1958 collect_btree_v2_link_name_hash_records(&leaf, &header, 8, 8, 0x2222_2222).unwrap();
1959 assert_eq!(records.len(), 1);
1960 match &records[0] {
1961 BTreeV2Record::LinkNameHash { hash, heap_id } => {
1962 assert_eq!(*hash, 0x2222_2222);
1963 assert_eq!(heap_id, &[9, 10, 11, 12, 13, 14, 15, 16]);
1964 }
1965 other => panic!("expected LinkNameHash, got {other:?}"),
1966 }
1967 }
1968
1969 #[test]
1970 fn child_link_hash_filter_prunes_disjoint_hash_ranges() {
1971 let records = vec![
1972 BTreeV2Record::LinkNameHash {
1973 hash: 10,
1974 heap_id: vec![],
1975 },
1976 BTreeV2Record::LinkNameHash {
1977 hash: 20,
1978 heap_id: vec![],
1979 },
1980 ];
1981
1982 assert!(child_may_match_link_name_hash(&records, 0, Some(10)));
1983 assert!(!child_may_match_link_name_hash(&records, 0, Some(15)));
1984 assert!(child_may_match_link_name_hash(&records, 1, Some(15)));
1985 assert!(!child_may_match_link_name_hash(&records, 2, Some(15)));
1986 assert!(child_may_match_link_name_hash(&records, 2, Some(20)));
1987 }
1988}