1use crate::checksum::jenkins_lookup3;
12use crate::error::{Error, Result};
13use crate::io::Cursor;
14
15const BTHD_SIGNATURE: [u8; 4] = *b"BTHD";
20const BTIN_SIGNATURE: [u8; 4] = *b"BTIN";
21const BTLF_SIGNATURE: [u8; 4] = *b"BTLF";
22
23#[derive(Debug, Clone)]
29pub struct BTreeV2Header {
30 pub btree_type: u8,
32 pub node_size: u32,
34 pub record_size: u16,
36 pub depth: u16,
38 pub split_percent: u8,
40 pub merge_percent: u8,
42 pub root_node_address: u64,
44 pub num_records_in_root: u16,
46 pub total_records: u64,
48}
49
50impl BTreeV2Header {
51 pub fn parse(cursor: &mut Cursor, offset_size: u8, length_size: u8) -> Result<Self> {
67 let start = cursor.position();
68
69 let sig = cursor.read_bytes(4)?;
70 if sig != BTHD_SIGNATURE {
71 return Err(Error::InvalidBTreeV2Signature { context: "header" });
72 }
73
74 let version = cursor.read_u8()?;
75 if version != 0 {
76 return Err(Error::UnsupportedBTreeVersion(version));
77 }
78
79 let btree_type = cursor.read_u8()?;
80 let node_size = cursor.read_u32_le()?;
81 let record_size = cursor.read_u16_le()?;
82 let depth = cursor.read_u16_le()?;
83 let split_percent = cursor.read_u8()?;
84 let merge_percent = cursor.read_u8()?;
85 let root_node_address = cursor.read_offset(offset_size)?;
86 let num_records_in_root = cursor.read_u16_le()?;
87 let total_records = cursor.read_length(length_size)?;
88
89 let checksum_end = cursor.position();
91 let stored_checksum = cursor.read_u32_le()?;
92
93 let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_end as usize]);
94 if computed != stored_checksum {
95 return Err(Error::ChecksumMismatch {
96 expected: stored_checksum,
97 actual: computed,
98 });
99 }
100
101 Ok(BTreeV2Header {
102 btree_type,
103 node_size,
104 record_size,
105 depth,
106 split_percent,
107 merge_percent,
108 root_node_address,
109 num_records_in_root,
110 total_records,
111 })
112 }
113}
114
115#[derive(Debug, Clone)]
123pub enum BTreeV2Record {
124 LinkNameHash { hash: u32, heap_id: Vec<u8> },
126 CreationOrder { order: u64, heap_id: Vec<u8> },
128 AttributeNameHash {
130 hash: u32,
131 flags: u8,
132 creation_order: u32,
133 heap_id: Vec<u8>,
134 },
135 AttributeCreationOrder { order: u32, heap_id: Vec<u8> },
137 ChunkedNonFiltered { address: u64, offsets: Vec<u64> },
139 ChunkedFiltered {
141 address: u64,
142 chunk_size: u64,
143 filter_mask: u32,
144 offsets: Vec<u64>,
145 },
146 Unknown { record_type: u8, data: Vec<u8> },
148}
149
150fn parse_record(
156 cursor: &mut Cursor,
157 btree_type: u8,
158 record_size: u16,
159 offset_size: u8,
160 length_size: u8,
161 _ndims: Option<u32>,
162 heap_id_len: usize,
163) -> Result<BTreeV2Record> {
164 let record_start = cursor.position();
165
166 let record = match btree_type {
167 5 => {
169 let hash = cursor.read_u32_le()?;
170 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
171 BTreeV2Record::LinkNameHash { hash, heap_id }
172 }
173
174 6 => {
176 let order = cursor.read_u64_le()?;
177 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
178 BTreeV2Record::CreationOrder { order, heap_id }
179 }
180
181 8 => {
183 let hash = cursor.read_u32_le()?;
184 let flags = cursor.read_u8()?;
185 let creation_order = cursor.read_u32_le()?;
186 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
187 BTreeV2Record::AttributeNameHash {
188 hash,
189 flags,
190 creation_order,
191 heap_id,
192 }
193 }
194
195 9 => {
197 let order = cursor.read_u32_le()?;
198 let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
199 BTreeV2Record::AttributeCreationOrder { order, heap_id }
200 }
201
202 10 => {
204 let address = cursor.read_offset(offset_size)?;
205 let offset_bytes_available = record_size as usize - offset_size as usize;
209 let num_offsets = offset_bytes_available / 8;
210 let mut offsets = Vec::with_capacity(num_offsets);
211 for _ in 0..num_offsets {
212 offsets.push(cursor.read_u64_le()?);
213 }
214 BTreeV2Record::ChunkedNonFiltered { address, offsets }
215 }
216
217 11 => {
219 let address = cursor.read_offset(offset_size)?;
220 let nbytes_size = length_size as usize;
222 let chunk_size = cursor.read_length(length_size)?;
223 let filter_mask = cursor.read_u32_le()?;
224 let remaining = record_size as usize - offset_size as usize - nbytes_size - 4; let num_offsets = remaining / 8;
226 let mut offsets = Vec::with_capacity(num_offsets);
227 for _ in 0..num_offsets {
228 offsets.push(cursor.read_u64_le()?);
229 }
230 BTreeV2Record::ChunkedFiltered {
231 address,
232 chunk_size,
233 filter_mask,
234 offsets,
235 }
236 }
237
238 _ => {
240 let data = cursor.read_bytes(record_size as usize)?.to_vec();
241 return Ok(BTreeV2Record::Unknown {
242 record_type: btree_type,
243 data,
244 });
245 }
246 };
247
248 let consumed = (cursor.position() - record_start) as usize;
250 if consumed < record_size as usize {
251 cursor.skip(record_size as usize - consumed)?;
252 }
253
254 Ok(record)
255}
256
257fn record_matches_chunk_bounds(
258 record: &BTreeV2Record,
259 chunk_dims: &[u32],
260 chunk_bounds: Option<(&[u64], &[u64])>,
261) -> bool {
262 let Some((first_chunk, last_chunk)) = chunk_bounds else {
263 return true;
264 };
265
266 let offsets = match record {
267 BTreeV2Record::ChunkedNonFiltered { offsets, .. }
268 | BTreeV2Record::ChunkedFiltered { offsets, .. } => offsets,
269 _ => return true,
270 };
271
272 offsets.iter().enumerate().all(|(dim, offset)| {
273 let chunk_index = *offset / u64::from(chunk_dims[dim]);
274 chunk_index >= first_chunk[dim] && chunk_index <= last_chunk[dim]
275 })
276}
277
278fn num_records_size(max_records: u64) -> usize {
285 if max_records <= 0xFF {
286 1
287 } else if max_records <= 0xFFFF {
288 2
289 } else if max_records <= 0xFFFF_FFFF {
290 4
291 } else {
292 8
293 }
294}
295
296fn max_leaf_records(node_size: u32, record_size: u16) -> u64 {
298 let overhead = 10u32;
300 if node_size <= overhead || record_size == 0 {
301 return 0;
302 }
303 ((node_size - overhead) / record_size as u32) as u64
304}
305
306fn max_internal_records(
311 node_size: u32,
312 record_size: u16,
313 offset_size: u8,
314 max_child_records: u64,
315) -> u64 {
316 let overhead = 10u32;
318 if node_size <= overhead || record_size == 0 {
319 return 0;
320 }
321 let available = (node_size - overhead) as u64;
322 let nrec_size = num_records_size(max_child_records) as u64;
324 let entry_size = record_size as u64 + offset_size as u64 + nrec_size;
325 let extra = offset_size as u64 + nrec_size;
329 if available <= extra {
330 return 0;
331 }
332 (available - extra) / entry_size
333}
334
335#[allow(clippy::too_many_arguments)]
337fn parse_leaf_node(
338 cursor: &mut Cursor,
339 header: &BTreeV2Header,
340 offset_size: u8,
341 length_size: u8,
342 ndims: Option<u32>,
343 chunk_dims: &[u32],
344 chunk_bounds: Option<(&[u64], &[u64])>,
345 num_records: u16,
346 heap_id_len: usize,
347 records: &mut Vec<BTreeV2Record>,
348) -> Result<()> {
349 let start = cursor.position();
350
351 let sig = cursor.read_bytes(4)?;
352 if sig != BTLF_SIGNATURE {
353 return Err(Error::InvalidBTreeV2Signature {
354 context: "leaf node",
355 });
356 }
357
358 let version = cursor.read_u8()?;
359 if version != 0 {
360 return Err(Error::UnsupportedBTreeVersion(version));
361 }
362
363 let node_type = cursor.read_u8()?;
364 if node_type != header.btree_type {
365 return Err(Error::InvalidData(format!(
366 "B-tree v2 leaf node type mismatch: header says {}, node says {}",
367 header.btree_type, node_type
368 )));
369 }
370
371 for _ in 0..num_records {
372 let record = parse_record(
373 cursor,
374 header.btree_type,
375 header.record_size,
376 offset_size,
377 length_size,
378 ndims,
379 heap_id_len,
380 )?;
381 if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
382 records.push(record);
383 }
384 }
385
386 let checksum_data_end = cursor.position();
388 let stored_checksum = cursor.read_u32_le()?;
389 let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_data_end as usize]);
390 if computed != stored_checksum {
391 return Err(Error::ChecksumMismatch {
392 expected: stored_checksum,
393 actual: computed,
394 });
395 }
396
397 Ok(())
398}
399
400#[allow(clippy::too_many_arguments)]
402fn parse_internal_node(
403 data: &[u8],
404 address: u64,
405 header: &BTreeV2Header,
406 offset_size: u8,
407 length_size: u8,
408 ndims: Option<u32>,
409 chunk_dims: &[u32],
410 chunk_bounds: Option<(&[u64], &[u64])>,
411 num_records: u16,
412 depth: u16,
413 heap_id_len: usize,
414 records: &mut Vec<BTreeV2Record>,
415) -> Result<()> {
416 let mut cursor = Cursor::new(data);
417 cursor.set_position(address);
418
419 let start = cursor.position();
420
421 let sig = cursor.read_bytes(4)?;
422 if sig != BTIN_SIGNATURE {
423 return Err(Error::InvalidBTreeV2Signature {
424 context: "internal node",
425 });
426 }
427
428 let version = cursor.read_u8()?;
429 if version != 0 {
430 return Err(Error::UnsupportedBTreeVersion(version));
431 }
432
433 let node_type = cursor.read_u8()?;
434 if node_type != header.btree_type {
435 return Err(Error::InvalidData(format!(
436 "B-tree v2 internal node type mismatch: header says {}, node says {}",
437 header.btree_type, node_type
438 )));
439 }
440
441 let max_child_records = if depth == 1 {
444 max_leaf_records(header.node_size, header.record_size)
445 } else {
446 let leaf_max = max_leaf_records(header.node_size, header.record_size);
448 let mut prev_max = leaf_max;
449 for _ in 1..depth {
450 prev_max =
451 max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
452 }
453 prev_max
454 };
455 let nrec_bytes = num_records_size(max_child_records);
456
457 let mut node_records = Vec::with_capacity(num_records as usize);
469 for _ in 0..num_records {
470 let record = parse_record(
471 &mut cursor,
472 header.btree_type,
473 header.record_size,
474 offset_size,
475 length_size,
476 ndims,
477 heap_id_len,
478 )?;
479 if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
480 node_records.push(record);
481 }
482 }
483
484 let num_children = num_records as usize + 1;
486
487 let has_total_records = depth > 1;
490 let total_nrec_bytes = if has_total_records {
492 length_size as usize
495 } else {
496 0
497 };
498
499 let mut child_addresses = Vec::with_capacity(num_children);
500 let mut child_nrecords = Vec::with_capacity(num_children);
501
502 for _ in 0..num_children {
503 let child_addr = cursor.read_offset(offset_size)?;
504 child_addresses.push(child_addr);
505 let nrec = cursor.read_uvar(nrec_bytes)?;
506 child_nrecords.push(nrec as u16);
507 if has_total_records {
508 cursor.read_uvar(total_nrec_bytes)?;
510 }
511 }
512
513 let checksum_data_end = cursor.position();
515 let stored_checksum = cursor.read_u32_le()?;
516 let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_data_end as usize]);
517 if computed != stored_checksum {
518 return Err(Error::ChecksumMismatch {
519 expected: stored_checksum,
520 actual: computed,
521 });
522 }
523
524 records.extend(node_records);
530
531 let child_depth = depth - 1;
533 for (i, &child_addr) in child_addresses.iter().enumerate() {
534 if Cursor::is_undefined_offset(child_addr, offset_size) {
535 continue;
536 }
537 let child_nrec = child_nrecords[i];
538 if child_depth == 0 {
539 let mut child_cursor = Cursor::new(data);
541 child_cursor.set_position(child_addr);
542 parse_leaf_node(
543 &mut child_cursor,
544 header,
545 offset_size,
546 length_size,
547 ndims,
548 chunk_dims,
549 chunk_bounds,
550 child_nrec,
551 heap_id_len,
552 records,
553 )?;
554 } else {
555 parse_internal_node(
557 data,
558 child_addr,
559 header,
560 offset_size,
561 length_size,
562 ndims,
563 chunk_dims,
564 chunk_bounds,
565 child_nrec,
566 child_depth,
567 heap_id_len,
568 records,
569 )?;
570 }
571 }
572
573 Ok(())
574}
575
576pub fn collect_btree_v2_records(
585 data: &[u8],
586 header: &BTreeV2Header,
587 offset_size: u8,
588 length_size: u8,
589 ndims: Option<u32>,
590 chunk_dims: &[u32],
591 chunk_bounds: Option<(&[u64], &[u64])>,
592) -> Result<Vec<BTreeV2Record>> {
593 if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
594 return Ok(Vec::new());
595 }
596
597 if header.total_records == 0 || header.num_records_in_root == 0 {
598 return Ok(Vec::new());
599 }
600
601 let heap_id_len = compute_heap_id_len(header);
603
604 let mut records = Vec::new();
605
606 if header.depth == 0 {
607 let mut cursor = Cursor::new(data);
609 cursor.set_position(header.root_node_address);
610 parse_leaf_node(
611 &mut cursor,
612 header,
613 offset_size,
614 length_size,
615 ndims,
616 chunk_dims,
617 chunk_bounds,
618 header.num_records_in_root,
619 heap_id_len,
620 &mut records,
621 )?;
622 } else {
623 parse_internal_node(
625 data,
626 header.root_node_address,
627 header,
628 offset_size,
629 length_size,
630 ndims,
631 chunk_dims,
632 chunk_bounds,
633 header.num_records_in_root,
634 header.depth,
635 heap_id_len,
636 &mut records,
637 )?;
638 }
639
640 Ok(records)
641}
642
643fn compute_heap_id_len(header: &BTreeV2Header) -> usize {
649 let rs = header.record_size as usize;
650 match header.btree_type {
651 5 => rs.saturating_sub(4), 6 => rs.saturating_sub(8), 8 => rs.saturating_sub(4 + 1 + 4), 9 => rs.saturating_sub(4), _ => 0,
656 }
657}
658
659#[cfg(test)]
660mod tests {
661 use super::*;
662
663 #[allow(clippy::too_many_arguments)]
665 fn build_header(
666 btree_type: u8,
667 node_size: u32,
668 record_size: u16,
669 depth: u16,
670 root_node_address: u64,
671 num_records_in_root: u16,
672 total_records: u64,
673 offset_size: u8,
674 length_size: u8,
675 ) -> Vec<u8> {
676 let mut buf = Vec::new();
677 buf.extend_from_slice(b"BTHD");
678 buf.push(0); buf.push(btree_type);
680 buf.extend_from_slice(&node_size.to_le_bytes());
681 buf.extend_from_slice(&record_size.to_le_bytes());
682 buf.extend_from_slice(&depth.to_le_bytes());
683 buf.push(75); buf.push(40); match offset_size {
686 4 => buf.extend_from_slice(&(root_node_address as u32).to_le_bytes()),
687 8 => buf.extend_from_slice(&root_node_address.to_le_bytes()),
688 _ => panic!("unsupported"),
689 }
690 buf.extend_from_slice(&num_records_in_root.to_le_bytes());
691 match length_size {
692 4 => buf.extend_from_slice(&(total_records as u32).to_le_bytes()),
693 8 => buf.extend_from_slice(&total_records.to_le_bytes()),
694 _ => panic!("unsupported"),
695 }
696 let checksum = jenkins_lookup3(&buf);
698 buf.extend_from_slice(&checksum.to_le_bytes());
699 buf
700 }
701
702 #[test]
703 fn test_parse_header() {
704 let data = build_header(5, 4096, 12, 0, 0x1000, 3, 3, 8, 8);
705 let mut cursor = Cursor::new(&data);
706 let hdr = BTreeV2Header::parse(&mut cursor, 8, 8).unwrap();
707
708 assert_eq!(hdr.btree_type, 5);
709 assert_eq!(hdr.node_size, 4096);
710 assert_eq!(hdr.record_size, 12);
711 assert_eq!(hdr.depth, 0);
712 assert_eq!(hdr.split_percent, 75);
713 assert_eq!(hdr.merge_percent, 40);
714 assert_eq!(hdr.root_node_address, 0x1000);
715 assert_eq!(hdr.num_records_in_root, 3);
716 assert_eq!(hdr.total_records, 3);
717 }
718
719 #[test]
720 fn test_bad_signature() {
721 let mut data = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
722 data[0] = b'X';
723 let mut cursor = Cursor::new(&data);
724 assert!(matches!(
725 BTreeV2Header::parse(&mut cursor, 8, 8),
726 Err(Error::InvalidBTreeV2Signature { .. })
727 ));
728 }
729
730 #[test]
731 fn test_bad_checksum() {
732 let mut data = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
733 data[6] = 0xFF;
735 let mut cursor = Cursor::new(&data);
736 assert!(matches!(
737 BTreeV2Header::parse(&mut cursor, 8, 8),
738 Err(Error::ChecksumMismatch { .. })
739 ));
740 }
741
742 #[test]
743 fn test_collect_empty_tree() {
744 let header = BTreeV2Header {
745 btree_type: 5,
746 node_size: 4096,
747 record_size: 12,
748 depth: 0,
749 split_percent: 75,
750 merge_percent: 40,
751 root_node_address: u64::MAX,
752 num_records_in_root: 0,
753 total_records: 0,
754 };
755 let data = vec![0u8; 100];
756 let records = collect_btree_v2_records(&data, &header, 8, 8, None, &[], None).unwrap();
757 assert!(records.is_empty());
758 }
759
760 #[test]
761 fn test_compute_heap_id_len() {
762 let h5 = BTreeV2Header {
764 btree_type: 5,
765 record_size: 12,
766 node_size: 0,
767 depth: 0,
768 split_percent: 0,
769 merge_percent: 0,
770 root_node_address: 0,
771 num_records_in_root: 0,
772 total_records: 0,
773 };
774 assert_eq!(compute_heap_id_len(&h5), 8);
775
776 let h8 = BTreeV2Header {
778 btree_type: 8,
779 record_size: 17,
780 ..h5
781 };
782 assert_eq!(compute_heap_id_len(&h8), 8);
783 }
784
785 #[test]
786 fn test_max_leaf_records() {
787 assert_eq!(max_leaf_records(4096, 12), 340);
790 }
791
792 #[test]
793 fn test_num_records_size() {
794 assert_eq!(num_records_size(0), 1);
795 assert_eq!(num_records_size(255), 1);
796 assert_eq!(num_records_size(256), 2);
797 assert_eq!(num_records_size(65535), 2);
798 assert_eq!(num_records_size(65536), 4);
799 }
800
801 #[test]
802 fn test_parse_leaf_with_type5_records() {
803 let record_size: u16 = 12;
806 let node_size: u32 = 4096;
807
808 let header = BTreeV2Header {
809 btree_type: 5,
810 node_size,
811 record_size,
812 depth: 0,
813 split_percent: 75,
814 merge_percent: 40,
815 root_node_address: 0, num_records_in_root: 2,
817 total_records: 2,
818 };
819
820 let mut leaf = Vec::new();
822 leaf.extend_from_slice(b"BTLF"); leaf.push(0); leaf.push(5); leaf.extend_from_slice(&0xAABBCCDDu32.to_le_bytes());
828 leaf.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
829
830 leaf.extend_from_slice(&0x11223344u32.to_le_bytes());
832 leaf.extend_from_slice(&[9, 10, 11, 12, 13, 14, 15, 16]);
833
834 let checksum = jenkins_lookup3(&leaf);
836 leaf.extend_from_slice(&checksum.to_le_bytes());
837
838 leaf.resize(node_size as usize, 0);
840
841 let mut records = Vec::new();
842 let mut cursor = Cursor::new(&leaf);
843 parse_leaf_node(
844 &mut cursor,
845 &header,
846 8,
847 8,
848 None,
849 &[],
850 None,
851 2,
852 8, &mut records,
854 )
855 .unwrap();
856
857 assert_eq!(records.len(), 2);
858 match &records[0] {
859 BTreeV2Record::LinkNameHash { hash, heap_id } => {
860 assert_eq!(*hash, 0xAABBCCDD);
861 assert_eq!(heap_id, &[1, 2, 3, 4, 5, 6, 7, 8]);
862 }
863 _ => panic!("expected LinkNameHash"),
864 }
865 match &records[1] {
866 BTreeV2Record::LinkNameHash { hash, heap_id } => {
867 assert_eq!(*hash, 0x11223344);
868 assert_eq!(heap_id, &[9, 10, 11, 12, 13, 14, 15, 16]);
869 }
870 _ => panic!("expected LinkNameHash"),
871 }
872 }
873}