1use std::collections::HashSet;
13
14use crate::error::{Error, Result};
15use crate::io::Cursor;
16use crate::storage::Storage;
17
18const BTREE_V1_SIGNATURE: [u8; 4] = *b"TREE";
20const MAX_BTREE_V1_RECURSION_DEPTH: usize = 256;
21
22#[derive(Debug, Clone)]
24pub enum BTreeV1Key {
25 Group { local_heap_offset: u64 },
27 RawData {
30 chunk_size: u32,
31 filter_mask: u32,
32 offsets: Vec<u64>,
33 },
34}
35
36#[derive(Debug, Clone)]
38pub struct BTreeV1Node {
39 pub node_type: u8,
41 pub level: u8,
43 pub entries_used: u16,
45 pub left_sibling: u64,
47 pub right_sibling: u64,
49 pub keys: Vec<BTreeV1Key>,
51 pub children: Vec<u64>,
53}
54
55impl BTreeV1Node {
56 pub fn parse(
73 cursor: &mut Cursor,
74 offset_size: u8,
75 length_size: u8,
76 ndims: Option<u32>,
77 ) -> Result<Self> {
78 let sig = cursor.read_bytes(4)?;
79 if sig != BTREE_V1_SIGNATURE {
80 return Err(Error::InvalidBTreeSignature);
81 }
82
83 let node_type = cursor.read_u8()?;
84 let level = cursor.read_u8()?;
85 let entries_used = cursor.read_u16_le()?;
86 let left_sibling = cursor.read_offset(offset_size)?;
87 let right_sibling = cursor.read_offset(offset_size)?;
88
89 let num_keys = entries_used as usize + 1;
90 let num_children = entries_used as usize;
91
92 let mut keys = Vec::with_capacity(num_keys);
93 let mut children = Vec::with_capacity(num_children);
94
95 for i in 0..num_keys {
98 let key = match node_type {
99 0 => parse_group_key(cursor, length_size)?,
100 1 => parse_raw_data_key(cursor, ndims)?,
101 _ => {
102 return Err(Error::InvalidData(format!(
103 "unknown v1 B-tree node type: {node_type}"
104 )));
105 }
106 };
107 keys.push(key);
108
109 if i < num_children {
111 let child_addr = cursor.read_offset(offset_size)?;
112 children.push(child_addr);
113 }
114 }
115
116 Ok(BTreeV1Node {
117 node_type,
118 level,
119 entries_used,
120 left_sibling,
121 right_sibling,
122 keys,
123 children,
124 })
125 }
126}
127
128fn parse_group_key(cursor: &mut Cursor, length_size: u8) -> Result<BTreeV1Key> {
130 let local_heap_offset = cursor.read_length(length_size)?;
131 Ok(BTreeV1Key::Group { local_heap_offset })
132}
133
134fn parse_raw_data_key(cursor: &mut Cursor, ndims: Option<u32>) -> Result<BTreeV1Key> {
149 let nd = ndims.ok_or_else(|| {
150 Error::InvalidData("ndims required for raw data chunk B-tree keys".into())
151 })?;
152
153 let chunk_size = cursor.read_u32_le()?;
154 let filter_mask = cursor.read_u32_le()?;
155
156 let num_offsets = nd as usize + 1;
157 let mut offsets = Vec::with_capacity(num_offsets);
158 for _ in 0..num_offsets {
159 offsets.push(cursor.read_u64_le()?);
161 }
162
163 Ok(BTreeV1Key::RawData {
164 chunk_size,
165 filter_mask,
166 offsets,
167 })
168}
169
170pub fn collect_btree_v1_leaves(
179 data: &[u8],
180 root_address: u64,
181 offset_size: u8,
182 length_size: u8,
183 ndims: Option<u32>,
184 chunk_dims: &[u32],
185 chunk_bounds: Option<(&[u64], &[u64])>,
186) -> Result<Vec<(BTreeV1Key, u64)>> {
187 let mut results = Vec::new();
188 let mut visited = HashSet::new();
189 collect_recursive(
190 BTreeV1CollectContext {
191 data,
192 offset_size,
193 length_size,
194 ndims,
195 chunk_dims,
196 chunk_bounds,
197 },
198 root_address,
199 MAX_BTREE_V1_RECURSION_DEPTH,
200 &mut visited,
201 &mut results,
202 )?;
203 Ok(results)
204}
205
206pub fn collect_btree_v1_leaves_storage(
208 storage: &dyn Storage,
209 root_address: u64,
210 offset_size: u8,
211 length_size: u8,
212 ndims: Option<u32>,
213 chunk_dims: &[u32],
214 chunk_bounds: Option<(&[u64], &[u64])>,
215) -> Result<Vec<(BTreeV1Key, u64)>> {
216 let mut results = Vec::new();
217 let mut visited = HashSet::new();
218 collect_recursive_storage(
219 BTreeV1CollectContextStorage {
220 storage,
221 offset_size,
222 length_size,
223 ndims,
224 chunk_dims,
225 chunk_bounds,
226 },
227 root_address,
228 MAX_BTREE_V1_RECURSION_DEPTH,
229 &mut visited,
230 &mut results,
231 )?;
232 Ok(results)
233}
234
235#[derive(Clone, Copy)]
236struct BTreeV1CollectContext<'a> {
237 data: &'a [u8],
238 offset_size: u8,
239 length_size: u8,
240 ndims: Option<u32>,
241 chunk_dims: &'a [u32],
242 chunk_bounds: Option<(&'a [u64], &'a [u64])>,
243}
244
245#[derive(Clone, Copy)]
246struct BTreeV1CollectContextStorage<'a> {
247 storage: &'a dyn Storage,
248 offset_size: u8,
249 length_size: u8,
250 ndims: Option<u32>,
251 chunk_dims: &'a [u32],
252 chunk_bounds: Option<(&'a [u64], &'a [u64])>,
253}
254
255fn collect_recursive(
257 context: BTreeV1CollectContext<'_>,
258 address: u64,
259 depth_remaining: usize,
260 visited: &mut HashSet<u64>,
261 results: &mut Vec<(BTreeV1Key, u64)>,
262) -> Result<()> {
263 if Cursor::is_undefined_offset(address, context.offset_size) {
264 return Ok(());
265 }
266 if depth_remaining == 0 {
267 return Err(Error::InvalidData(
268 "B-tree v1 traversal exceeded recursion limit".into(),
269 ));
270 }
271 if !visited.insert(address) {
272 return Err(Error::InvalidData(format!(
273 "B-tree v1 traversal revisits node at offset {address:#x}"
274 )));
275 }
276
277 if address as usize >= context.data.len() {
278 return Err(Error::OffsetOutOfBounds(address));
279 }
280
281 let mut cursor = Cursor::new(context.data);
282 cursor.set_position(address);
283
284 let node = BTreeV1Node::parse(
285 &mut cursor,
286 context.offset_size,
287 context.length_size,
288 context.ndims,
289 )?;
290
291 if node.level == 0 {
292 for (i, child_addr) in node.children.iter().enumerate() {
294 let include = match &node.keys[i] {
295 BTreeV1Key::RawData { offsets, .. } => {
296 context.chunk_bounds.map_or(true, |(first, last)| {
297 offsets
298 .iter()
299 .zip(context.chunk_dims.iter())
300 .enumerate()
301 .all(|(dim, (offset, chunk_dim))| {
302 let chunk_index = *offset / u64::from(*chunk_dim);
303 chunk_index >= first[dim] && chunk_index <= last[dim]
304 })
305 })
306 }
307 _ => true,
308 };
309
310 if include {
311 results.push((node.keys[i].clone(), *child_addr));
312 }
313 }
314 } else {
315 for child_addr in &node.children {
317 collect_recursive(context, *child_addr, depth_remaining - 1, visited, results)?;
318 }
319 }
320
321 Ok(())
322}
323
324fn collect_recursive_storage(
325 context: BTreeV1CollectContextStorage<'_>,
326 address: u64,
327 depth_remaining: usize,
328 visited: &mut HashSet<u64>,
329 results: &mut Vec<(BTreeV1Key, u64)>,
330) -> Result<()> {
331 if Cursor::is_undefined_offset(address, context.offset_size) {
332 return Ok(());
333 }
334 if depth_remaining == 0 {
335 return Err(Error::InvalidData(
336 "B-tree v1 traversal exceeded recursion limit".into(),
337 ));
338 }
339 if !visited.insert(address) {
340 return Err(Error::InvalidData(format!(
341 "B-tree v1 traversal revisits node at offset {address:#x}"
342 )));
343 }
344
345 let header_len = 4 + 1 + 1 + 2 + 2 * usize::from(context.offset_size);
346 let header = context.storage.read_range(address, header_len)?;
347 let mut header_cursor = Cursor::new(header.as_ref());
348 let sig = header_cursor.read_bytes(4)?;
349 if sig != BTREE_V1_SIGNATURE {
350 return Err(Error::InvalidBTreeSignature);
351 }
352
353 let node_type = header_cursor.read_u8()?;
354 let _level = header_cursor.read_u8()?;
355 let entries_used = header_cursor.read_u16_le()?;
356 let key_size = match node_type {
357 0 => usize::from(context.length_size),
358 1 => {
359 let ndims = context.ndims.ok_or_else(|| {
360 Error::InvalidData("ndims required for raw data chunk B-tree keys".into())
361 })?;
362 8 + (usize::try_from(ndims).map_err(|_| {
365 Error::InvalidData("B-tree v1 ndims exceeds platform usize capacity".into())
366 })? + 1)
367 * 8
368 }
369 _ => {
370 return Err(Error::InvalidData(format!(
371 "unknown v1 B-tree node type: {node_type}"
372 )));
373 }
374 };
375 let node_len = header_len
376 + (usize::from(entries_used) + 1) * key_size
377 + usize::from(entries_used) * usize::from(context.offset_size);
378 let node_bytes = context.storage.read_range(address, node_len)?;
379 let mut cursor = Cursor::new(node_bytes.as_ref());
380 let node = BTreeV1Node::parse(
381 &mut cursor,
382 context.offset_size,
383 context.length_size,
384 context.ndims,
385 )?;
386
387 if node.level == 0 {
388 for (i, child_addr) in node.children.iter().enumerate() {
389 let include = match &node.keys[i] {
390 BTreeV1Key::RawData { offsets, .. } => {
391 context.chunk_bounds.map_or(true, |(first, last)| {
392 offsets
393 .iter()
394 .zip(context.chunk_dims.iter())
395 .enumerate()
396 .all(|(dim, (offset, chunk_dim))| {
397 let chunk_index = *offset / u64::from(*chunk_dim);
398 chunk_index >= first[dim] && chunk_index <= last[dim]
399 })
400 })
401 }
402 _ => true,
403 };
404
405 if include {
406 results.push((node.keys[i].clone(), *child_addr));
407 }
408 }
409 } else {
410 for child_addr in &node.children {
411 collect_recursive_storage(context, *child_addr, depth_remaining - 1, visited, results)?;
412 }
413 }
414
415 Ok(())
416}
417
418#[cfg(test)]
419mod tests {
420 use super::*;
421
422 #[allow(clippy::too_many_arguments)]
424 fn build_group_node(
425 level: u8,
426 entries_used: u16,
427 left_sibling: u64,
428 right_sibling: u64,
429 keys: &[u64], children: &[u64], offset_size: u8,
432 length_size: u8,
433 ) -> Vec<u8> {
434 assert_eq!(keys.len(), entries_used as usize + 1);
435 assert_eq!(children.len(), entries_used as usize);
436
437 let mut buf = Vec::new();
438 buf.extend_from_slice(b"TREE");
439 buf.push(0); buf.push(level);
441 buf.extend_from_slice(&entries_used.to_le_bytes());
442 push_offset(&mut buf, left_sibling, offset_size);
443 push_offset(&mut buf, right_sibling, offset_size);
444
445 for i in 0..keys.len() {
446 push_length(&mut buf, keys[i], length_size);
447 if i < children.len() {
448 push_offset(&mut buf, children[i], offset_size);
449 }
450 }
451
452 buf
453 }
454
455 fn build_rawdata_node(
457 level: u8,
458 entries_used: u16,
459 left_sibling: u64,
460 right_sibling: u64,
461 keys: &[(u32, u32, Vec<u64>)], children: &[u64],
463 offset_size: u8,
464 ) -> Vec<u8> {
465 assert_eq!(keys.len(), entries_used as usize + 1);
466 assert_eq!(children.len(), entries_used as usize);
467
468 let mut buf = Vec::new();
469 buf.extend_from_slice(b"TREE");
470 buf.push(1); buf.push(level);
472 buf.extend_from_slice(&entries_used.to_le_bytes());
473 push_offset(&mut buf, left_sibling, offset_size);
474 push_offset(&mut buf, right_sibling, offset_size);
475
476 for i in 0..keys.len() {
477 let (cs, fm, ref offs) = keys[i];
478 buf.extend_from_slice(&cs.to_le_bytes());
479 buf.extend_from_slice(&fm.to_le_bytes());
480 for &o in offs {
481 buf.extend_from_slice(&o.to_le_bytes());
485 }
486 if i < children.len() {
487 push_offset(&mut buf, children[i], offset_size);
488 }
489 }
490
491 buf
492 }
493
494 fn push_offset(buf: &mut Vec<u8>, val: u64, size: u8) {
495 match size {
496 4 => buf.extend_from_slice(&(val as u32).to_le_bytes()),
497 8 => buf.extend_from_slice(&val.to_le_bytes()),
498 _ => panic!("unsupported offset size in test"),
499 }
500 }
501
502 fn push_length(buf: &mut Vec<u8>, val: u64, size: u8) {
503 match size {
504 4 => buf.extend_from_slice(&(val as u32).to_le_bytes()),
505 8 => buf.extend_from_slice(&val.to_le_bytes()),
506 _ => panic!("unsupported length size in test"),
507 }
508 }
509
510 #[test]
511 fn parse_group_leaf_node() {
512 let undef8 = 0xFFFF_FFFF_FFFF_FFFFu64;
513 let data = build_group_node(
514 0, 2, undef8, undef8, &[0, 8, 16], &[0x1000, 0x2000], 8,
521 8,
522 );
523
524 let mut cursor = Cursor::new(&data);
525 let node = BTreeV1Node::parse(&mut cursor, 8, 8, None).unwrap();
526
527 assert_eq!(node.node_type, 0);
528 assert_eq!(node.level, 0);
529 assert_eq!(node.entries_used, 2);
530 assert!(Cursor::is_undefined_offset(node.left_sibling, 8));
531 assert!(Cursor::is_undefined_offset(node.right_sibling, 8));
532 assert_eq!(node.keys.len(), 3);
533 assert_eq!(node.children.len(), 2);
534
535 match &node.keys[0] {
536 BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 0),
537 _ => panic!("expected Group key"),
538 }
539 match &node.keys[1] {
540 BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 8),
541 _ => panic!("expected Group key"),
542 }
543 assert_eq!(node.children[0], 0x1000);
544 assert_eq!(node.children[1], 0x2000);
545 }
546
547 #[test]
548 fn parse_rawdata_leaf_node() {
549 let undef8 = 0xFFFF_FFFF_FFFF_FFFFu64;
550 let data = build_rawdata_node(
552 0, 1, undef8,
555 undef8,
556 &[
557 (1024, 0, vec![0, 0, 0]), (1024, 0, vec![10, 20, 0]), ],
560 &[0x5000], 8,
562 );
563
564 let mut cursor = Cursor::new(&data);
565 let node = BTreeV1Node::parse(&mut cursor, 8, 8, Some(2)).unwrap();
566
567 assert_eq!(node.node_type, 1);
568 assert_eq!(node.level, 0);
569 assert_eq!(node.entries_used, 1);
570 assert_eq!(node.keys.len(), 2);
571 assert_eq!(node.children.len(), 1);
572
573 match &node.keys[0] {
574 BTreeV1Key::RawData {
575 chunk_size,
576 filter_mask,
577 offsets,
578 } => {
579 assert_eq!(*chunk_size, 1024);
580 assert_eq!(*filter_mask, 0);
581 assert_eq!(offsets, &[0, 0, 0]);
582 }
583 _ => panic!("expected RawData key"),
584 }
585
586 assert_eq!(node.children[0], 0x5000);
587 }
588
589 #[test]
595 fn parse_rawdata_leaf_4byte() {
596 let undef4 = 0xFFFF_FFFFu64;
597 let data = build_rawdata_node(
598 0,
599 1,
600 undef4,
601 undef4,
602 &[
603 (512, 0x03, vec![0, 0]), (512, 0, vec![100, 0]),
605 ],
606 &[0x3000],
607 4,
608 );
609
610 let mut cursor = Cursor::new(&data);
611 let node = BTreeV1Node::parse(&mut cursor, 4, 4, Some(1)).unwrap();
612
613 assert_eq!(node.node_type, 1);
614 match &node.keys[0] {
615 BTreeV1Key::RawData {
616 chunk_size,
617 filter_mask,
618 offsets,
619 } => {
620 assert_eq!(*chunk_size, 512);
621 assert_eq!(*filter_mask, 0x03);
622 assert_eq!(offsets, &[0, 0]);
623 }
624 _ => panic!("expected RawData key"),
625 }
626 assert_eq!(node.children[0], 0x3000);
629 }
630
631 #[test]
632 fn bad_signature() {
633 let mut data = build_group_node(0, 0, u64::MAX, u64::MAX, &[0], &[], 8, 8);
634 data[0] = b'X'; let mut cursor = Cursor::new(&data);
636 assert!(matches!(
637 BTreeV1Node::parse(&mut cursor, 8, 8, None),
638 Err(Error::InvalidBTreeSignature)
639 ));
640 }
641
642 #[test]
643 fn collect_leaves_single_leaf() {
644 let undef8 = u64::MAX;
645 let node_data = build_group_node(0, 2, undef8, undef8, &[0, 5, 10], &[0x100, 0x200], 8, 8);
647
648 let results = collect_btree_v1_leaves(&node_data, 0, 8, 8, None, &[], None).unwrap();
649
650 assert_eq!(results.len(), 2);
651 match &results[0].0 {
652 BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 0),
653 _ => panic!("expected Group key"),
654 }
655 assert_eq!(results[0].1, 0x100);
656 assert_eq!(results[1].1, 0x200);
657 }
658
659 #[test]
660 fn collect_leaves_two_level_tree() {
661 let undef8 = u64::MAX;
662
663 let leaf_a = build_group_node(0, 1, undef8, undef8, &[0, 5], &[0xA00], 8, 8);
669
670 let leaf_b = build_group_node(0, 1, undef8, undef8, &[10, 15], &[0xB00], 8, 8);
671
672 let root = build_group_node(
673 1,
674 2,
675 undef8,
676 undef8,
677 &[0, 5, 15], &[1000, 2000], 8,
680 8,
681 );
682
683 let total_size = 3000 + leaf_b.len();
685 let mut file_data = vec![0u8; total_size];
686 file_data[..root.len()].copy_from_slice(&root);
687 file_data[1000..1000 + leaf_a.len()].copy_from_slice(&leaf_a);
688 file_data[2000..2000 + leaf_b.len()].copy_from_slice(&leaf_b);
689
690 let results = collect_btree_v1_leaves(&file_data, 0, 8, 8, None, &[], None).unwrap();
691
692 assert_eq!(results.len(), 2);
693 assert_eq!(results[0].1, 0xA00);
694 assert_eq!(results[1].1, 0xB00);
695 }
696
697 #[test]
698 fn collect_rejects_revisited_node() {
699 let undef8 = u64::MAX;
700 let root = build_group_node(
701 1,
702 1,
703 undef8,
704 undef8,
705 &[0, 8],
706 &[0], 8,
708 8,
709 );
710
711 let err = collect_btree_v1_leaves(&root, 0, 8, 8, None, &[], None).unwrap_err();
712 assert!(matches!(err, Error::InvalidData(message) if message.contains("revisits node")));
713
714 let storage = crate::storage::BytesStorage::new(root);
715 let err = collect_btree_v1_leaves_storage(&storage, 0, 8, 8, None, &[], None).unwrap_err();
716 assert!(matches!(err, Error::InvalidData(message) if message.contains("revisits node")));
717 }
718
719 #[test]
720 fn collect_undefined_root() {
721 let data = vec![0u8; 100];
723 let results = collect_btree_v1_leaves(&data, u64::MAX, 8, 8, None, &[], None).unwrap();
724 assert!(results.is_empty());
725 }
726}