1use crate::error::{Error, Result};
13use crate::io::Cursor;
14use crate::storage::Storage;
15
16const BTREE_V1_SIGNATURE: [u8; 4] = *b"TREE";
18
19#[derive(Debug, Clone)]
21pub enum BTreeV1Key {
22 Group { local_heap_offset: u64 },
24 RawData {
27 chunk_size: u32,
28 filter_mask: u32,
29 offsets: Vec<u64>,
30 },
31}
32
33#[derive(Debug, Clone)]
35pub struct BTreeV1Node {
36 pub node_type: u8,
38 pub level: u8,
40 pub entries_used: u16,
42 pub left_sibling: u64,
44 pub right_sibling: u64,
46 pub keys: Vec<BTreeV1Key>,
48 pub children: Vec<u64>,
50}
51
52impl BTreeV1Node {
53 pub fn parse(
70 cursor: &mut Cursor,
71 offset_size: u8,
72 length_size: u8,
73 ndims: Option<u32>,
74 ) -> Result<Self> {
75 let sig = cursor.read_bytes(4)?;
76 if sig != BTREE_V1_SIGNATURE {
77 return Err(Error::InvalidBTreeSignature);
78 }
79
80 let node_type = cursor.read_u8()?;
81 let level = cursor.read_u8()?;
82 let entries_used = cursor.read_u16_le()?;
83 let left_sibling = cursor.read_offset(offset_size)?;
84 let right_sibling = cursor.read_offset(offset_size)?;
85
86 let num_keys = entries_used as usize + 1;
87 let num_children = entries_used as usize;
88
89 let mut keys = Vec::with_capacity(num_keys);
90 let mut children = Vec::with_capacity(num_children);
91
92 for i in 0..num_keys {
95 let key = match node_type {
96 0 => parse_group_key(cursor, length_size)?,
97 1 => parse_raw_data_key(cursor, ndims)?,
98 _ => {
99 return Err(Error::InvalidData(format!(
100 "unknown v1 B-tree node type: {node_type}"
101 )));
102 }
103 };
104 keys.push(key);
105
106 if i < num_children {
108 let child_addr = cursor.read_offset(offset_size)?;
109 children.push(child_addr);
110 }
111 }
112
113 Ok(BTreeV1Node {
114 node_type,
115 level,
116 entries_used,
117 left_sibling,
118 right_sibling,
119 keys,
120 children,
121 })
122 }
123}
124
125fn parse_group_key(cursor: &mut Cursor, length_size: u8) -> Result<BTreeV1Key> {
127 let local_heap_offset = cursor.read_length(length_size)?;
128 Ok(BTreeV1Key::Group { local_heap_offset })
129}
130
131fn parse_raw_data_key(cursor: &mut Cursor, ndims: Option<u32>) -> Result<BTreeV1Key> {
146 let nd = ndims.ok_or_else(|| {
147 Error::InvalidData("ndims required for raw data chunk B-tree keys".into())
148 })?;
149
150 let chunk_size = cursor.read_u32_le()?;
151 let filter_mask = cursor.read_u32_le()?;
152
153 let num_offsets = nd as usize + 1;
154 let mut offsets = Vec::with_capacity(num_offsets);
155 for _ in 0..num_offsets {
156 offsets.push(cursor.read_u64_le()?);
158 }
159
160 Ok(BTreeV1Key::RawData {
161 chunk_size,
162 filter_mask,
163 offsets,
164 })
165}
166
167pub fn collect_btree_v1_leaves(
176 data: &[u8],
177 root_address: u64,
178 offset_size: u8,
179 length_size: u8,
180 ndims: Option<u32>,
181 chunk_dims: &[u32],
182 chunk_bounds: Option<(&[u64], &[u64])>,
183) -> Result<Vec<(BTreeV1Key, u64)>> {
184 let mut results = Vec::new();
185 collect_recursive(
186 BTreeV1CollectContext {
187 data,
188 offset_size,
189 length_size,
190 ndims,
191 chunk_dims,
192 chunk_bounds,
193 },
194 root_address,
195 &mut results,
196 )?;
197 Ok(results)
198}
199
200pub fn collect_btree_v1_leaves_storage(
202 storage: &dyn Storage,
203 root_address: u64,
204 offset_size: u8,
205 length_size: u8,
206 ndims: Option<u32>,
207 chunk_dims: &[u32],
208 chunk_bounds: Option<(&[u64], &[u64])>,
209) -> Result<Vec<(BTreeV1Key, u64)>> {
210 let mut results = Vec::new();
211 collect_recursive_storage(
212 BTreeV1CollectContextStorage {
213 storage,
214 offset_size,
215 length_size,
216 ndims,
217 chunk_dims,
218 chunk_bounds,
219 },
220 root_address,
221 &mut results,
222 )?;
223 Ok(results)
224}
225
226#[derive(Clone, Copy)]
227struct BTreeV1CollectContext<'a> {
228 data: &'a [u8],
229 offset_size: u8,
230 length_size: u8,
231 ndims: Option<u32>,
232 chunk_dims: &'a [u32],
233 chunk_bounds: Option<(&'a [u64], &'a [u64])>,
234}
235
236#[derive(Clone, Copy)]
237struct BTreeV1CollectContextStorage<'a> {
238 storage: &'a dyn Storage,
239 offset_size: u8,
240 length_size: u8,
241 ndims: Option<u32>,
242 chunk_dims: &'a [u32],
243 chunk_bounds: Option<(&'a [u64], &'a [u64])>,
244}
245
246fn collect_recursive(
248 context: BTreeV1CollectContext<'_>,
249 address: u64,
250 results: &mut Vec<(BTreeV1Key, u64)>,
251) -> Result<()> {
252 if Cursor::is_undefined_offset(address, context.offset_size) {
253 return Ok(());
254 }
255
256 if address as usize >= context.data.len() {
257 return Err(Error::OffsetOutOfBounds(address));
258 }
259
260 let mut cursor = Cursor::new(context.data);
261 cursor.set_position(address);
262
263 let node = BTreeV1Node::parse(
264 &mut cursor,
265 context.offset_size,
266 context.length_size,
267 context.ndims,
268 )?;
269
270 if node.level == 0 {
271 for (i, child_addr) in node.children.iter().enumerate() {
273 let include = match &node.keys[i] {
274 BTreeV1Key::RawData { offsets, .. } => {
275 context.chunk_bounds.map_or(true, |(first, last)| {
276 offsets
277 .iter()
278 .zip(context.chunk_dims.iter())
279 .enumerate()
280 .all(|(dim, (offset, chunk_dim))| {
281 let chunk_index = *offset / u64::from(*chunk_dim);
282 chunk_index >= first[dim] && chunk_index <= last[dim]
283 })
284 })
285 }
286 _ => true,
287 };
288
289 if include {
290 results.push((node.keys[i].clone(), *child_addr));
291 }
292 }
293 } else {
294 for child_addr in &node.children {
296 collect_recursive(context, *child_addr, results)?;
297 }
298 }
299
300 Ok(())
301}
302
303fn collect_recursive_storage(
304 context: BTreeV1CollectContextStorage<'_>,
305 address: u64,
306 results: &mut Vec<(BTreeV1Key, u64)>,
307) -> Result<()> {
308 if Cursor::is_undefined_offset(address, context.offset_size) {
309 return Ok(());
310 }
311
312 let header_len = 4 + 1 + 1 + 2 + 2 * usize::from(context.offset_size);
313 let header = context.storage.read_range(address, header_len)?;
314 let mut header_cursor = Cursor::new(header.as_ref());
315 let sig = header_cursor.read_bytes(4)?;
316 if sig != BTREE_V1_SIGNATURE {
317 return Err(Error::InvalidBTreeSignature);
318 }
319
320 let node_type = header_cursor.read_u8()?;
321 let _level = header_cursor.read_u8()?;
322 let entries_used = header_cursor.read_u16_le()?;
323 let key_size = match node_type {
324 0 => usize::from(context.length_size),
325 1 => {
326 let ndims = context.ndims.ok_or_else(|| {
327 Error::InvalidData("ndims required for raw data chunk B-tree keys".into())
328 })?;
329 8 + (usize::try_from(ndims).map_err(|_| {
332 Error::InvalidData("B-tree v1 ndims exceeds platform usize capacity".into())
333 })? + 1)
334 * 8
335 }
336 _ => {
337 return Err(Error::InvalidData(format!(
338 "unknown v1 B-tree node type: {node_type}"
339 )));
340 }
341 };
342 let node_len = header_len
343 + (usize::from(entries_used) + 1) * key_size
344 + usize::from(entries_used) * usize::from(context.offset_size);
345 let node_bytes = context.storage.read_range(address, node_len)?;
346 let mut cursor = Cursor::new(node_bytes.as_ref());
347 let node = BTreeV1Node::parse(
348 &mut cursor,
349 context.offset_size,
350 context.length_size,
351 context.ndims,
352 )?;
353
354 if node.level == 0 {
355 for (i, child_addr) in node.children.iter().enumerate() {
356 let include = match &node.keys[i] {
357 BTreeV1Key::RawData { offsets, .. } => {
358 context.chunk_bounds.map_or(true, |(first, last)| {
359 offsets
360 .iter()
361 .zip(context.chunk_dims.iter())
362 .enumerate()
363 .all(|(dim, (offset, chunk_dim))| {
364 let chunk_index = *offset / u64::from(*chunk_dim);
365 chunk_index >= first[dim] && chunk_index <= last[dim]
366 })
367 })
368 }
369 _ => true,
370 };
371
372 if include {
373 results.push((node.keys[i].clone(), *child_addr));
374 }
375 }
376 } else {
377 for child_addr in &node.children {
378 collect_recursive_storage(context, *child_addr, results)?;
379 }
380 }
381
382 Ok(())
383}
384
385#[cfg(test)]
386mod tests {
387 use super::*;
388
389 #[allow(clippy::too_many_arguments)]
391 fn build_group_node(
392 level: u8,
393 entries_used: u16,
394 left_sibling: u64,
395 right_sibling: u64,
396 keys: &[u64], children: &[u64], offset_size: u8,
399 length_size: u8,
400 ) -> Vec<u8> {
401 assert_eq!(keys.len(), entries_used as usize + 1);
402 assert_eq!(children.len(), entries_used as usize);
403
404 let mut buf = Vec::new();
405 buf.extend_from_slice(b"TREE");
406 buf.push(0); buf.push(level);
408 buf.extend_from_slice(&entries_used.to_le_bytes());
409 push_offset(&mut buf, left_sibling, offset_size);
410 push_offset(&mut buf, right_sibling, offset_size);
411
412 for i in 0..keys.len() {
413 push_length(&mut buf, keys[i], length_size);
414 if i < children.len() {
415 push_offset(&mut buf, children[i], offset_size);
416 }
417 }
418
419 buf
420 }
421
422 fn build_rawdata_node(
424 level: u8,
425 entries_used: u16,
426 left_sibling: u64,
427 right_sibling: u64,
428 keys: &[(u32, u32, Vec<u64>)], children: &[u64],
430 offset_size: u8,
431 ) -> Vec<u8> {
432 assert_eq!(keys.len(), entries_used as usize + 1);
433 assert_eq!(children.len(), entries_used as usize);
434
435 let mut buf = Vec::new();
436 buf.extend_from_slice(b"TREE");
437 buf.push(1); buf.push(level);
439 buf.extend_from_slice(&entries_used.to_le_bytes());
440 push_offset(&mut buf, left_sibling, offset_size);
441 push_offset(&mut buf, right_sibling, offset_size);
442
443 for i in 0..keys.len() {
444 let (cs, fm, ref offs) = keys[i];
445 buf.extend_from_slice(&cs.to_le_bytes());
446 buf.extend_from_slice(&fm.to_le_bytes());
447 for &o in offs {
448 buf.extend_from_slice(&o.to_le_bytes());
452 }
453 if i < children.len() {
454 push_offset(&mut buf, children[i], offset_size);
455 }
456 }
457
458 buf
459 }
460
461 fn push_offset(buf: &mut Vec<u8>, val: u64, size: u8) {
462 match size {
463 4 => buf.extend_from_slice(&(val as u32).to_le_bytes()),
464 8 => buf.extend_from_slice(&val.to_le_bytes()),
465 _ => panic!("unsupported offset size in test"),
466 }
467 }
468
469 fn push_length(buf: &mut Vec<u8>, val: u64, size: u8) {
470 match size {
471 4 => buf.extend_from_slice(&(val as u32).to_le_bytes()),
472 8 => buf.extend_from_slice(&val.to_le_bytes()),
473 _ => panic!("unsupported length size in test"),
474 }
475 }
476
477 #[test]
478 fn parse_group_leaf_node() {
479 let undef8 = 0xFFFF_FFFF_FFFF_FFFFu64;
480 let data = build_group_node(
481 0, 2, undef8, undef8, &[0, 8, 16], &[0x1000, 0x2000], 8,
488 8,
489 );
490
491 let mut cursor = Cursor::new(&data);
492 let node = BTreeV1Node::parse(&mut cursor, 8, 8, None).unwrap();
493
494 assert_eq!(node.node_type, 0);
495 assert_eq!(node.level, 0);
496 assert_eq!(node.entries_used, 2);
497 assert!(Cursor::is_undefined_offset(node.left_sibling, 8));
498 assert!(Cursor::is_undefined_offset(node.right_sibling, 8));
499 assert_eq!(node.keys.len(), 3);
500 assert_eq!(node.children.len(), 2);
501
502 match &node.keys[0] {
503 BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 0),
504 _ => panic!("expected Group key"),
505 }
506 match &node.keys[1] {
507 BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 8),
508 _ => panic!("expected Group key"),
509 }
510 assert_eq!(node.children[0], 0x1000);
511 assert_eq!(node.children[1], 0x2000);
512 }
513
514 #[test]
515 fn parse_rawdata_leaf_node() {
516 let undef8 = 0xFFFF_FFFF_FFFF_FFFFu64;
517 let data = build_rawdata_node(
519 0, 1, undef8,
522 undef8,
523 &[
524 (1024, 0, vec![0, 0, 0]), (1024, 0, vec![10, 20, 0]), ],
527 &[0x5000], 8,
529 );
530
531 let mut cursor = Cursor::new(&data);
532 let node = BTreeV1Node::parse(&mut cursor, 8, 8, Some(2)).unwrap();
533
534 assert_eq!(node.node_type, 1);
535 assert_eq!(node.level, 0);
536 assert_eq!(node.entries_used, 1);
537 assert_eq!(node.keys.len(), 2);
538 assert_eq!(node.children.len(), 1);
539
540 match &node.keys[0] {
541 BTreeV1Key::RawData {
542 chunk_size,
543 filter_mask,
544 offsets,
545 } => {
546 assert_eq!(*chunk_size, 1024);
547 assert_eq!(*filter_mask, 0);
548 assert_eq!(offsets, &[0, 0, 0]);
549 }
550 _ => panic!("expected RawData key"),
551 }
552
553 assert_eq!(node.children[0], 0x5000);
554 }
555
556 #[test]
562 fn parse_rawdata_leaf_4byte() {
563 let undef4 = 0xFFFF_FFFFu64;
564 let data = build_rawdata_node(
565 0,
566 1,
567 undef4,
568 undef4,
569 &[
570 (512, 0x03, vec![0, 0]), (512, 0, vec![100, 0]),
572 ],
573 &[0x3000],
574 4,
575 );
576
577 let mut cursor = Cursor::new(&data);
578 let node = BTreeV1Node::parse(&mut cursor, 4, 4, Some(1)).unwrap();
579
580 assert_eq!(node.node_type, 1);
581 match &node.keys[0] {
582 BTreeV1Key::RawData {
583 chunk_size,
584 filter_mask,
585 offsets,
586 } => {
587 assert_eq!(*chunk_size, 512);
588 assert_eq!(*filter_mask, 0x03);
589 assert_eq!(offsets, &[0, 0]);
590 }
591 _ => panic!("expected RawData key"),
592 }
593 assert_eq!(node.children[0], 0x3000);
596 }
597
598 #[test]
599 fn bad_signature() {
600 let mut data = build_group_node(0, 0, u64::MAX, u64::MAX, &[0], &[], 8, 8);
601 data[0] = b'X'; let mut cursor = Cursor::new(&data);
603 assert!(matches!(
604 BTreeV1Node::parse(&mut cursor, 8, 8, None),
605 Err(Error::InvalidBTreeSignature)
606 ));
607 }
608
609 #[test]
610 fn collect_leaves_single_leaf() {
611 let undef8 = u64::MAX;
612 let node_data = build_group_node(0, 2, undef8, undef8, &[0, 5, 10], &[0x100, 0x200], 8, 8);
614
615 let results = collect_btree_v1_leaves(&node_data, 0, 8, 8, None, &[], None).unwrap();
616
617 assert_eq!(results.len(), 2);
618 match &results[0].0 {
619 BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 0),
620 _ => panic!("expected Group key"),
621 }
622 assert_eq!(results[0].1, 0x100);
623 assert_eq!(results[1].1, 0x200);
624 }
625
626 #[test]
627 fn collect_leaves_two_level_tree() {
628 let undef8 = u64::MAX;
629
630 let leaf_a = build_group_node(0, 1, undef8, undef8, &[0, 5], &[0xA00], 8, 8);
636
637 let leaf_b = build_group_node(0, 1, undef8, undef8, &[10, 15], &[0xB00], 8, 8);
638
639 let root = build_group_node(
640 1,
641 2,
642 undef8,
643 undef8,
644 &[0, 5, 15], &[1000, 2000], 8,
647 8,
648 );
649
650 let total_size = 3000 + leaf_b.len();
652 let mut file_data = vec![0u8; total_size];
653 file_data[..root.len()].copy_from_slice(&root);
654 file_data[1000..1000 + leaf_a.len()].copy_from_slice(&leaf_a);
655 file_data[2000..2000 + leaf_b.len()].copy_from_slice(&leaf_b);
656
657 let results = collect_btree_v1_leaves(&file_data, 0, 8, 8, None, &[], None).unwrap();
658
659 assert_eq!(results.len(), 2);
660 assert_eq!(results[0].1, 0xA00);
661 assert_eq!(results[1].1, 0xB00);
662 }
663
664 #[test]
665 fn collect_undefined_root() {
666 let data = vec![0u8; 100];
668 let results = collect_btree_v1_leaves(&data, u64::MAX, 8, 8, None, &[], None).unwrap();
669 assert!(results.is_empty());
670 }
671}