1use crate::error::{Error, Result};
13use crate::io::Cursor;
14use crate::storage::Storage;
15
16const BTREE_V1_SIGNATURE: [u8; 4] = *b"TREE";
18
19#[derive(Debug, Clone)]
21pub enum BTreeV1Key {
22 Group { local_heap_offset: u64 },
24 RawData {
27 chunk_size: u32,
28 filter_mask: u32,
29 offsets: Vec<u64>,
30 },
31}
32
33#[derive(Debug, Clone)]
35pub struct BTreeV1Node {
36 pub node_type: u8,
38 pub level: u8,
40 pub entries_used: u16,
42 pub left_sibling: u64,
44 pub right_sibling: u64,
46 pub keys: Vec<BTreeV1Key>,
48 pub children: Vec<u64>,
50}
51
52impl BTreeV1Node {
53 pub fn parse(
70 cursor: &mut Cursor,
71 offset_size: u8,
72 length_size: u8,
73 ndims: Option<u32>,
74 ) -> Result<Self> {
75 let sig = cursor.read_bytes(4)?;
76 if sig != BTREE_V1_SIGNATURE {
77 return Err(Error::InvalidBTreeSignature);
78 }
79
80 let node_type = cursor.read_u8()?;
81 let level = cursor.read_u8()?;
82 let entries_used = cursor.read_u16_le()?;
83 let left_sibling = cursor.read_offset(offset_size)?;
84 let right_sibling = cursor.read_offset(offset_size)?;
85
86 let num_keys = entries_used as usize + 1;
87 let num_children = entries_used as usize;
88
89 let mut keys = Vec::with_capacity(num_keys);
90 let mut children = Vec::with_capacity(num_children);
91
92 for i in 0..num_keys {
95 let key = match node_type {
96 0 => parse_group_key(cursor, length_size)?,
97 1 => parse_raw_data_key(cursor, offset_size, ndims)?,
98 _ => {
99 return Err(Error::InvalidData(format!(
100 "unknown v1 B-tree node type: {node_type}"
101 )));
102 }
103 };
104 keys.push(key);
105
106 if i < num_children {
108 let child_addr = cursor.read_offset(offset_size)?;
109 children.push(child_addr);
110 }
111 }
112
113 Ok(BTreeV1Node {
114 node_type,
115 level,
116 entries_used,
117 left_sibling,
118 right_sibling,
119 keys,
120 children,
121 })
122 }
123}
124
125fn parse_group_key(cursor: &mut Cursor, length_size: u8) -> Result<BTreeV1Key> {
127 let local_heap_offset = cursor.read_length(length_size)?;
128 Ok(BTreeV1Key::Group { local_heap_offset })
129}
130
131fn parse_raw_data_key(
139 cursor: &mut Cursor,
140 offset_size: u8,
141 ndims: Option<u32>,
142) -> Result<BTreeV1Key> {
143 let nd = ndims.ok_or_else(|| {
144 Error::InvalidData("ndims required for raw data chunk B-tree keys".into())
145 })?;
146
147 let chunk_size = cursor.read_u32_le()?;
148 let filter_mask = cursor.read_u32_le()?;
149
150 let num_offsets = nd as usize + 1;
151 let mut offsets = Vec::with_capacity(num_offsets);
152 for _ in 0..num_offsets {
153 offsets.push(cursor.read_offset(offset_size)?);
154 }
155
156 Ok(BTreeV1Key::RawData {
157 chunk_size,
158 filter_mask,
159 offsets,
160 })
161}
162
163pub fn collect_btree_v1_leaves(
172 data: &[u8],
173 root_address: u64,
174 offset_size: u8,
175 length_size: u8,
176 ndims: Option<u32>,
177 chunk_dims: &[u32],
178 chunk_bounds: Option<(&[u64], &[u64])>,
179) -> Result<Vec<(BTreeV1Key, u64)>> {
180 let mut results = Vec::new();
181 collect_recursive(
182 BTreeV1CollectContext {
183 data,
184 offset_size,
185 length_size,
186 ndims,
187 chunk_dims,
188 chunk_bounds,
189 },
190 root_address,
191 &mut results,
192 )?;
193 Ok(results)
194}
195
196pub fn collect_btree_v1_leaves_storage(
198 storage: &dyn Storage,
199 root_address: u64,
200 offset_size: u8,
201 length_size: u8,
202 ndims: Option<u32>,
203 chunk_dims: &[u32],
204 chunk_bounds: Option<(&[u64], &[u64])>,
205) -> Result<Vec<(BTreeV1Key, u64)>> {
206 let mut results = Vec::new();
207 collect_recursive_storage(
208 BTreeV1CollectContextStorage {
209 storage,
210 offset_size,
211 length_size,
212 ndims,
213 chunk_dims,
214 chunk_bounds,
215 },
216 root_address,
217 &mut results,
218 )?;
219 Ok(results)
220}
221
222#[derive(Clone, Copy)]
223struct BTreeV1CollectContext<'a> {
224 data: &'a [u8],
225 offset_size: u8,
226 length_size: u8,
227 ndims: Option<u32>,
228 chunk_dims: &'a [u32],
229 chunk_bounds: Option<(&'a [u64], &'a [u64])>,
230}
231
232#[derive(Clone, Copy)]
233struct BTreeV1CollectContextStorage<'a> {
234 storage: &'a dyn Storage,
235 offset_size: u8,
236 length_size: u8,
237 ndims: Option<u32>,
238 chunk_dims: &'a [u32],
239 chunk_bounds: Option<(&'a [u64], &'a [u64])>,
240}
241
242fn collect_recursive(
244 context: BTreeV1CollectContext<'_>,
245 address: u64,
246 results: &mut Vec<(BTreeV1Key, u64)>,
247) -> Result<()> {
248 if Cursor::is_undefined_offset(address, context.offset_size) {
249 return Ok(());
250 }
251
252 if address as usize >= context.data.len() {
253 return Err(Error::OffsetOutOfBounds(address));
254 }
255
256 let mut cursor = Cursor::new(context.data);
257 cursor.set_position(address);
258
259 let node = BTreeV1Node::parse(
260 &mut cursor,
261 context.offset_size,
262 context.length_size,
263 context.ndims,
264 )?;
265
266 if node.level == 0 {
267 for (i, child_addr) in node.children.iter().enumerate() {
269 let include = match &node.keys[i] {
270 BTreeV1Key::RawData { offsets, .. } => {
271 context.chunk_bounds.map_or(true, |(first, last)| {
272 offsets
273 .iter()
274 .zip(context.chunk_dims.iter())
275 .enumerate()
276 .all(|(dim, (offset, chunk_dim))| {
277 let chunk_index = *offset / u64::from(*chunk_dim);
278 chunk_index >= first[dim] && chunk_index <= last[dim]
279 })
280 })
281 }
282 _ => true,
283 };
284
285 if include {
286 results.push((node.keys[i].clone(), *child_addr));
287 }
288 }
289 } else {
290 for child_addr in &node.children {
292 collect_recursive(context, *child_addr, results)?;
293 }
294 }
295
296 Ok(())
297}
298
299fn collect_recursive_storage(
300 context: BTreeV1CollectContextStorage<'_>,
301 address: u64,
302 results: &mut Vec<(BTreeV1Key, u64)>,
303) -> Result<()> {
304 if Cursor::is_undefined_offset(address, context.offset_size) {
305 return Ok(());
306 }
307
308 let header_len = 4 + 1 + 1 + 2 + 2 * usize::from(context.offset_size);
309 let header = context.storage.read_range(address, header_len)?;
310 let mut header_cursor = Cursor::new(header.as_ref());
311 let sig = header_cursor.read_bytes(4)?;
312 if sig != BTREE_V1_SIGNATURE {
313 return Err(Error::InvalidBTreeSignature);
314 }
315
316 let node_type = header_cursor.read_u8()?;
317 let _level = header_cursor.read_u8()?;
318 let entries_used = header_cursor.read_u16_le()?;
319 let key_size = match node_type {
320 0 => usize::from(context.length_size),
321 1 => {
322 let ndims = context.ndims.ok_or_else(|| {
323 Error::InvalidData("ndims required for raw data chunk B-tree keys".into())
324 })?;
325 8 + (usize::try_from(ndims).map_err(|_| {
326 Error::InvalidData("B-tree v1 ndims exceeds platform usize capacity".into())
327 })? + 1)
328 * usize::from(context.offset_size)
329 }
330 _ => {
331 return Err(Error::InvalidData(format!(
332 "unknown v1 B-tree node type: {node_type}"
333 )));
334 }
335 };
336 let node_len = header_len
337 + (usize::from(entries_used) + 1) * key_size
338 + usize::from(entries_used) * usize::from(context.offset_size);
339 let node_bytes = context.storage.read_range(address, node_len)?;
340 let mut cursor = Cursor::new(node_bytes.as_ref());
341 let node = BTreeV1Node::parse(
342 &mut cursor,
343 context.offset_size,
344 context.length_size,
345 context.ndims,
346 )?;
347
348 if node.level == 0 {
349 for (i, child_addr) in node.children.iter().enumerate() {
350 let include = match &node.keys[i] {
351 BTreeV1Key::RawData { offsets, .. } => {
352 context.chunk_bounds.map_or(true, |(first, last)| {
353 offsets
354 .iter()
355 .zip(context.chunk_dims.iter())
356 .enumerate()
357 .all(|(dim, (offset, chunk_dim))| {
358 let chunk_index = *offset / u64::from(*chunk_dim);
359 chunk_index >= first[dim] && chunk_index <= last[dim]
360 })
361 })
362 }
363 _ => true,
364 };
365
366 if include {
367 results.push((node.keys[i].clone(), *child_addr));
368 }
369 }
370 } else {
371 for child_addr in &node.children {
372 collect_recursive_storage(context, *child_addr, results)?;
373 }
374 }
375
376 Ok(())
377}
378
379#[cfg(test)]
380mod tests {
381 use super::*;
382
383 #[allow(clippy::too_many_arguments)]
385 fn build_group_node(
386 level: u8,
387 entries_used: u16,
388 left_sibling: u64,
389 right_sibling: u64,
390 keys: &[u64], children: &[u64], offset_size: u8,
393 length_size: u8,
394 ) -> Vec<u8> {
395 assert_eq!(keys.len(), entries_used as usize + 1);
396 assert_eq!(children.len(), entries_used as usize);
397
398 let mut buf = Vec::new();
399 buf.extend_from_slice(b"TREE");
400 buf.push(0); buf.push(level);
402 buf.extend_from_slice(&entries_used.to_le_bytes());
403 push_offset(&mut buf, left_sibling, offset_size);
404 push_offset(&mut buf, right_sibling, offset_size);
405
406 for i in 0..keys.len() {
407 push_length(&mut buf, keys[i], length_size);
408 if i < children.len() {
409 push_offset(&mut buf, children[i], offset_size);
410 }
411 }
412
413 buf
414 }
415
416 fn build_rawdata_node(
418 level: u8,
419 entries_used: u16,
420 left_sibling: u64,
421 right_sibling: u64,
422 keys: &[(u32, u32, Vec<u64>)], children: &[u64],
424 offset_size: u8,
425 ) -> Vec<u8> {
426 assert_eq!(keys.len(), entries_used as usize + 1);
427 assert_eq!(children.len(), entries_used as usize);
428
429 let mut buf = Vec::new();
430 buf.extend_from_slice(b"TREE");
431 buf.push(1); buf.push(level);
433 buf.extend_from_slice(&entries_used.to_le_bytes());
434 push_offset(&mut buf, left_sibling, offset_size);
435 push_offset(&mut buf, right_sibling, offset_size);
436
437 for i in 0..keys.len() {
438 let (cs, fm, ref offs) = keys[i];
439 buf.extend_from_slice(&cs.to_le_bytes());
440 buf.extend_from_slice(&fm.to_le_bytes());
441 for &o in offs {
442 push_offset(&mut buf, o, offset_size);
443 }
444 if i < children.len() {
445 push_offset(&mut buf, children[i], offset_size);
446 }
447 }
448
449 buf
450 }
451
452 fn push_offset(buf: &mut Vec<u8>, val: u64, size: u8) {
453 match size {
454 4 => buf.extend_from_slice(&(val as u32).to_le_bytes()),
455 8 => buf.extend_from_slice(&val.to_le_bytes()),
456 _ => panic!("unsupported offset size in test"),
457 }
458 }
459
460 fn push_length(buf: &mut Vec<u8>, val: u64, size: u8) {
461 match size {
462 4 => buf.extend_from_slice(&(val as u32).to_le_bytes()),
463 8 => buf.extend_from_slice(&val.to_le_bytes()),
464 _ => panic!("unsupported length size in test"),
465 }
466 }
467
468 #[test]
469 fn test_parse_group_leaf_node() {
470 let undef8 = 0xFFFF_FFFF_FFFF_FFFFu64;
471 let data = build_group_node(
472 0, 2, undef8, undef8, &[0, 8, 16], &[0x1000, 0x2000], 8,
479 8,
480 );
481
482 let mut cursor = Cursor::new(&data);
483 let node = BTreeV1Node::parse(&mut cursor, 8, 8, None).unwrap();
484
485 assert_eq!(node.node_type, 0);
486 assert_eq!(node.level, 0);
487 assert_eq!(node.entries_used, 2);
488 assert!(Cursor::is_undefined_offset(node.left_sibling, 8));
489 assert!(Cursor::is_undefined_offset(node.right_sibling, 8));
490 assert_eq!(node.keys.len(), 3);
491 assert_eq!(node.children.len(), 2);
492
493 match &node.keys[0] {
494 BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 0),
495 _ => panic!("expected Group key"),
496 }
497 match &node.keys[1] {
498 BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 8),
499 _ => panic!("expected Group key"),
500 }
501 assert_eq!(node.children[0], 0x1000);
502 assert_eq!(node.children[1], 0x2000);
503 }
504
505 #[test]
506 fn test_parse_rawdata_leaf_node() {
507 let undef8 = 0xFFFF_FFFF_FFFF_FFFFu64;
508 let data = build_rawdata_node(
510 0, 1, undef8,
513 undef8,
514 &[
515 (1024, 0, vec![0, 0, 0]), (1024, 0, vec![10, 20, 0]), ],
518 &[0x5000], 8,
520 );
521
522 let mut cursor = Cursor::new(&data);
523 let node = BTreeV1Node::parse(&mut cursor, 8, 8, Some(2)).unwrap();
524
525 assert_eq!(node.node_type, 1);
526 assert_eq!(node.level, 0);
527 assert_eq!(node.entries_used, 1);
528 assert_eq!(node.keys.len(), 2);
529 assert_eq!(node.children.len(), 1);
530
531 match &node.keys[0] {
532 BTreeV1Key::RawData {
533 chunk_size,
534 filter_mask,
535 offsets,
536 } => {
537 assert_eq!(*chunk_size, 1024);
538 assert_eq!(*filter_mask, 0);
539 assert_eq!(offsets, &[0, 0, 0]);
540 }
541 _ => panic!("expected RawData key"),
542 }
543
544 assert_eq!(node.children[0], 0x5000);
545 }
546
547 #[test]
548 fn test_parse_rawdata_leaf_4byte() {
549 let undef4 = 0xFFFF_FFFFu64;
550 let data = build_rawdata_node(
551 0,
552 1,
553 undef4,
554 undef4,
555 &[
556 (512, 0x03, vec![0, 0]), (512, 0, vec![100, 0]),
558 ],
559 &[0x3000],
560 4,
561 );
562
563 let mut cursor = Cursor::new(&data);
564 let node = BTreeV1Node::parse(&mut cursor, 4, 4, Some(1)).unwrap();
565
566 assert_eq!(node.node_type, 1);
567 match &node.keys[0] {
568 BTreeV1Key::RawData {
569 filter_mask,
570 offsets,
571 ..
572 } => {
573 assert_eq!(*filter_mask, 0x03);
574 assert_eq!(offsets, &[0, 0]);
575 }
576 _ => panic!("expected RawData key"),
577 }
578 }
579
580 #[test]
581 fn test_bad_signature() {
582 let mut data = build_group_node(0, 0, u64::MAX, u64::MAX, &[0], &[], 8, 8);
583 data[0] = b'X'; let mut cursor = Cursor::new(&data);
585 assert!(matches!(
586 BTreeV1Node::parse(&mut cursor, 8, 8, None),
587 Err(Error::InvalidBTreeSignature)
588 ));
589 }
590
591 #[test]
592 fn test_collect_leaves_single_leaf() {
593 let undef8 = u64::MAX;
594 let node_data = build_group_node(0, 2, undef8, undef8, &[0, 5, 10], &[0x100, 0x200], 8, 8);
596
597 let results = collect_btree_v1_leaves(&node_data, 0, 8, 8, None, &[], None).unwrap();
598
599 assert_eq!(results.len(), 2);
600 match &results[0].0 {
601 BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 0),
602 _ => panic!("expected Group key"),
603 }
604 assert_eq!(results[0].1, 0x100);
605 assert_eq!(results[1].1, 0x200);
606 }
607
608 #[test]
609 fn test_collect_leaves_two_level_tree() {
610 let undef8 = u64::MAX;
611
612 let leaf_a = build_group_node(0, 1, undef8, undef8, &[0, 5], &[0xA00], 8, 8);
618
619 let leaf_b = build_group_node(0, 1, undef8, undef8, &[10, 15], &[0xB00], 8, 8);
620
621 let root = build_group_node(
622 1,
623 2,
624 undef8,
625 undef8,
626 &[0, 5, 15], &[1000, 2000], 8,
629 8,
630 );
631
632 let total_size = 3000 + leaf_b.len();
634 let mut file_data = vec![0u8; total_size];
635 file_data[..root.len()].copy_from_slice(&root);
636 file_data[1000..1000 + leaf_a.len()].copy_from_slice(&leaf_a);
637 file_data[2000..2000 + leaf_b.len()].copy_from_slice(&leaf_b);
638
639 let results = collect_btree_v1_leaves(&file_data, 0, 8, 8, None, &[], None).unwrap();
640
641 assert_eq!(results.len(), 2);
642 assert_eq!(results[0].1, 0xA00);
643 assert_eq!(results[1].1, 0xB00);
644 }
645
646 #[test]
647 fn test_collect_undefined_root() {
648 let data = vec![0u8; 100];
650 let results = collect_btree_v1_leaves(&data, u64::MAX, 8, 8, None, &[], None).unwrap();
651 assert!(results.is_empty());
652 }
653}