1use crate::error::{Error, Result};
13use crate::io::Cursor;
14
15const BTREE_V1_SIGNATURE: [u8; 4] = *b"TREE";
17
18#[derive(Debug, Clone)]
20pub enum BTreeV1Key {
21 Group { local_heap_offset: u64 },
23 RawData {
26 chunk_size: u32,
27 filter_mask: u32,
28 offsets: Vec<u64>,
29 },
30}
31
32#[derive(Debug, Clone)]
34pub struct BTreeV1Node {
35 pub node_type: u8,
37 pub level: u8,
39 pub entries_used: u16,
41 pub left_sibling: u64,
43 pub right_sibling: u64,
45 pub keys: Vec<BTreeV1Key>,
47 pub children: Vec<u64>,
49}
50
51impl BTreeV1Node {
52 pub fn parse(
69 cursor: &mut Cursor,
70 offset_size: u8,
71 length_size: u8,
72 ndims: Option<u32>,
73 ) -> Result<Self> {
74 let sig = cursor.read_bytes(4)?;
75 if sig != BTREE_V1_SIGNATURE {
76 return Err(Error::InvalidBTreeSignature);
77 }
78
79 let node_type = cursor.read_u8()?;
80 let level = cursor.read_u8()?;
81 let entries_used = cursor.read_u16_le()?;
82 let left_sibling = cursor.read_offset(offset_size)?;
83 let right_sibling = cursor.read_offset(offset_size)?;
84
85 let num_keys = entries_used as usize + 1;
86 let num_children = entries_used as usize;
87
88 let mut keys = Vec::with_capacity(num_keys);
89 let mut children = Vec::with_capacity(num_children);
90
91 for i in 0..num_keys {
94 let key = match node_type {
95 0 => parse_group_key(cursor, length_size)?,
96 1 => parse_raw_data_key(cursor, offset_size, ndims)?,
97 _ => {
98 return Err(Error::InvalidData(format!(
99 "unknown v1 B-tree node type: {node_type}"
100 )));
101 }
102 };
103 keys.push(key);
104
105 if i < num_children {
107 let child_addr = cursor.read_offset(offset_size)?;
108 children.push(child_addr);
109 }
110 }
111
112 Ok(BTreeV1Node {
113 node_type,
114 level,
115 entries_used,
116 left_sibling,
117 right_sibling,
118 keys,
119 children,
120 })
121 }
122}
123
124fn parse_group_key(cursor: &mut Cursor, length_size: u8) -> Result<BTreeV1Key> {
126 let local_heap_offset = cursor.read_length(length_size)?;
127 Ok(BTreeV1Key::Group { local_heap_offset })
128}
129
130fn parse_raw_data_key(
138 cursor: &mut Cursor,
139 offset_size: u8,
140 ndims: Option<u32>,
141) -> Result<BTreeV1Key> {
142 let nd = ndims.ok_or_else(|| {
143 Error::InvalidData("ndims required for raw data chunk B-tree keys".into())
144 })?;
145
146 let chunk_size = cursor.read_u32_le()?;
147 let filter_mask = cursor.read_u32_le()?;
148
149 let num_offsets = nd as usize + 1;
150 let mut offsets = Vec::with_capacity(num_offsets);
151 for _ in 0..num_offsets {
152 offsets.push(cursor.read_offset(offset_size)?);
153 }
154
155 Ok(BTreeV1Key::RawData {
156 chunk_size,
157 filter_mask,
158 offsets,
159 })
160}
161
162pub fn collect_btree_v1_leaves(
171 data: &[u8],
172 root_address: u64,
173 offset_size: u8,
174 length_size: u8,
175 ndims: Option<u32>,
176 chunk_dims: &[u32],
177 chunk_bounds: Option<(&[u64], &[u64])>,
178) -> Result<Vec<(BTreeV1Key, u64)>> {
179 let mut results = Vec::new();
180 collect_recursive(
181 BTreeV1CollectContext {
182 data,
183 offset_size,
184 length_size,
185 ndims,
186 chunk_dims,
187 chunk_bounds,
188 },
189 root_address,
190 &mut results,
191 )?;
192 Ok(results)
193}
194
195#[derive(Clone, Copy)]
196struct BTreeV1CollectContext<'a> {
197 data: &'a [u8],
198 offset_size: u8,
199 length_size: u8,
200 ndims: Option<u32>,
201 chunk_dims: &'a [u32],
202 chunk_bounds: Option<(&'a [u64], &'a [u64])>,
203}
204
205fn collect_recursive(
207 context: BTreeV1CollectContext<'_>,
208 address: u64,
209 results: &mut Vec<(BTreeV1Key, u64)>,
210) -> Result<()> {
211 if Cursor::is_undefined_offset(address, context.offset_size) {
212 return Ok(());
213 }
214
215 if address as usize >= context.data.len() {
216 return Err(Error::OffsetOutOfBounds(address));
217 }
218
219 let mut cursor = Cursor::new(context.data);
220 cursor.set_position(address);
221
222 let node = BTreeV1Node::parse(
223 &mut cursor,
224 context.offset_size,
225 context.length_size,
226 context.ndims,
227 )?;
228
229 if node.level == 0 {
230 for (i, child_addr) in node.children.iter().enumerate() {
232 let include = match &node.keys[i] {
233 BTreeV1Key::RawData { offsets, .. } => {
234 context.chunk_bounds.map_or(true, |(first, last)| {
235 offsets
236 .iter()
237 .zip(context.chunk_dims.iter())
238 .enumerate()
239 .all(|(dim, (offset, chunk_dim))| {
240 let chunk_index = *offset / u64::from(*chunk_dim);
241 chunk_index >= first[dim] && chunk_index <= last[dim]
242 })
243 })
244 }
245 _ => true,
246 };
247
248 if include {
249 results.push((node.keys[i].clone(), *child_addr));
250 }
251 }
252 } else {
253 for child_addr in &node.children {
255 collect_recursive(context, *child_addr, results)?;
256 }
257 }
258
259 Ok(())
260}
261
262#[cfg(test)]
263mod tests {
264 use super::*;
265
266 #[allow(clippy::too_many_arguments)]
268 fn build_group_node(
269 level: u8,
270 entries_used: u16,
271 left_sibling: u64,
272 right_sibling: u64,
273 keys: &[u64], children: &[u64], offset_size: u8,
276 length_size: u8,
277 ) -> Vec<u8> {
278 assert_eq!(keys.len(), entries_used as usize + 1);
279 assert_eq!(children.len(), entries_used as usize);
280
281 let mut buf = Vec::new();
282 buf.extend_from_slice(b"TREE");
283 buf.push(0); buf.push(level);
285 buf.extend_from_slice(&entries_used.to_le_bytes());
286 push_offset(&mut buf, left_sibling, offset_size);
287 push_offset(&mut buf, right_sibling, offset_size);
288
289 for i in 0..keys.len() {
290 push_length(&mut buf, keys[i], length_size);
291 if i < children.len() {
292 push_offset(&mut buf, children[i], offset_size);
293 }
294 }
295
296 buf
297 }
298
299 fn build_rawdata_node(
301 level: u8,
302 entries_used: u16,
303 left_sibling: u64,
304 right_sibling: u64,
305 keys: &[(u32, u32, Vec<u64>)], children: &[u64],
307 offset_size: u8,
308 ) -> Vec<u8> {
309 assert_eq!(keys.len(), entries_used as usize + 1);
310 assert_eq!(children.len(), entries_used as usize);
311
312 let mut buf = Vec::new();
313 buf.extend_from_slice(b"TREE");
314 buf.push(1); buf.push(level);
316 buf.extend_from_slice(&entries_used.to_le_bytes());
317 push_offset(&mut buf, left_sibling, offset_size);
318 push_offset(&mut buf, right_sibling, offset_size);
319
320 for i in 0..keys.len() {
321 let (cs, fm, ref offs) = keys[i];
322 buf.extend_from_slice(&cs.to_le_bytes());
323 buf.extend_from_slice(&fm.to_le_bytes());
324 for &o in offs {
325 push_offset(&mut buf, o, offset_size);
326 }
327 if i < children.len() {
328 push_offset(&mut buf, children[i], offset_size);
329 }
330 }
331
332 buf
333 }
334
335 fn push_offset(buf: &mut Vec<u8>, val: u64, size: u8) {
336 match size {
337 4 => buf.extend_from_slice(&(val as u32).to_le_bytes()),
338 8 => buf.extend_from_slice(&val.to_le_bytes()),
339 _ => panic!("unsupported offset size in test"),
340 }
341 }
342
343 fn push_length(buf: &mut Vec<u8>, val: u64, size: u8) {
344 match size {
345 4 => buf.extend_from_slice(&(val as u32).to_le_bytes()),
346 8 => buf.extend_from_slice(&val.to_le_bytes()),
347 _ => panic!("unsupported length size in test"),
348 }
349 }
350
351 #[test]
352 fn test_parse_group_leaf_node() {
353 let undef8 = 0xFFFF_FFFF_FFFF_FFFFu64;
354 let data = build_group_node(
355 0, 2, undef8, undef8, &[0, 8, 16], &[0x1000, 0x2000], 8,
362 8,
363 );
364
365 let mut cursor = Cursor::new(&data);
366 let node = BTreeV1Node::parse(&mut cursor, 8, 8, None).unwrap();
367
368 assert_eq!(node.node_type, 0);
369 assert_eq!(node.level, 0);
370 assert_eq!(node.entries_used, 2);
371 assert!(Cursor::is_undefined_offset(node.left_sibling, 8));
372 assert!(Cursor::is_undefined_offset(node.right_sibling, 8));
373 assert_eq!(node.keys.len(), 3);
374 assert_eq!(node.children.len(), 2);
375
376 match &node.keys[0] {
377 BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 0),
378 _ => panic!("expected Group key"),
379 }
380 match &node.keys[1] {
381 BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 8),
382 _ => panic!("expected Group key"),
383 }
384 assert_eq!(node.children[0], 0x1000);
385 assert_eq!(node.children[1], 0x2000);
386 }
387
388 #[test]
389 fn test_parse_rawdata_leaf_node() {
390 let undef8 = 0xFFFF_FFFF_FFFF_FFFFu64;
391 let data = build_rawdata_node(
393 0, 1, undef8,
396 undef8,
397 &[
398 (1024, 0, vec![0, 0, 0]), (1024, 0, vec![10, 20, 0]), ],
401 &[0x5000], 8,
403 );
404
405 let mut cursor = Cursor::new(&data);
406 let node = BTreeV1Node::parse(&mut cursor, 8, 8, Some(2)).unwrap();
407
408 assert_eq!(node.node_type, 1);
409 assert_eq!(node.level, 0);
410 assert_eq!(node.entries_used, 1);
411 assert_eq!(node.keys.len(), 2);
412 assert_eq!(node.children.len(), 1);
413
414 match &node.keys[0] {
415 BTreeV1Key::RawData {
416 chunk_size,
417 filter_mask,
418 offsets,
419 } => {
420 assert_eq!(*chunk_size, 1024);
421 assert_eq!(*filter_mask, 0);
422 assert_eq!(offsets, &[0, 0, 0]);
423 }
424 _ => panic!("expected RawData key"),
425 }
426
427 assert_eq!(node.children[0], 0x5000);
428 }
429
430 #[test]
431 fn test_parse_rawdata_leaf_4byte() {
432 let undef4 = 0xFFFF_FFFFu64;
433 let data = build_rawdata_node(
434 0,
435 1,
436 undef4,
437 undef4,
438 &[
439 (512, 0x03, vec![0, 0]), (512, 0, vec![100, 0]),
441 ],
442 &[0x3000],
443 4,
444 );
445
446 let mut cursor = Cursor::new(&data);
447 let node = BTreeV1Node::parse(&mut cursor, 4, 4, Some(1)).unwrap();
448
449 assert_eq!(node.node_type, 1);
450 match &node.keys[0] {
451 BTreeV1Key::RawData {
452 filter_mask,
453 offsets,
454 ..
455 } => {
456 assert_eq!(*filter_mask, 0x03);
457 assert_eq!(offsets, &[0, 0]);
458 }
459 _ => panic!("expected RawData key"),
460 }
461 }
462
463 #[test]
464 fn test_bad_signature() {
465 let mut data = build_group_node(0, 0, u64::MAX, u64::MAX, &[0], &[], 8, 8);
466 data[0] = b'X'; let mut cursor = Cursor::new(&data);
468 assert!(matches!(
469 BTreeV1Node::parse(&mut cursor, 8, 8, None),
470 Err(Error::InvalidBTreeSignature)
471 ));
472 }
473
474 #[test]
475 fn test_collect_leaves_single_leaf() {
476 let undef8 = u64::MAX;
477 let node_data = build_group_node(0, 2, undef8, undef8, &[0, 5, 10], &[0x100, 0x200], 8, 8);
479
480 let results = collect_btree_v1_leaves(&node_data, 0, 8, 8, None, &[], None).unwrap();
481
482 assert_eq!(results.len(), 2);
483 match &results[0].0 {
484 BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 0),
485 _ => panic!("expected Group key"),
486 }
487 assert_eq!(results[0].1, 0x100);
488 assert_eq!(results[1].1, 0x200);
489 }
490
491 #[test]
492 fn test_collect_leaves_two_level_tree() {
493 let undef8 = u64::MAX;
494
495 let leaf_a = build_group_node(0, 1, undef8, undef8, &[0, 5], &[0xA00], 8, 8);
501
502 let leaf_b = build_group_node(0, 1, undef8, undef8, &[10, 15], &[0xB00], 8, 8);
503
504 let root = build_group_node(
505 1,
506 2,
507 undef8,
508 undef8,
509 &[0, 5, 15], &[1000, 2000], 8,
512 8,
513 );
514
515 let total_size = 3000 + leaf_b.len();
517 let mut file_data = vec![0u8; total_size];
518 file_data[..root.len()].copy_from_slice(&root);
519 file_data[1000..1000 + leaf_a.len()].copy_from_slice(&leaf_a);
520 file_data[2000..2000 + leaf_b.len()].copy_from_slice(&leaf_b);
521
522 let results = collect_btree_v1_leaves(&file_data, 0, 8, 8, None, &[], None).unwrap();
523
524 assert_eq!(results.len(), 2);
525 assert_eq!(results[0].1, 0xA00);
526 assert_eq!(results[1].1, 0xB00);
527 }
528
529 #[test]
530 fn test_collect_undefined_root() {
531 let data = vec![0u8; 100];
533 let results = collect_btree_v1_leaves(&data, u64::MAX, 8, 8, None, &[], None).unwrap();
534 assert!(results.is_empty());
535 }
536}