Skip to main content

rust_hdf5/format/chunk_index/
extensible_array.rs

1//! Extensible Array (EA) chunk index structures for HDF5.
2//!
3//! Implements the on-disk format for the extensible array used to index
4//! chunked datasets with one unlimited dimension (the typical SWMR use case).
5//!
6//! Structures:
7//!   - Header (EAHD): metadata and statistics about the extensible array
8//!   - Index Block (EAIB): holds direct chunk addresses and pointers to data/super blocks
9//!   - Data Block (EADB): holds additional chunk addresses when the index block is full
10
11use crate::format::checksum::checksum_metadata;
12use crate::format::{FormatContext, FormatError, FormatResult, UNDEF_ADDR};
13
/// Magic bytes that open an extensible array header ("EAHD").
pub const EAHD_SIGNATURE: [u8; 4] = [b'E', b'A', b'H', b'D'];
/// Magic bytes that open an extensible array index block ("EAIB").
pub const EAIB_SIGNATURE: [u8; 4] = [b'E', b'A', b'I', b'B'];
/// Magic bytes that open an extensible array data block ("EADB").
pub const EADB_SIGNATURE: [u8; 4] = [b'E', b'A', b'D', b'B'];

/// Version byte written by the encoders and required by the decoders.
pub const EA_VERSION: u8 = 0;

/// Class ID stored for unfiltered chunk indexes (H5EA_CLS_CHUNK).
pub const EA_CLS_CHUNK: u8 = 0;
/// Class ID stored for filtered chunk indexes (H5EA_CLS_FILT_CHUNK).
pub const EA_CLS_FILT_CHUNK: u8 = 1;
28
/// One element of a filtered (compressed) chunk index.
///
/// Unlike an unfiltered element, which is only an address, a filtered
/// element also records the stored size and the filter mask of the chunk.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct FilteredChunkEntry {
    /// File address where the compressed chunk data starts.
    pub addr: u64,
    /// Length of the compressed chunk, in bytes.
    pub nbytes: u64,
    /// Filter mask; a set bit N means filter N was NOT applied.
    pub filter_mask: u32,
}
39
40impl FilteredChunkEntry {
41    pub fn undef() -> Self {
42        Self {
43            addr: UNDEF_ADDR,
44            nbytes: 0,
45            filter_mask: 0,
46        }
47    }
48
49    pub fn is_undef(&self) -> bool {
50        self.addr == UNDEF_ADDR
51    }
52
53    /// Compute raw element size on disk: sizeof_addr + chunk_size_len + 4.
54    pub fn raw_size(sizeof_addr: u8, chunk_size_len: u8) -> u8 {
55        sizeof_addr + chunk_size_len + 4
56    }
57
58    /// Encode a single filtered entry.
59    pub fn encode(&self, sizeof_addr: usize, chunk_size_len: usize) -> Vec<u8> {
60        let mut buf = Vec::with_capacity(sizeof_addr + chunk_size_len + 4);
61        buf.extend_from_slice(&self.addr.to_le_bytes()[..sizeof_addr]);
62        buf.extend_from_slice(&self.nbytes.to_le_bytes()[..chunk_size_len]);
63        buf.extend_from_slice(&self.filter_mask.to_le_bytes());
64        buf
65    }
66
67    /// Decode a single filtered entry.
68    pub fn decode(buf: &[u8], sizeof_addr: usize, chunk_size_len: usize) -> Self {
69        let addr = read_addr(buf, sizeof_addr);
70        let nbytes = read_size(&buf[sizeof_addr..], chunk_size_len);
71        let off = sizeof_addr + chunk_size_len;
72        let filter_mask = u32::from_le_bytes([buf[off], buf[off + 1], buf[off + 2], buf[off + 3]]);
73        Self {
74            addr,
75            nbytes,
76            filter_mask,
77        }
78    }
79}
80
/// Number of bytes used to store a filtered chunk's size on disk.
///
/// Follows the HDF5 C formula `1 + (log2(chunk_size) + 8) / 8`, where `log2`
/// is rounded down, and never returns more than 8. A chunk size of zero
/// yields 1.
pub fn compute_chunk_size_len(uncompressed_chunk_bytes: u64) -> u8 {
    match uncompressed_chunk_bytes {
        0 => 1,
        size => {
            // (log2 + 8) / 8 == log2 / 8 + 1 under integer division.
            let needed = size.ilog2() / 8 + 2;
            needed.min(8) as u8
        }
    }
}
91
/// In-memory form of the extensible array header ("EAHD").
///
/// Encoded layout, in order:
/// ```text
/// signature "EAHD"            4 bytes
/// version (0)                 1 byte
/// class_id                    1 byte
/// raw_elmt_size               1 byte
/// max_nelmts_bits             1 byte
/// idx_blk_elmts               1 byte
/// data_blk_min_elmts          1 byte
/// sup_blk_min_data_ptrs       1 byte
/// max_dblk_page_nelmts_bits   1 byte
/// six statistics              sizeof_size bytes each
/// idx_blk_addr                sizeof_addr bytes
/// checksum                    4 bytes
/// ```
#[derive(Clone, Debug, PartialEq)]
pub struct ExtensibleArrayHeader {
    /// Element class: `EA_CLS_CHUNK` or `EA_CLS_FILT_CHUNK`.
    pub class_id: u8,
    /// Encoded size of one array element, in bytes.
    pub raw_elmt_size: u8,
    /// The array can index up to `2^max_nelmts_bits` elements.
    pub max_nelmts_bits: u8,
    /// Number of elements stored directly in the index block.
    pub idx_blk_elmts: u8,
    /// Element count of the smallest data block.
    pub data_blk_min_elmts: u8,
    /// Minimum data block pointer count of a super block; it determines how
    /// many data block addresses the index block holds.
    pub sup_blk_min_data_ptrs: u8,
    /// Stored verbatim by this module; presumably log2 of the maximum number
    /// of elements in a data block page (TODO confirm against the spec).
    pub max_dblk_page_nelmts_bits: u8,
    // Statistics, each encoded in `sizeof_size` bytes.
    /// Statistic: number of super blocks created.
    pub num_sblks_created: u64,
    /// Statistic: size of the super blocks created.
    pub size_sblks_created: u64,
    /// Statistic: number of data blocks created.
    pub num_dblks_created: u64,
    /// Statistic: size of the data blocks created.
    pub size_dblks_created: u64,
    /// Statistic: highest element index that has been set.
    pub max_idx_set: u64,
    /// Statistic: number of elements realized.
    pub num_elmts_realized: u64,
    /// File address of the index block (`UNDEF_ADDR` in a fresh header).
    pub idx_blk_addr: u64,
}
122
123impl ExtensibleArrayHeader {
124    /// Create a new header for unfiltered chunk indexing.
125    pub fn new_for_chunks(ctx: &FormatContext) -> Self {
126        Self {
127            class_id: EA_CLS_CHUNK,
128            raw_elmt_size: ctx.sizeof_addr,
129            max_nelmts_bits: 32,
130            idx_blk_elmts: 4,
131            data_blk_min_elmts: 16,
132            sup_blk_min_data_ptrs: 4,
133            max_dblk_page_nelmts_bits: 10,
134            num_sblks_created: 0,
135            size_sblks_created: 0,
136            num_dblks_created: 0,
137            size_dblks_created: 0,
138            max_idx_set: 0,
139            num_elmts_realized: 0,
140            idx_blk_addr: UNDEF_ADDR,
141        }
142    }
143
144    /// Create a new header for filtered (compressed) chunk indexing.
145    pub fn new_for_filtered_chunks(ctx: &FormatContext, chunk_size_len: u8) -> Self {
146        Self {
147            class_id: EA_CLS_FILT_CHUNK,
148            raw_elmt_size: FilteredChunkEntry::raw_size(ctx.sizeof_addr, chunk_size_len),
149            max_nelmts_bits: 32,
150            idx_blk_elmts: 4,
151            data_blk_min_elmts: 16,
152            sup_blk_min_data_ptrs: 4,
153            max_dblk_page_nelmts_bits: 10,
154            num_sblks_created: 0,
155            size_sblks_created: 0,
156            num_dblks_created: 0,
157            size_dblks_created: 0,
158            max_idx_set: 0,
159            num_elmts_realized: 0,
160            idx_blk_addr: UNDEF_ADDR,
161        }
162    }
163
164    /// Compute the encoded size (for pre-allocation).
165    pub fn encoded_size(&self, ctx: &FormatContext) -> usize {
166        let ss = ctx.sizeof_size as usize;
167        let sa = ctx.sizeof_addr as usize;
168        // signature(4) + version(1) + class_id(1)
169        // + raw_elmt_size(1) + max_nelmts_bits(1) + idx_blk_elmts(1)
170        // + data_blk_min_elmts(1) + sup_blk_min_data_ptrs(1)
171        // + max_dblk_page_nelmts_bits(1)
172        // + 6 * sizeof_size (statistics)
173        // + sizeof_addr (idx_blk_addr)
174        // + checksum(4)
175        4 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 6 * ss + sa + 4
176    }
177
178    pub fn encode(&self, ctx: &FormatContext) -> Vec<u8> {
179        let ss = ctx.sizeof_size as usize;
180        let sa = ctx.sizeof_addr as usize;
181        let size = self.encoded_size(ctx);
182        let mut buf = Vec::with_capacity(size);
183
184        buf.extend_from_slice(&EAHD_SIGNATURE);
185        buf.push(EA_VERSION);
186        buf.push(self.class_id);
187        buf.push(self.raw_elmt_size);
188        buf.push(self.max_nelmts_bits);
189        buf.push(self.idx_blk_elmts);
190        buf.push(self.data_blk_min_elmts);
191        buf.push(self.sup_blk_min_data_ptrs);
192        buf.push(self.max_dblk_page_nelmts_bits);
193
194        // Statistics
195        buf.extend_from_slice(&self.num_sblks_created.to_le_bytes()[..ss]);
196        buf.extend_from_slice(&self.size_sblks_created.to_le_bytes()[..ss]);
197        buf.extend_from_slice(&self.num_dblks_created.to_le_bytes()[..ss]);
198        buf.extend_from_slice(&self.size_dblks_created.to_le_bytes()[..ss]);
199        buf.extend_from_slice(&self.max_idx_set.to_le_bytes()[..ss]);
200        buf.extend_from_slice(&self.num_elmts_realized.to_le_bytes()[..ss]);
201
202        // Index block address
203        buf.extend_from_slice(&self.idx_blk_addr.to_le_bytes()[..sa]);
204
205        // Checksum
206        let cksum = checksum_metadata(&buf);
207        buf.extend_from_slice(&cksum.to_le_bytes());
208
209        debug_assert_eq!(buf.len(), size);
210        buf
211    }
212
213    pub fn decode(buf: &[u8], ctx: &FormatContext) -> FormatResult<Self> {
214        let ss = ctx.sizeof_size as usize;
215        let sa = ctx.sizeof_addr as usize;
216        let min_size = 4 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 6 * ss + sa + 4;
217
218        if buf.len() < min_size {
219            return Err(FormatError::BufferTooShort {
220                needed: min_size,
221                available: buf.len(),
222            });
223        }
224
225        if buf[0..4] != EAHD_SIGNATURE {
226            return Err(FormatError::InvalidSignature);
227        }
228
229        let version = buf[4];
230        if version != EA_VERSION {
231            return Err(FormatError::InvalidVersion(version));
232        }
233
234        // Verify checksum
235        let data_end = min_size - 4;
236        let stored_cksum = u32::from_le_bytes([
237            buf[data_end],
238            buf[data_end + 1],
239            buf[data_end + 2],
240            buf[data_end + 3],
241        ]);
242        let computed_cksum = checksum_metadata(&buf[..data_end]);
243        if stored_cksum != computed_cksum {
244            return Err(FormatError::ChecksumMismatch {
245                expected: stored_cksum,
246                computed: computed_cksum,
247            });
248        }
249
250        let mut pos = 5;
251        let class_id = buf[pos];
252        pos += 1;
253        let raw_elmt_size = buf[pos];
254        pos += 1;
255        let max_nelmts_bits = buf[pos];
256        pos += 1;
257        let idx_blk_elmts = buf[pos];
258        pos += 1;
259        let data_blk_min_elmts = buf[pos];
260        pos += 1;
261        let sup_blk_min_data_ptrs = buf[pos];
262        pos += 1;
263        let max_dblk_page_nelmts_bits = buf[pos];
264        pos += 1;
265
266        let num_sblks_created = read_size(&buf[pos..], ss);
267        pos += ss;
268        let size_sblks_created = read_size(&buf[pos..], ss);
269        pos += ss;
270        let num_dblks_created = read_size(&buf[pos..], ss);
271        pos += ss;
272        let size_dblks_created = read_size(&buf[pos..], ss);
273        pos += ss;
274        let max_idx_set = read_size(&buf[pos..], ss);
275        pos += ss;
276        let num_elmts_realized = read_size(&buf[pos..], ss);
277        pos += ss;
278
279        let idx_blk_addr = read_addr(&buf[pos..], sa);
280
281        Ok(Self {
282            class_id,
283            raw_elmt_size,
284            max_nelmts_bits,
285            idx_blk_elmts,
286            data_blk_min_elmts,
287            sup_blk_min_data_ptrs,
288            max_dblk_page_nelmts_bits,
289            num_sblks_created,
290            size_sblks_created,
291            num_dblks_created,
292            size_dblks_created,
293            max_idx_set,
294            num_elmts_realized,
295            idx_blk_addr,
296        })
297    }
298}
299
/// In-memory form of the extensible array index block ("EAIB") for
/// unfiltered chunks.
///
/// Encoded layout, in order:
/// ```text
/// signature "EAIB"        4 bytes
/// version (0)             1 byte
/// class_id                1 byte
/// header_addr             sizeof_addr bytes
/// elements                idx_blk_elmts * raw_elmt_size bytes
/// data block addresses    ndblk_addrs * sizeof_addr bytes
/// super block addresses   nsblk_addrs * sizeof_addr bytes
/// checksum                4 bytes
/// ```
#[derive(Clone, Debug, PartialEq)]
pub struct ExtensibleArrayIndexBlock {
    /// Element class ID, as stored in the block.
    pub class_id: u8,
    /// File address of the header this block belongs to.
    pub header_addr: u64,
    /// Chunk addresses held directly in the index block.
    pub elements: Vec<u64>,
    /// Addresses of data blocks.
    pub dblk_addrs: Vec<u64>,
    /// Addresses of super blocks.
    pub sblk_addrs: Vec<u64>,
}
322
323/// Filtered variant of the extensible array index block.
324///
325/// Stores `FilteredChunkEntry` elements instead of raw addresses.
326#[derive(Debug, Clone, PartialEq)]
327pub struct FilteredIndexBlock {
328    pub class_id: u8,
329    pub header_addr: u64,
330    pub elements: Vec<FilteredChunkEntry>,
331    pub dblk_addrs: Vec<u64>,
332    pub sblk_addrs: Vec<u64>,
333}
334
335impl FilteredIndexBlock {
336    pub fn new(
337        header_addr: u64,
338        idx_blk_elmts: u8,
339        ndblk_addrs: usize,
340        nsblk_addrs: usize,
341    ) -> Self {
342        Self {
343            class_id: EA_CLS_FILT_CHUNK,
344            header_addr,
345            elements: vec![FilteredChunkEntry::undef(); idx_blk_elmts as usize],
346            dblk_addrs: vec![UNDEF_ADDR; ndblk_addrs],
347            sblk_addrs: vec![UNDEF_ADDR; nsblk_addrs],
348        }
349    }
350
351    pub fn encode(&self, ctx: &FormatContext, chunk_size_len: u8) -> Vec<u8> {
352        let sa = ctx.sizeof_addr as usize;
353        let elmt_size = FilteredChunkEntry::raw_size(ctx.sizeof_addr, chunk_size_len) as usize;
354        let size = 4
355            + 1
356            + 1
357            + sa
358            + self.elements.len() * elmt_size
359            + self.dblk_addrs.len() * sa
360            + self.sblk_addrs.len() * sa
361            + 4;
362        let mut buf = Vec::with_capacity(size);
363
364        buf.extend_from_slice(&EAIB_SIGNATURE);
365        buf.push(EA_VERSION);
366        buf.push(self.class_id);
367        buf.extend_from_slice(&self.header_addr.to_le_bytes()[..sa]);
368
369        for elem in &self.elements {
370            buf.extend_from_slice(&elem.encode(sa, chunk_size_len as usize));
371        }
372        for &addr in &self.dblk_addrs {
373            buf.extend_from_slice(&addr.to_le_bytes()[..sa]);
374        }
375        for &addr in &self.sblk_addrs {
376            buf.extend_from_slice(&addr.to_le_bytes()[..sa]);
377        }
378
379        let cksum = checksum_metadata(&buf);
380        buf.extend_from_slice(&cksum.to_le_bytes());
381        debug_assert_eq!(buf.len(), size);
382        buf
383    }
384
385    pub fn decode(
386        buf: &[u8],
387        ctx: &FormatContext,
388        idx_blk_elmts: usize,
389        ndblk_addrs: usize,
390        nsblk_addrs: usize,
391        chunk_size_len: u8,
392    ) -> FormatResult<Self> {
393        let sa = ctx.sizeof_addr as usize;
394        let elmt_size = FilteredChunkEntry::raw_size(ctx.sizeof_addr, chunk_size_len) as usize;
395        let min_size =
396            4 + 1 + 1 + sa + idx_blk_elmts * elmt_size + ndblk_addrs * sa + nsblk_addrs * sa + 4;
397
398        if buf.len() < min_size {
399            return Err(FormatError::BufferTooShort {
400                needed: min_size,
401                available: buf.len(),
402            });
403        }
404        if buf[0..4] != EAIB_SIGNATURE {
405            return Err(FormatError::InvalidSignature);
406        }
407        if buf[4] != EA_VERSION {
408            return Err(FormatError::InvalidVersion(buf[4]));
409        }
410
411        let data_end = min_size - 4;
412        let stored = u32::from_le_bytes([
413            buf[data_end],
414            buf[data_end + 1],
415            buf[data_end + 2],
416            buf[data_end + 3],
417        ]);
418        let computed = checksum_metadata(&buf[..data_end]);
419        if stored != computed {
420            return Err(FormatError::ChecksumMismatch {
421                expected: stored,
422                computed,
423            });
424        }
425
426        let class_id = buf[5];
427        let mut pos = 6;
428        let header_addr = read_addr(&buf[pos..], sa);
429        pos += sa;
430
431        let mut elements = Vec::with_capacity(idx_blk_elmts);
432        for _ in 0..idx_blk_elmts {
433            elements.push(FilteredChunkEntry::decode(
434                &buf[pos..],
435                sa,
436                chunk_size_len as usize,
437            ));
438            pos += elmt_size;
439        }
440        let mut dblk_addrs = Vec::with_capacity(ndblk_addrs);
441        for _ in 0..ndblk_addrs {
442            dblk_addrs.push(read_addr(&buf[pos..], sa));
443            pos += sa;
444        }
445        let mut sblk_addrs = Vec::with_capacity(nsblk_addrs);
446        for _ in 0..nsblk_addrs {
447            sblk_addrs.push(read_addr(&buf[pos..], sa));
448            pos += sa;
449        }
450
451        Ok(Self {
452            class_id,
453            header_addr,
454            elements,
455            dblk_addrs,
456            sblk_addrs,
457        })
458    }
459}
460
461/// Filtered variant of the extensible array data block.
462#[derive(Debug, Clone, PartialEq)]
463pub struct FilteredDataBlock {
464    pub class_id: u8,
465    pub header_addr: u64,
466    pub block_offset: u64,
467    pub elements: Vec<FilteredChunkEntry>,
468}
469
470impl FilteredDataBlock {
471    pub fn new(header_addr: u64, block_offset: u64, nelmts: usize) -> Self {
472        Self {
473            class_id: EA_CLS_FILT_CHUNK,
474            header_addr,
475            block_offset,
476            elements: vec![FilteredChunkEntry::undef(); nelmts],
477        }
478    }
479
480    pub fn encode(&self, ctx: &FormatContext, max_nelmts_bits: u8, chunk_size_len: u8) -> Vec<u8> {
481        let sa = ctx.sizeof_addr as usize;
482        let bo_size = ExtensibleArrayDataBlock::block_offset_size(max_nelmts_bits);
483        let elmt_size = FilteredChunkEntry::raw_size(ctx.sizeof_addr, chunk_size_len) as usize;
484        let size = 4 + 1 + 1 + sa + bo_size + self.elements.len() * elmt_size + 4;
485        let mut buf = Vec::with_capacity(size);
486
487        buf.extend_from_slice(&EADB_SIGNATURE);
488        buf.push(EA_VERSION);
489        buf.push(self.class_id);
490        buf.extend_from_slice(&self.header_addr.to_le_bytes()[..sa]);
491        buf.extend_from_slice(&self.block_offset.to_le_bytes()[..bo_size]);
492
493        for elem in &self.elements {
494            buf.extend_from_slice(&elem.encode(sa, chunk_size_len as usize));
495        }
496
497        let cksum = checksum_metadata(&buf);
498        buf.extend_from_slice(&cksum.to_le_bytes());
499        debug_assert_eq!(buf.len(), size);
500        buf
501    }
502
503    pub fn decode(
504        buf: &[u8],
505        ctx: &FormatContext,
506        max_nelmts_bits: u8,
507        nelmts: usize,
508        chunk_size_len: u8,
509    ) -> FormatResult<Self> {
510        let sa = ctx.sizeof_addr as usize;
511        let bo_size = ExtensibleArrayDataBlock::block_offset_size(max_nelmts_bits);
512        let elmt_size = FilteredChunkEntry::raw_size(ctx.sizeof_addr, chunk_size_len) as usize;
513        let min_size = 4 + 1 + 1 + sa + bo_size + nelmts * elmt_size + 4;
514
515        if buf.len() < min_size {
516            return Err(FormatError::BufferTooShort {
517                needed: min_size,
518                available: buf.len(),
519            });
520        }
521        if buf[0..4] != EADB_SIGNATURE {
522            return Err(FormatError::InvalidSignature);
523        }
524        if buf[4] != EA_VERSION {
525            return Err(FormatError::InvalidVersion(buf[4]));
526        }
527
528        let data_end = min_size - 4;
529        let stored = u32::from_le_bytes([
530            buf[data_end],
531            buf[data_end + 1],
532            buf[data_end + 2],
533            buf[data_end + 3],
534        ]);
535        let computed = checksum_metadata(&buf[..data_end]);
536        if stored != computed {
537            return Err(FormatError::ChecksumMismatch {
538                expected: stored,
539                computed,
540            });
541        }
542
543        let class_id = buf[5];
544        let mut pos = 6;
545        let header_addr = read_addr(&buf[pos..], sa);
546        pos += sa;
547        let block_offset = read_size(&buf[pos..], bo_size);
548        pos += bo_size;
549
550        let mut elements = Vec::with_capacity(nelmts);
551        for _ in 0..nelmts {
552            elements.push(FilteredChunkEntry::decode(
553                &buf[pos..],
554                sa,
555                chunk_size_len as usize,
556            ));
557            pos += elmt_size;
558        }
559
560        Ok(Self {
561            class_id,
562            header_addr,
563            block_offset,
564            elements,
565        })
566    }
567}
568
569impl ExtensibleArrayIndexBlock {
570    /// Create a new empty index block.
571    pub fn new(
572        header_addr: u64,
573        idx_blk_elmts: u8,
574        ndblk_addrs: usize,
575        nsblk_addrs: usize,
576    ) -> Self {
577        Self {
578            class_id: EA_CLS_CHUNK,
579            header_addr,
580            elements: vec![UNDEF_ADDR; idx_blk_elmts as usize],
581            dblk_addrs: vec![UNDEF_ADDR; ndblk_addrs],
582            sblk_addrs: vec![UNDEF_ADDR; nsblk_addrs],
583        }
584    }
585
586    /// Compute the encoded size.
587    pub fn encoded_size(&self, ctx: &FormatContext) -> usize {
588        let sa = ctx.sizeof_addr as usize;
589        // signature(4) + version(1) + class_id(1)
590        // + header_addr(sa)
591        // + elements(n * sa)
592        // + dblk_addrs(n * sa)
593        // + sblk_addrs(n * sa)
594        // + checksum(4)
595        4 + 1
596            + 1
597            + sa
598            + self.elements.len() * sa
599            + self.dblk_addrs.len() * sa
600            + self.sblk_addrs.len() * sa
601            + 4
602    }
603
604    pub fn encode(&self, ctx: &FormatContext) -> Vec<u8> {
605        let sa = ctx.sizeof_addr as usize;
606        let size = self.encoded_size(ctx);
607        let mut buf = Vec::with_capacity(size);
608
609        buf.extend_from_slice(&EAIB_SIGNATURE);
610        buf.push(EA_VERSION);
611        buf.push(self.class_id);
612        buf.extend_from_slice(&self.header_addr.to_le_bytes()[..sa]);
613
614        for &elem in &self.elements {
615            buf.extend_from_slice(&elem.to_le_bytes()[..sa]);
616        }
617
618        for &addr in &self.dblk_addrs {
619            buf.extend_from_slice(&addr.to_le_bytes()[..sa]);
620        }
621
622        for &addr in &self.sblk_addrs {
623            buf.extend_from_slice(&addr.to_le_bytes()[..sa]);
624        }
625
626        let cksum = checksum_metadata(&buf);
627        buf.extend_from_slice(&cksum.to_le_bytes());
628
629        debug_assert_eq!(buf.len(), size);
630        buf
631    }
632
633    pub fn decode(
634        buf: &[u8],
635        ctx: &FormatContext,
636        idx_blk_elmts: usize,
637        ndblk_addrs: usize,
638        nsblk_addrs: usize,
639    ) -> FormatResult<Self> {
640        let sa = ctx.sizeof_addr as usize;
641        let min_size =
642            4 + 1 + 1 + sa + idx_blk_elmts * sa + ndblk_addrs * sa + nsblk_addrs * sa + 4;
643
644        if buf.len() < min_size {
645            return Err(FormatError::BufferTooShort {
646                needed: min_size,
647                available: buf.len(),
648            });
649        }
650
651        if buf[0..4] != EAIB_SIGNATURE {
652            return Err(FormatError::InvalidSignature);
653        }
654
655        let version = buf[4];
656        if version != EA_VERSION {
657            return Err(FormatError::InvalidVersion(version));
658        }
659
660        // Verify checksum
661        let data_end = min_size - 4;
662        let stored_cksum = u32::from_le_bytes([
663            buf[data_end],
664            buf[data_end + 1],
665            buf[data_end + 2],
666            buf[data_end + 3],
667        ]);
668        let computed_cksum = checksum_metadata(&buf[..data_end]);
669        if stored_cksum != computed_cksum {
670            return Err(FormatError::ChecksumMismatch {
671                expected: stored_cksum,
672                computed: computed_cksum,
673            });
674        }
675
676        let class_id = buf[5];
677        let mut pos = 6;
678        let header_addr = read_addr(&buf[pos..], sa);
679        pos += sa;
680
681        let mut elements = Vec::with_capacity(idx_blk_elmts);
682        for _ in 0..idx_blk_elmts {
683            elements.push(read_addr(&buf[pos..], sa));
684            pos += sa;
685        }
686
687        let mut dblk_addrs = Vec::with_capacity(ndblk_addrs);
688        for _ in 0..ndblk_addrs {
689            dblk_addrs.push(read_addr(&buf[pos..], sa));
690            pos += sa;
691        }
692
693        let mut sblk_addrs = Vec::with_capacity(nsblk_addrs);
694        for _ in 0..nsblk_addrs {
695            sblk_addrs.push(read_addr(&buf[pos..], sa));
696            pos += sa;
697        }
698
699        Ok(Self {
700            class_id,
701            header_addr,
702            elements,
703            dblk_addrs,
704            sblk_addrs,
705        })
706    }
707}
708
/// In-memory form of the extensible array data block ("EADB") for
/// unfiltered chunks.
///
/// Encoded layout, in order:
/// ```text
/// signature "EADB"   4 bytes
/// version (0)        1 byte
/// class_id           1 byte
/// header_addr        sizeof_addr bytes
/// block_offset       variable length
/// elements           nelmts * raw_elmt_size bytes
/// checksum           4 bytes
/// ```
#[derive(Clone, Debug, PartialEq)]
pub struct ExtensibleArrayDataBlock {
    /// Element class ID, as stored in the block.
    pub class_id: u8,
    /// File address of the header this block belongs to.
    pub header_addr: u64,
    /// Block offset field, stored after the header address.
    pub block_offset: u64,
    /// Chunk addresses held by this block.
    pub elements: Vec<u64>,
}
727
728impl ExtensibleArrayDataBlock {
729    /// Create a new empty data block.
730    pub fn new(header_addr: u64, block_offset: u64, nelmts: usize) -> Self {
731        Self {
732            class_id: EA_CLS_CHUNK,
733            header_addr,
734            block_offset,
735            elements: vec![UNDEF_ADDR; nelmts],
736        }
737    }
738
739    /// Number of bytes needed for the block_offset field.
740    pub fn block_offset_size(max_nelmts_bits: u8) -> usize {
741        std::cmp::max(1, (max_nelmts_bits as usize).div_ceil(8))
742    }
743
744    /// Compute the encoded size.
745    pub fn encoded_size(&self, ctx: &FormatContext, max_nelmts_bits: u8) -> usize {
746        let sa = ctx.sizeof_addr as usize;
747        let bo_size = Self::block_offset_size(max_nelmts_bits);
748        // signature(4) + version(1) + class_id(1)
749        // + header_addr(sa) + block_offset(bo_size)
750        // + elements(n * sa) + checksum(4)
751        4 + 1 + 1 + sa + bo_size + self.elements.len() * sa + 4
752    }
753
754    pub fn encode(&self, ctx: &FormatContext, max_nelmts_bits: u8) -> Vec<u8> {
755        let sa = ctx.sizeof_addr as usize;
756        let bo_size = Self::block_offset_size(max_nelmts_bits);
757        let size = self.encoded_size(ctx, max_nelmts_bits);
758        let mut buf = Vec::with_capacity(size);
759
760        buf.extend_from_slice(&EADB_SIGNATURE);
761        buf.push(EA_VERSION);
762        buf.push(self.class_id);
763        buf.extend_from_slice(&self.header_addr.to_le_bytes()[..sa]);
764        buf.extend_from_slice(&self.block_offset.to_le_bytes()[..bo_size]);
765
766        for &elem in &self.elements {
767            buf.extend_from_slice(&elem.to_le_bytes()[..sa]);
768        }
769
770        let cksum = checksum_metadata(&buf);
771        buf.extend_from_slice(&cksum.to_le_bytes());
772
773        debug_assert_eq!(buf.len(), size);
774        buf
775    }
776
777    pub fn decode(
778        buf: &[u8],
779        ctx: &FormatContext,
780        max_nelmts_bits: u8,
781        nelmts: usize,
782    ) -> FormatResult<Self> {
783        let sa = ctx.sizeof_addr as usize;
784        let bo_size = Self::block_offset_size(max_nelmts_bits);
785        let min_size = 4 + 1 + 1 + sa + bo_size + nelmts * sa + 4;
786
787        if buf.len() < min_size {
788            return Err(FormatError::BufferTooShort {
789                needed: min_size,
790                available: buf.len(),
791            });
792        }
793
794        if buf[0..4] != EADB_SIGNATURE {
795            return Err(FormatError::InvalidSignature);
796        }
797
798        let version = buf[4];
799        if version != EA_VERSION {
800            return Err(FormatError::InvalidVersion(version));
801        }
802
803        // Verify checksum
804        let data_end = min_size - 4;
805        let stored_cksum = u32::from_le_bytes([
806            buf[data_end],
807            buf[data_end + 1],
808            buf[data_end + 2],
809            buf[data_end + 3],
810        ]);
811        let computed_cksum = checksum_metadata(&buf[..data_end]);
812        if stored_cksum != computed_cksum {
813            return Err(FormatError::ChecksumMismatch {
814                expected: stored_cksum,
815                computed: computed_cksum,
816            });
817        }
818
819        let class_id = buf[5];
820        let mut pos = 6;
821        let header_addr = read_addr(&buf[pos..], sa);
822        pos += sa;
823        let block_offset = read_size(&buf[pos..], bo_size);
824        pos += bo_size;
825
826        let mut elements = Vec::with_capacity(nelmts);
827        for _ in 0..nelmts {
828            elements.push(read_addr(&buf[pos..], sa));
829            pos += sa;
830        }
831
832        Ok(Self {
833            class_id,
834            header_addr,
835            block_offset,
836            elements,
837        })
838    }
839}
840
841// ========================================================================= helpers
842
843fn read_addr(buf: &[u8], n: usize) -> u64 {
844    if buf[..n].iter().all(|&b| b == 0xFF) {
845        UNDEF_ADDR
846    } else {
847        let mut tmp = [0u8; 8];
848        tmp[..n].copy_from_slice(&buf[..n]);
849        u64::from_le_bytes(tmp)
850    }
851}
852
/// Read an `n`-byte little-endian unsigned integer from the front of `buf`,
/// zero-extended to `u64`.
///
/// Panics if `n` is greater than 8 or `buf` is shorter than `n` bytes.
fn read_size(buf: &[u8], n: usize) -> u64 {
    let mut wide = [0u8; 8];
    for (dst, src) in wide[..n].iter_mut().zip(&buf[..n]) {
        *dst = *src;
    }
    u64::from_le_bytes(wide)
}
858
/// Number of data block address slots stored in the index block.
///
/// For `sup_blk_min_data_ptrs = K` this is `2 * (K - 1)`.
///
/// `K` is read from file metadata, so a zero value is tolerated and yields 0
/// instead of underflowing.
pub fn compute_ndblk_addrs(sup_blk_min_data_ptrs: u8) -> usize {
    2 * usize::from(sup_blk_min_data_ptrs).saturating_sub(1)
}
866
/// Compute the total number of super blocks needed to cover the array.
///
/// The array holds up to `2^max_nelmts_bits` elements, of which
/// `idx_blk_elmts` live in the index block. Super blocks are counted until
/// their combined capacity covers the rest. Super blocks 0 and 1 each hold one
/// data block of `data_blk_min_elmts` elements; after that, both the number
/// of data blocks and the data block size double every two super blocks.
///
/// The parameters come from file metadata, so degenerate values are handled
/// rather than trusted:
/// - `max_nelmts_bits >= 64` is treated as a capacity of `u64::MAX`,
/// - a capacity smaller than `idx_blk_elmts` needs no super blocks,
/// - `data_blk_min_elmts == 0` returns 0, since empty data blocks could
///   never cover any element and the loop would not terminate.
fn compute_nsblks(idx_blk_elmts: u8, data_blk_min_elmts: u8, max_nelmts_bits: u8) -> usize {
    if data_blk_min_elmts == 0 {
        return 0;
    }

    let max_nelmts = 1u64
        .checked_shl(u32::from(max_nelmts_bits))
        .unwrap_or(u64::MAX);
    let nelmts_remaining = max_nelmts.saturating_sub(u64::from(idx_blk_elmts));

    let mut nsblks = 0usize;
    let mut acc = 0u64;
    while acc < nelmts_remaining {
        let (ndblks_in_sblk, dblk_size) = if nsblks < 2 {
            (1u64, u64::from(data_blk_min_elmts))
        } else {
            // With a non-zero data block size, `acc` saturates before this
            // shift can reach 64, so the shifts below cannot overflow.
            let shift = (nsblks - 2) / 2 + 1;
            (1u64 << shift, u64::from(data_blk_min_elmts) << shift)
        };
        acc = acc.saturating_add(ndblks_in_sblk.saturating_mul(dblk_size));
        nsblks += 1;
    }
    nsblks
}
889
890/// Compute sblk_idx_start: the first super block whose data block addresses
891/// are NOT stored in the index block's dblk_addrs array.
892fn compute_sblk_idx_start(sup_blk_min_data_ptrs: u8, nsblks: usize) -> usize {
893    let ndblk_addrs = compute_ndblk_addrs(sup_blk_min_data_ptrs);
894    let mut dblks_counted = 0usize;
895    let mut sblk_idx_start = 0usize;
896
897    for s in 0..nsblks {
898        let ndblks_in_sblk = if s < 2 {
899            1
900        } else {
901            let half = (s - 2) / 2;
902            1 << (half + 1)
903        };
904
905        if dblks_counted + ndblks_in_sblk > ndblk_addrs {
906            break;
907        }
908        dblks_counted += ndblks_in_sblk;
909        sblk_idx_start = s + 1;
910    }
911    sblk_idx_start
912}
913
914/// Compute nsblk_addrs for the index block: the number of super block
915/// address slots stored in the EAIB.
916pub fn compute_nsblk_addrs(
917    idx_blk_elmts: u8,
918    data_blk_min_elmts: u8,
919    sup_blk_min_data_ptrs: u8,
920    max_nelmts_bits: u8,
921) -> usize {
922    let nsblks = compute_nsblks(idx_blk_elmts, data_blk_min_elmts, max_nelmts_bits);
923    let sblk_idx_start = compute_sblk_idx_start(sup_blk_min_data_ptrs, nsblks);
924    nsblks - sblk_idx_start
925}
926
927// ======================================================================= tests
928
#[cfg(test)]
mod tests {
    use super::*;

    /// Format context with 8-byte addresses and sizes.
    fn ctx8() -> FormatContext {
        FormatContext {
            sizeof_addr: 8,
            sizeof_size: 8,
        }
    }

    /// Format context with 4-byte addresses and sizes.
    fn ctx4() -> FormatContext {
        FormatContext {
            sizeof_addr: 4,
            sizeof_size: 4,
        }
    }

    // ---- header (EAHD) ----

    #[test]
    fn header_roundtrip() {
        let ctx = ctx8();
        let mut original = ExtensibleArrayHeader::new_for_chunks(&ctx);
        original.idx_blk_addr = 0x1000;
        original.max_idx_set = 3;
        original.num_elmts_realized = 4;

        let bytes = original.encode(&ctx);
        assert_eq!(bytes.len(), original.encoded_size(&ctx));
        assert_eq!(&bytes[..4], b"EAHD");

        let parsed = ExtensibleArrayHeader::decode(&bytes, &ctx).unwrap();
        assert_eq!(parsed, original);
    }

    #[test]
    fn header_roundtrip_ctx4() {
        let ctx = ctx4();
        let mut original = ExtensibleArrayHeader::new_for_chunks(&ctx);
        original.raw_elmt_size = 4;
        original.idx_blk_addr = 0x800;

        let bytes = original.encode(&ctx);
        let parsed = ExtensibleArrayHeader::decode(&bytes, &ctx).unwrap();
        assert_eq!(parsed, original);
    }

    #[test]
    fn header_bad_signature() {
        let ctx = ctx8();
        let mut header = ExtensibleArrayHeader::new_for_chunks(&ctx);
        header.idx_blk_addr = 0x1000;

        let mut bytes = header.encode(&ctx);
        bytes[0] = b'X'; // break the "EAHD" signature

        let failure = ExtensibleArrayHeader::decode(&bytes, &ctx).unwrap_err();
        assert!(matches!(failure, FormatError::InvalidSignature));
    }

    #[test]
    fn header_checksum_mismatch() {
        let ctx = ctx8();
        let mut header = ExtensibleArrayHeader::new_for_chunks(&ctx);
        header.idx_blk_addr = 0x1000;

        let mut bytes = header.encode(&ctx);
        bytes[6] ^= 0xFF; // flip a byte past the signature

        let failure = ExtensibleArrayHeader::decode(&bytes, &ctx).unwrap_err();
        assert!(matches!(failure, FormatError::ChecksumMismatch { .. }));
    }

    // ---- index block (EAIB) ----

    #[test]
    fn index_block_roundtrip() {
        let ctx = ctx8();
        let dblk_slots = compute_ndblk_addrs(4);
        assert_eq!(dblk_slots, 6);

        let mut original = ExtensibleArrayIndexBlock::new(0x500, 4, dblk_slots, 0);
        original.elements[0] = 0x1000;
        original.elements[1] = 0x2000;
        original.dblk_addrs[0] = 0x3000;

        let bytes = original.encode(&ctx);
        assert_eq!(bytes.len(), original.encoded_size(&ctx));
        assert_eq!(&bytes[..4], b"EAIB");

        let parsed = ExtensibleArrayIndexBlock::decode(&bytes, &ctx, 4, dblk_slots, 0).unwrap();
        assert_eq!(parsed, original);
    }

    #[test]
    fn index_block_roundtrip_ctx4() {
        let ctx = ctx4();
        let original = ExtensibleArrayIndexBlock::new(0x300, 4, 6, 0);

        let bytes = original.encode(&ctx);
        let parsed = ExtensibleArrayIndexBlock::decode(&bytes, &ctx, 4, 6, 0).unwrap();
        assert_eq!(parsed, original);
    }

    #[test]
    fn index_block_bad_checksum() {
        let ctx = ctx8();
        let block = ExtensibleArrayIndexBlock::new(0x500, 4, 6, 0);

        let mut bytes = block.encode(&ctx);
        bytes[8] ^= 0xFF;

        let failure = ExtensibleArrayIndexBlock::decode(&bytes, &ctx, 4, 6, 0).unwrap_err();
        assert!(matches!(failure, FormatError::ChecksumMismatch { .. }));
    }

    // ---- data block (EADB) ----

    #[test]
    fn data_block_roundtrip() {
        let ctx = ctx8();
        let mut original = ExtensibleArrayDataBlock::new(0x500, 4, 16);
        original.elements[0] = 0xA000;
        original.elements[5] = 0xB000;

        let bytes = original.encode(&ctx, 32);
        assert_eq!(bytes.len(), original.encoded_size(&ctx, 32));
        assert_eq!(&bytes[..4], b"EADB");

        let parsed = ExtensibleArrayDataBlock::decode(&bytes, &ctx, 32, 16).unwrap();
        assert_eq!(parsed, original);
    }

    #[test]
    fn data_block_offset_size() {
        // (argument, expected block offset size)
        let cases = [(8, 1), (16, 2), (32, 4), (0, 1)];
        for (arg, expected) in cases {
            assert_eq!(ExtensibleArrayDataBlock::block_offset_size(arg), expected);
        }
    }

    // ---- derived index block parameters ----

    #[test]
    fn compute_ndblk_addrs_default() {
        // sup_blk_min_data_ptrs=4 => ndblk=6
        assert_eq!(compute_ndblk_addrs(4), 6);
        assert_eq!(compute_ndblk_addrs(2), 2);
    }

    #[test]
    fn compute_nsblk_addrs_default() {
        // Default params: idx_blk_elmts=4, data_blk_min_elmts=16,
        // sup_blk_min_data_ptrs=4, max_nelmts_bits=32.
        // Expected nsblk_addrs=25 (matching the HDF5 library).
        let nsblk_addrs = compute_nsblk_addrs(4, 16, 4, 32);
        assert_eq!(nsblk_addrs, 25);
    }
}