Skip to main content

rust_hdf5/format/chunk_index/
extensible_array.rs

//! Extensible Array (EA) chunk index structures for HDF5.
//!
//! Implements the on-disk format for the extensible array used to index
//! chunked datasets with one unlimited dimension (the typical SWMR use case).
//!
//! Structures:
//!   - Header (EAHD): metadata and statistics about the extensible array
//!   - Index Block (EAIB): holds direct chunk addresses and pointers to data/super blocks
//!   - Data Block (EADB): holds additional chunk addresses when the index block is full
10
11pub(crate) use crate::format::bytes::read_le_addr as read_addr;
12use crate::format::bytes::read_le_uint as read_size;
13use crate::format::checksum::checksum_metadata;
14use crate::format::{FormatContext, FormatError, FormatResult, UNDEF_ADDR};
15
/// Magic bytes that open an extensible array header ("EAHD").
pub const EAHD_SIGNATURE: [u8; 4] = *b"EAHD";
/// Magic bytes that open an extensible array index block ("EAIB").
pub const EAIB_SIGNATURE: [u8; 4] = *b"EAIB";
/// Magic bytes that open an extensible array data block ("EADB").
pub const EADB_SIGNATURE: [u8; 4] = *b"EADB";
/// Magic bytes that open an extensible array super block ("EASB").
pub const EASB_SIGNATURE: [u8; 4] = *b"EASB";

/// Version byte written after the signature of every extensible array structure.
pub const EA_VERSION: u8 = 0;

/// Element class for chunks stored without filters (H5EA_CLS_CHUNK).
pub const EA_CLS_CHUNK: u8 = 0;
/// Element class for chunks stored through a filter pipeline (H5EA_CLS_FILT_CHUNK).
pub const EA_CLS_FILT_CHUNK: u8 = 1;
32
/// One element of a filtered-chunk extensible array: where a compressed
/// chunk lives, how large it is, and which filters were skipped for it.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct FilteredChunkEntry {
    /// File address of the compressed chunk data.
    pub addr: u64,
    /// Compressed size of the chunk, in bytes.
    pub nbytes: u64,
    /// Filter mask; a set bit N records that filter N was NOT applied.
    pub filter_mask: u32,
}
43
44impl FilteredChunkEntry {
45    pub fn undef() -> Self {
46        Self {
47            addr: UNDEF_ADDR,
48            nbytes: 0,
49            filter_mask: 0,
50        }
51    }
52
53    pub fn is_undef(&self) -> bool {
54        self.addr == UNDEF_ADDR
55    }
56
57    /// Compute raw element size on disk: sizeof_addr + chunk_size_len + 4.
58    pub fn raw_size(sizeof_addr: u8, chunk_size_len: u8) -> u8 {
59        sizeof_addr + chunk_size_len + 4
60    }
61
62    /// Encode a single filtered entry.
63    pub fn encode(&self, sizeof_addr: usize, chunk_size_len: usize) -> Vec<u8> {
64        let mut buf = Vec::with_capacity(sizeof_addr + chunk_size_len + 4);
65        buf.extend_from_slice(&self.addr.to_le_bytes()[..sizeof_addr]);
66        buf.extend_from_slice(&self.nbytes.to_le_bytes()[..chunk_size_len]);
67        buf.extend_from_slice(&self.filter_mask.to_le_bytes());
68        buf
69    }
70
71    /// Decode a single filtered entry.
72    pub fn decode(buf: &[u8], sizeof_addr: usize, chunk_size_len: usize) -> Self {
73        let addr = read_addr(buf, sizeof_addr);
74        let nbytes = read_size(&buf[sizeof_addr..], chunk_size_len);
75        let off = sizeof_addr + chunk_size_len;
76        let filter_mask = u32::from_le_bytes([buf[off], buf[off + 1], buf[off + 2], buf[off + 3]]);
77        Self {
78            addr,
79            nbytes,
80            filter_mask,
81        }
82    }
83}
84
/// Number of bytes used to encode a chunk's size in a filtered entry.
///
/// Follows the HDF5 C formula `1 + (log2(chunk_size) + 8) / 8`, with the
/// result capped at 8. A zero-byte chunk has no defined logarithm and is
/// given a length of 1.
pub fn compute_chunk_size_len(uncompressed_chunk_bytes: u64) -> u8 {
    match uncompressed_chunk_bytes.checked_ilog2() {
        None => 1,
        Some(log2) => (1 + (log2 + 8) / 8).min(8) as u8,
    }
}
95
/// In-memory form of the extensible array header ("EAHD").
///
/// On-disk layout:
/// ```text
/// "EAHD"(4) + version=0(1) + class_id(1)
/// + raw_elmt_size(1) + max_nelmts_bits(1) + idx_blk_elmts(1)
/// + data_blk_min_elmts(1) + sup_blk_min_data_ptrs(1)
/// + max_dblk_page_nelmts_bits(1)
/// + 6 statistics (each sizeof_size bytes)
/// + idx_blk_addr (sizeof_addr)
/// + checksum(4)
/// ```
#[derive(Clone, Debug, PartialEq)]
pub struct ExtensibleArrayHeader {
    /// Element class ID (`EA_CLS_CHUNK` or `EA_CLS_FILT_CHUNK`).
    pub class_id: u8,
    /// Encoded size of one array element, in bytes.
    pub raw_elmt_size: u8,
    /// Bit width of the maximum number of elements in the array.
    pub max_nelmts_bits: u8,
    /// Number of elements stored directly in the index block.
    pub idx_blk_elmts: u8,
    /// Minimum number of elements in a data block.
    pub data_blk_min_elmts: u8,
    /// Minimum number of data block pointers in a super block.
    pub sup_blk_min_data_ptrs: u8,
    /// Bit width of the maximum number of elements in a data block page.
    pub max_dblk_page_nelmts_bits: u8,

    // Statistics, each written as `sizeof_size` bytes.
    /// Count of super blocks created.
    pub num_sblks_created: u64,
    /// Size of the super blocks created.
    pub size_sblks_created: u64,
    /// Count of data blocks created.
    pub num_dblks_created: u64,
    /// Size of the data blocks created.
    pub size_dblks_created: u64,
    /// Maximum element index set so far.
    pub max_idx_set: u64,
    /// Number of elements realized.
    pub num_elmts_realized: u64,

    /// File address of the index block.
    pub idx_blk_addr: u64,
}
126
127impl ExtensibleArrayHeader {
128    /// Create a new header for unfiltered chunk indexing.
129    pub fn new_for_chunks(ctx: &FormatContext) -> Self {
130        Self {
131            class_id: EA_CLS_CHUNK,
132            raw_elmt_size: ctx.sizeof_addr,
133            max_nelmts_bits: 32,
134            idx_blk_elmts: 4,
135            data_blk_min_elmts: 16,
136            sup_blk_min_data_ptrs: 4,
137            max_dblk_page_nelmts_bits: 10,
138            num_sblks_created: 0,
139            size_sblks_created: 0,
140            num_dblks_created: 0,
141            size_dblks_created: 0,
142            max_idx_set: 0,
143            num_elmts_realized: 0,
144            idx_blk_addr: UNDEF_ADDR,
145        }
146    }
147
148    /// Create a new header for filtered (compressed) chunk indexing.
149    pub fn new_for_filtered_chunks(ctx: &FormatContext, chunk_size_len: u8) -> Self {
150        Self {
151            class_id: EA_CLS_FILT_CHUNK,
152            raw_elmt_size: FilteredChunkEntry::raw_size(ctx.sizeof_addr, chunk_size_len),
153            max_nelmts_bits: 32,
154            idx_blk_elmts: 4,
155            data_blk_min_elmts: 16,
156            sup_blk_min_data_ptrs: 4,
157            max_dblk_page_nelmts_bits: 10,
158            num_sblks_created: 0,
159            size_sblks_created: 0,
160            num_dblks_created: 0,
161            size_dblks_created: 0,
162            max_idx_set: 0,
163            num_elmts_realized: 0,
164            idx_blk_addr: UNDEF_ADDR,
165        }
166    }
167
168    /// Compute the encoded size (for pre-allocation).
169    pub fn encoded_size(&self, ctx: &FormatContext) -> usize {
170        let ss = ctx.sizeof_size as usize;
171        let sa = ctx.sizeof_addr as usize;
172        // signature(4) + version(1) + class_id(1)
173        // + raw_elmt_size(1) + max_nelmts_bits(1) + idx_blk_elmts(1)
174        // + data_blk_min_elmts(1) + sup_blk_min_data_ptrs(1)
175        // + max_dblk_page_nelmts_bits(1)
176        // + 6 * sizeof_size (statistics)
177        // + sizeof_addr (idx_blk_addr)
178        // + checksum(4)
179        4 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 6 * ss + sa + 4
180    }
181
182    pub fn encode(&self, ctx: &FormatContext) -> Vec<u8> {
183        let ss = ctx.sizeof_size as usize;
184        let sa = ctx.sizeof_addr as usize;
185        let size = self.encoded_size(ctx);
186        let mut buf = Vec::with_capacity(size);
187
188        buf.extend_from_slice(&EAHD_SIGNATURE);
189        buf.push(EA_VERSION);
190        buf.push(self.class_id);
191        buf.push(self.raw_elmt_size);
192        buf.push(self.max_nelmts_bits);
193        buf.push(self.idx_blk_elmts);
194        buf.push(self.data_blk_min_elmts);
195        buf.push(self.sup_blk_min_data_ptrs);
196        buf.push(self.max_dblk_page_nelmts_bits);
197
198        // Statistics
199        buf.extend_from_slice(&self.num_sblks_created.to_le_bytes()[..ss]);
200        buf.extend_from_slice(&self.size_sblks_created.to_le_bytes()[..ss]);
201        buf.extend_from_slice(&self.num_dblks_created.to_le_bytes()[..ss]);
202        buf.extend_from_slice(&self.size_dblks_created.to_le_bytes()[..ss]);
203        buf.extend_from_slice(&self.max_idx_set.to_le_bytes()[..ss]);
204        buf.extend_from_slice(&self.num_elmts_realized.to_le_bytes()[..ss]);
205
206        // Index block address
207        buf.extend_from_slice(&self.idx_blk_addr.to_le_bytes()[..sa]);
208
209        // Checksum
210        let cksum = checksum_metadata(&buf);
211        buf.extend_from_slice(&cksum.to_le_bytes());
212
213        debug_assert_eq!(buf.len(), size);
214        buf
215    }
216
217    pub fn decode(buf: &[u8], ctx: &FormatContext) -> FormatResult<Self> {
218        let ss = ctx.sizeof_size as usize;
219        let sa = ctx.sizeof_addr as usize;
220        let min_size = 4 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 6 * ss + sa + 4;
221
222        if buf.len() < min_size {
223            return Err(FormatError::BufferTooShort {
224                needed: min_size,
225                available: buf.len(),
226            });
227        }
228
229        if buf[0..4] != EAHD_SIGNATURE {
230            return Err(FormatError::InvalidSignature);
231        }
232
233        let version = buf[4];
234        if version != EA_VERSION {
235            return Err(FormatError::InvalidVersion(version));
236        }
237
238        // Verify checksum
239        let data_end = min_size - 4;
240        let stored_cksum = u32::from_le_bytes([
241            buf[data_end],
242            buf[data_end + 1],
243            buf[data_end + 2],
244            buf[data_end + 3],
245        ]);
246        let computed_cksum = checksum_metadata(&buf[..data_end]);
247        if stored_cksum != computed_cksum {
248            return Err(FormatError::ChecksumMismatch {
249                expected: stored_cksum,
250                computed: computed_cksum,
251            });
252        }
253
254        let mut pos = 5;
255        let class_id = buf[pos];
256        pos += 1;
257        let raw_elmt_size = buf[pos];
258        pos += 1;
259        let max_nelmts_bits = buf[pos];
260        pos += 1;
261        let idx_blk_elmts = buf[pos];
262        pos += 1;
263        let data_blk_min_elmts = buf[pos];
264        pos += 1;
265        let sup_blk_min_data_ptrs = buf[pos];
266        pos += 1;
267        let max_dblk_page_nelmts_bits = buf[pos];
268        pos += 1;
269
270        let num_sblks_created = read_size(&buf[pos..], ss);
271        pos += ss;
272        let size_sblks_created = read_size(&buf[pos..], ss);
273        pos += ss;
274        let num_dblks_created = read_size(&buf[pos..], ss);
275        pos += ss;
276        let size_dblks_created = read_size(&buf[pos..], ss);
277        pos += ss;
278        let max_idx_set = read_size(&buf[pos..], ss);
279        pos += ss;
280        let num_elmts_realized = read_size(&buf[pos..], ss);
281        pos += ss;
282
283        let idx_blk_addr = read_addr(&buf[pos..], sa);
284
285        Ok(Self {
286            class_id,
287            raw_elmt_size,
288            max_nelmts_bits,
289            idx_blk_elmts,
290            data_blk_min_elmts,
291            sup_blk_min_data_ptrs,
292            max_dblk_page_nelmts_bits,
293            num_sblks_created,
294            size_sblks_created,
295            num_dblks_created,
296            size_dblks_created,
297            max_idx_set,
298            num_elmts_realized,
299            idx_blk_addr,
300        })
301    }
302}
303
/// In-memory form of the extensible array index block ("EAIB") for
/// unfiltered chunks.
///
/// On-disk layout:
/// ```text
/// "EAIB"(4) + version=0(1) + class_id(1)
/// + header_addr(sizeof_addr)
/// + elements (idx_blk_elmts * raw_elmt_size bytes)
/// + data_block_addresses (ndblk_addrs * sizeof_addr)
/// + super_block_addresses (nsblk_addrs * sizeof_addr)
/// + checksum(4)
/// ```
#[derive(Clone, Debug, PartialEq)]
pub struct ExtensibleArrayIndexBlock {
    /// Element class ID.
    pub class_id: u8,
    /// File address of the owning header.
    pub header_addr: u64,
    /// Chunk addresses held directly in the index block.
    pub elements: Vec<u64>,
    /// Addresses of data blocks.
    pub dblk_addrs: Vec<u64>,
    /// Addresses of super blocks.
    pub sblk_addrs: Vec<u64>,
}
326
327/// Filtered variant of the extensible array index block.
328///
329/// Stores `FilteredChunkEntry` elements instead of raw addresses.
330#[derive(Debug, Clone, PartialEq)]
331pub struct FilteredIndexBlock {
332    pub class_id: u8,
333    pub header_addr: u64,
334    pub elements: Vec<FilteredChunkEntry>,
335    pub dblk_addrs: Vec<u64>,
336    pub sblk_addrs: Vec<u64>,
337}
338
339impl FilteredIndexBlock {
340    pub fn new(
341        header_addr: u64,
342        idx_blk_elmts: u8,
343        ndblk_addrs: usize,
344        nsblk_addrs: usize,
345    ) -> Self {
346        Self {
347            class_id: EA_CLS_FILT_CHUNK,
348            header_addr,
349            elements: vec![FilteredChunkEntry::undef(); idx_blk_elmts as usize],
350            dblk_addrs: vec![UNDEF_ADDR; ndblk_addrs],
351            sblk_addrs: vec![UNDEF_ADDR; nsblk_addrs],
352        }
353    }
354
355    pub fn encode(&self, ctx: &FormatContext, chunk_size_len: u8) -> Vec<u8> {
356        let sa = ctx.sizeof_addr as usize;
357        let elmt_size = FilteredChunkEntry::raw_size(ctx.sizeof_addr, chunk_size_len) as usize;
358        let size = 4
359            + 1
360            + 1
361            + sa
362            + self.elements.len() * elmt_size
363            + self.dblk_addrs.len() * sa
364            + self.sblk_addrs.len() * sa
365            + 4;
366        let mut buf = Vec::with_capacity(size);
367
368        buf.extend_from_slice(&EAIB_SIGNATURE);
369        buf.push(EA_VERSION);
370        buf.push(self.class_id);
371        buf.extend_from_slice(&self.header_addr.to_le_bytes()[..sa]);
372
373        for elem in &self.elements {
374            buf.extend_from_slice(&elem.encode(sa, chunk_size_len as usize));
375        }
376        for &addr in &self.dblk_addrs {
377            buf.extend_from_slice(&addr.to_le_bytes()[..sa]);
378        }
379        for &addr in &self.sblk_addrs {
380            buf.extend_from_slice(&addr.to_le_bytes()[..sa]);
381        }
382
383        let cksum = checksum_metadata(&buf);
384        buf.extend_from_slice(&cksum.to_le_bytes());
385        debug_assert_eq!(buf.len(), size);
386        buf
387    }
388
389    pub fn decode(
390        buf: &[u8],
391        ctx: &FormatContext,
392        idx_blk_elmts: usize,
393        ndblk_addrs: usize,
394        nsblk_addrs: usize,
395        chunk_size_len: u8,
396    ) -> FormatResult<Self> {
397        let sa = ctx.sizeof_addr as usize;
398        let elmt_size = FilteredChunkEntry::raw_size(ctx.sizeof_addr, chunk_size_len) as usize;
399        let min_size =
400            4 + 1 + 1 + sa + idx_blk_elmts * elmt_size + ndblk_addrs * sa + nsblk_addrs * sa + 4;
401
402        if buf.len() < min_size {
403            return Err(FormatError::BufferTooShort {
404                needed: min_size,
405                available: buf.len(),
406            });
407        }
408        if buf[0..4] != EAIB_SIGNATURE {
409            return Err(FormatError::InvalidSignature);
410        }
411        if buf[4] != EA_VERSION {
412            return Err(FormatError::InvalidVersion(buf[4]));
413        }
414
415        let data_end = min_size - 4;
416        let stored = u32::from_le_bytes([
417            buf[data_end],
418            buf[data_end + 1],
419            buf[data_end + 2],
420            buf[data_end + 3],
421        ]);
422        let computed = checksum_metadata(&buf[..data_end]);
423        if stored != computed {
424            return Err(FormatError::ChecksumMismatch {
425                expected: stored,
426                computed,
427            });
428        }
429
430        let class_id = buf[5];
431        let mut pos = 6;
432        let header_addr = read_addr(&buf[pos..], sa);
433        pos += sa;
434
435        let mut elements = Vec::with_capacity(idx_blk_elmts);
436        for _ in 0..idx_blk_elmts {
437            elements.push(FilteredChunkEntry::decode(
438                &buf[pos..],
439                sa,
440                chunk_size_len as usize,
441            ));
442            pos += elmt_size;
443        }
444        let mut dblk_addrs = Vec::with_capacity(ndblk_addrs);
445        for _ in 0..ndblk_addrs {
446            dblk_addrs.push(read_addr(&buf[pos..], sa));
447            pos += sa;
448        }
449        let mut sblk_addrs = Vec::with_capacity(nsblk_addrs);
450        for _ in 0..nsblk_addrs {
451            sblk_addrs.push(read_addr(&buf[pos..], sa));
452            pos += sa;
453        }
454
455        Ok(Self {
456            class_id,
457            header_addr,
458            elements,
459            dblk_addrs,
460            sblk_addrs,
461        })
462    }
463}
464
465/// Filtered variant of the extensible array data block.
466#[derive(Debug, Clone, PartialEq)]
467pub struct FilteredDataBlock {
468    pub class_id: u8,
469    pub header_addr: u64,
470    pub block_offset: u64,
471    pub elements: Vec<FilteredChunkEntry>,
472}
473
474impl FilteredDataBlock {
475    pub fn new(header_addr: u64, block_offset: u64, nelmts: usize) -> Self {
476        Self {
477            class_id: EA_CLS_FILT_CHUNK,
478            header_addr,
479            block_offset,
480            elements: vec![FilteredChunkEntry::undef(); nelmts],
481        }
482    }
483
484    pub fn encode(&self, ctx: &FormatContext, max_nelmts_bits: u8, chunk_size_len: u8) -> Vec<u8> {
485        let sa = ctx.sizeof_addr as usize;
486        let bo_size = ExtensibleArrayDataBlock::block_offset_size(max_nelmts_bits);
487        let elmt_size = FilteredChunkEntry::raw_size(ctx.sizeof_addr, chunk_size_len) as usize;
488        let size = 4 + 1 + 1 + sa + bo_size + self.elements.len() * elmt_size + 4;
489        let mut buf = Vec::with_capacity(size);
490
491        buf.extend_from_slice(&EADB_SIGNATURE);
492        buf.push(EA_VERSION);
493        buf.push(self.class_id);
494        buf.extend_from_slice(&self.header_addr.to_le_bytes()[..sa]);
495        buf.extend_from_slice(&self.block_offset.to_le_bytes()[..bo_size]);
496
497        for elem in &self.elements {
498            buf.extend_from_slice(&elem.encode(sa, chunk_size_len as usize));
499        }
500
501        let cksum = checksum_metadata(&buf);
502        buf.extend_from_slice(&cksum.to_le_bytes());
503        debug_assert_eq!(buf.len(), size);
504        buf
505    }
506
507    pub fn decode(
508        buf: &[u8],
509        ctx: &FormatContext,
510        max_nelmts_bits: u8,
511        nelmts: usize,
512        chunk_size_len: u8,
513    ) -> FormatResult<Self> {
514        let sa = ctx.sizeof_addr as usize;
515        let bo_size = ExtensibleArrayDataBlock::block_offset_size(max_nelmts_bits);
516        let elmt_size = FilteredChunkEntry::raw_size(ctx.sizeof_addr, chunk_size_len) as usize;
517        let min_size = nelmts
518            .saturating_mul(elmt_size)
519            .saturating_add(10 + sa + bo_size);
520
521        if buf.len() < min_size {
522            return Err(FormatError::BufferTooShort {
523                needed: min_size,
524                available: buf.len(),
525            });
526        }
527        if buf[0..4] != EADB_SIGNATURE {
528            return Err(FormatError::InvalidSignature);
529        }
530        if buf[4] != EA_VERSION {
531            return Err(FormatError::InvalidVersion(buf[4]));
532        }
533
534        let data_end = min_size - 4;
535        let stored = u32::from_le_bytes([
536            buf[data_end],
537            buf[data_end + 1],
538            buf[data_end + 2],
539            buf[data_end + 3],
540        ]);
541        let computed = checksum_metadata(&buf[..data_end]);
542        if stored != computed {
543            return Err(FormatError::ChecksumMismatch {
544                expected: stored,
545                computed,
546            });
547        }
548
549        let class_id = buf[5];
550        let mut pos = 6;
551        let header_addr = read_addr(&buf[pos..], sa);
552        pos += sa;
553        let block_offset = read_size(&buf[pos..], bo_size);
554        pos += bo_size;
555
556        let mut elements = Vec::with_capacity(nelmts);
557        for _ in 0..nelmts {
558            elements.push(FilteredChunkEntry::decode(
559                &buf[pos..],
560                sa,
561                chunk_size_len as usize,
562            ));
563            pos += elmt_size;
564        }
565
566        Ok(Self {
567            class_id,
568            header_addr,
569            block_offset,
570            elements,
571        })
572    }
573}
574
575impl ExtensibleArrayIndexBlock {
576    /// Create a new empty index block.
577    pub fn new(
578        header_addr: u64,
579        idx_blk_elmts: u8,
580        ndblk_addrs: usize,
581        nsblk_addrs: usize,
582    ) -> Self {
583        Self {
584            class_id: EA_CLS_CHUNK,
585            header_addr,
586            elements: vec![UNDEF_ADDR; idx_blk_elmts as usize],
587            dblk_addrs: vec![UNDEF_ADDR; ndblk_addrs],
588            sblk_addrs: vec![UNDEF_ADDR; nsblk_addrs],
589        }
590    }
591
592    /// Compute the encoded size.
593    pub fn encoded_size(&self, ctx: &FormatContext) -> usize {
594        let sa = ctx.sizeof_addr as usize;
595        // signature(4) + version(1) + class_id(1)
596        // + header_addr(sa)
597        // + elements(n * sa)
598        // + dblk_addrs(n * sa)
599        // + sblk_addrs(n * sa)
600        // + checksum(4)
601        4 + 1
602            + 1
603            + sa
604            + self.elements.len() * sa
605            + self.dblk_addrs.len() * sa
606            + self.sblk_addrs.len() * sa
607            + 4
608    }
609
610    pub fn encode(&self, ctx: &FormatContext) -> Vec<u8> {
611        let sa = ctx.sizeof_addr as usize;
612        let size = self.encoded_size(ctx);
613        let mut buf = Vec::with_capacity(size);
614
615        buf.extend_from_slice(&EAIB_SIGNATURE);
616        buf.push(EA_VERSION);
617        buf.push(self.class_id);
618        buf.extend_from_slice(&self.header_addr.to_le_bytes()[..sa]);
619
620        for &elem in &self.elements {
621            buf.extend_from_slice(&elem.to_le_bytes()[..sa]);
622        }
623
624        for &addr in &self.dblk_addrs {
625            buf.extend_from_slice(&addr.to_le_bytes()[..sa]);
626        }
627
628        for &addr in &self.sblk_addrs {
629            buf.extend_from_slice(&addr.to_le_bytes()[..sa]);
630        }
631
632        let cksum = checksum_metadata(&buf);
633        buf.extend_from_slice(&cksum.to_le_bytes());
634
635        debug_assert_eq!(buf.len(), size);
636        buf
637    }
638
639    pub fn decode(
640        buf: &[u8],
641        ctx: &FormatContext,
642        idx_blk_elmts: usize,
643        ndblk_addrs: usize,
644        nsblk_addrs: usize,
645    ) -> FormatResult<Self> {
646        let sa = ctx.sizeof_addr as usize;
647        let min_size =
648            4 + 1 + 1 + sa + idx_blk_elmts * sa + ndblk_addrs * sa + nsblk_addrs * sa + 4;
649
650        if buf.len() < min_size {
651            return Err(FormatError::BufferTooShort {
652                needed: min_size,
653                available: buf.len(),
654            });
655        }
656
657        if buf[0..4] != EAIB_SIGNATURE {
658            return Err(FormatError::InvalidSignature);
659        }
660
661        let version = buf[4];
662        if version != EA_VERSION {
663            return Err(FormatError::InvalidVersion(version));
664        }
665
666        // Verify checksum
667        let data_end = min_size - 4;
668        let stored_cksum = u32::from_le_bytes([
669            buf[data_end],
670            buf[data_end + 1],
671            buf[data_end + 2],
672            buf[data_end + 3],
673        ]);
674        let computed_cksum = checksum_metadata(&buf[..data_end]);
675        if stored_cksum != computed_cksum {
676            return Err(FormatError::ChecksumMismatch {
677                expected: stored_cksum,
678                computed: computed_cksum,
679            });
680        }
681
682        let class_id = buf[5];
683        let mut pos = 6;
684        let header_addr = read_addr(&buf[pos..], sa);
685        pos += sa;
686
687        let mut elements = Vec::with_capacity(idx_blk_elmts);
688        for _ in 0..idx_blk_elmts {
689            elements.push(read_addr(&buf[pos..], sa));
690            pos += sa;
691        }
692
693        let mut dblk_addrs = Vec::with_capacity(ndblk_addrs);
694        for _ in 0..ndblk_addrs {
695            dblk_addrs.push(read_addr(&buf[pos..], sa));
696            pos += sa;
697        }
698
699        let mut sblk_addrs = Vec::with_capacity(nsblk_addrs);
700        for _ in 0..nsblk_addrs {
701            sblk_addrs.push(read_addr(&buf[pos..], sa));
702            pos += sa;
703        }
704
705        Ok(Self {
706            class_id,
707            header_addr,
708            elements,
709            dblk_addrs,
710            sblk_addrs,
711        })
712    }
713}
714
/// In-memory form of the extensible array data block ("EADB") for
/// unfiltered chunks.
///
/// On-disk layout:
/// ```text
/// "EADB"(4) + version=0(1) + class_id(1)
/// + header_addr(sizeof_addr)
/// + block_offset (variable length)
/// + elements(nelmts * raw_elmt_size)
/// + checksum(4)
/// ```
#[derive(Clone, Debug, PartialEq)]
pub struct ExtensibleArrayDataBlock {
    /// Element class ID.
    pub class_id: u8,
    /// File address of the owning header.
    pub header_addr: u64,
    /// Block offset value written after the header address.
    pub block_offset: u64,
    /// Chunk addresses stored in this block.
    pub elements: Vec<u64>,
}
733
734impl ExtensibleArrayDataBlock {
735    /// Create a new empty data block.
736    pub fn new(header_addr: u64, block_offset: u64, nelmts: usize) -> Self {
737        Self {
738            class_id: EA_CLS_CHUNK,
739            header_addr,
740            block_offset,
741            elements: vec![UNDEF_ADDR; nelmts],
742        }
743    }
744
745    /// Number of bytes needed for the block_offset field.
746    pub fn block_offset_size(max_nelmts_bits: u8) -> usize {
747        std::cmp::max(1, (max_nelmts_bits as usize).div_ceil(8))
748    }
749
750    /// Compute the encoded size.
751    pub fn encoded_size(&self, ctx: &FormatContext, max_nelmts_bits: u8) -> usize {
752        let sa = ctx.sizeof_addr as usize;
753        let bo_size = Self::block_offset_size(max_nelmts_bits);
754        // signature(4) + version(1) + class_id(1)
755        // + header_addr(sa) + block_offset(bo_size)
756        // + elements(n * sa) + checksum(4)
757        4 + 1 + 1 + sa + bo_size + self.elements.len() * sa + 4
758    }
759
760    pub fn encode(&self, ctx: &FormatContext, max_nelmts_bits: u8) -> Vec<u8> {
761        let sa = ctx.sizeof_addr as usize;
762        let bo_size = Self::block_offset_size(max_nelmts_bits);
763        let size = self.encoded_size(ctx, max_nelmts_bits);
764        let mut buf = Vec::with_capacity(size);
765
766        buf.extend_from_slice(&EADB_SIGNATURE);
767        buf.push(EA_VERSION);
768        buf.push(self.class_id);
769        buf.extend_from_slice(&self.header_addr.to_le_bytes()[..sa]);
770        buf.extend_from_slice(&self.block_offset.to_le_bytes()[..bo_size]);
771
772        for &elem in &self.elements {
773            buf.extend_from_slice(&elem.to_le_bytes()[..sa]);
774        }
775
776        let cksum = checksum_metadata(&buf);
777        buf.extend_from_slice(&cksum.to_le_bytes());
778
779        debug_assert_eq!(buf.len(), size);
780        buf
781    }
782
783    pub fn decode(
784        buf: &[u8],
785        ctx: &FormatContext,
786        max_nelmts_bits: u8,
787        nelmts: usize,
788    ) -> FormatResult<Self> {
789        let sa = ctx.sizeof_addr as usize;
790        let bo_size = Self::block_offset_size(max_nelmts_bits);
791        let min_size = nelmts.saturating_mul(sa).saturating_add(10 + sa + bo_size);
792
793        if buf.len() < min_size {
794            return Err(FormatError::BufferTooShort {
795                needed: min_size,
796                available: buf.len(),
797            });
798        }
799
800        if buf[0..4] != EADB_SIGNATURE {
801            return Err(FormatError::InvalidSignature);
802        }
803
804        let version = buf[4];
805        if version != EA_VERSION {
806            return Err(FormatError::InvalidVersion(version));
807        }
808
809        // Verify checksum
810        let data_end = min_size - 4;
811        let stored_cksum = u32::from_le_bytes([
812            buf[data_end],
813            buf[data_end + 1],
814            buf[data_end + 2],
815            buf[data_end + 3],
816        ]);
817        let computed_cksum = checksum_metadata(&buf[..data_end]);
818        if stored_cksum != computed_cksum {
819            return Err(FormatError::ChecksumMismatch {
820                expected: stored_cksum,
821                computed: computed_cksum,
822            });
823        }
824
825        let class_id = buf[5];
826        let mut pos = 6;
827        let header_addr = read_addr(&buf[pos..], sa);
828        pos += sa;
829        let block_offset = read_size(&buf[pos..], bo_size);
830        pos += bo_size;
831
832        let mut elements = Vec::with_capacity(nelmts);
833        for _ in 0..nelmts {
834            elements.push(read_addr(&buf[pos..], sa));
835            pos += sa;
836        }
837
838        Ok(Self {
839            class_id,
840            header_addr,
841            block_offset,
842            elements,
843        })
844    }
845}
846
847// ========================================================================= helpers
848
849/// Compute ndblk_addrs for the index block given the creation params.
850///
851/// For sup_blk_min_data_ptrs = K:
852///   ndblk_addrs = 2 * (K - 1)
853///
854/// Returns an error if `K` is zero (`K` comes straight out of a decoded
855/// file and a malformed value must not underflow).
856pub fn compute_ndblk_addrs(sup_blk_min_data_ptrs: u8) -> FormatResult<usize> {
857    if sup_blk_min_data_ptrs == 0 {
858        return Err(FormatError::InvalidData(
859            "extensible-array sup_blk_min_data_ptrs must be non-zero".into(),
860        ));
861    }
862    Ok(2 * (sup_blk_min_data_ptrs as usize - 1))
863}
864
/// Exact base-2 logarithm of `n`, which must be a power of two (checked in
/// debug builds only): the exponent equals the count of trailing zero bits.
fn log2_pow2(n: u64) -> u32 {
    debug_assert!(n.is_power_of_two());
    n.trailing_zeros()
}
870
/// Base-2 logarithm of `n` rounded down, i.e. the position of its highest
/// set bit. `n` must be at least 1 (checked in debug builds only).
fn log2_floor(n: u64) -> u32 {
    debug_assert!(n >= 1);
    63 - n.leading_zeros()
}
876
/// Shape of a single extensible-array super block (`H5EA_sblk_info_t`).
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct EaSblkInfo {
    /// How many data blocks this super block contains.
    pub ndblks: u64,
    /// How many elements each of those data blocks holds.
    pub dblk_nelmts: u64,
    /// Index of this super block's first element (excludes `idx_blk_elmts`).
    pub start_idx: u64,
    /// Global index of this super block's first data block.
    pub start_dblk: u64,
}
889
890/// Where a chunk lives within the extensible array.
891#[derive(Debug, Clone, Copy, PartialEq)]
892pub enum EaLoc {
893    /// Stored directly in the index block at `elem`.
894    Index { elem: usize },
895    /// Stored in a data block.
896    Dblk(EaChunkLoc),
897}
898
899/// Location of a chunk that lives in an EA data block.
900#[derive(Debug, Clone, Copy, PartialEq)]
901pub struct EaChunkLoc {
902    /// Owning super-block index.
903    pub sblk_idx: usize,
904    /// Elements per data block in that super block.
905    pub dblk_nelmts: u64,
906    /// Element offset of the chunk within its data block.
907    pub offset_in_dblk: u64,
908    /// `block_offset` value to stamp into the data block header.
909    pub dblk_block_offset: u64,
910    /// Whether the data block exceeds the page size (paged — unsupported).
911    pub paged: bool,
912    /// How the data block address is reached.
913    pub path: EaDblkPath,
914}
915
/// Route from the index block to the address of an EA data block.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum EaDblkPath {
    /// The address is `index_block.dblk_addrs[idx]`.
    Direct { idx: usize },
    /// The address is
    /// `super_block(index_block.sblk_addrs[sblk_off]).dblk_addrs[local_dblk]`.
    ViaSblk {
        sblk_off: usize,
        local_dblk: usize,
        ndblks_in_sblk: usize,
        /// Value to write as `block_offset` in the super block header.
        sblk_block_offset: u64,
    },
}
930
931/// Extensible-array geometry derived from the creation parameters, matching
932/// the libhdf5 on-disk layout (`H5EA__hdr_init`, `H5EAiblock.c`).
933#[derive(Debug, Clone)]
934pub struct EaGeometry {
935    pub idx_blk_elmts: u64,
936    pub data_blk_min_elmts: u64,
937    /// Elements per data block page (paging threshold).
938    pub dblk_page_nelmts: u64,
939    /// Super blocks whose data-block addresses live in the index block.
940    pub iblock_nsblks: usize,
941    /// Data-block address slots in the index block.
942    pub ndblk_addrs: usize,
943    /// Super-block address slots in the index block.
944    pub nsblk_addrs: usize,
945    /// Per-super-block layout, length = total super-block count.
946    pub sblk: Vec<EaSblkInfo>,
947}
948
949impl EaGeometry {
950    /// Derive the geometry from the EA creation parameters.
951    ///
952    /// All parameters originate from a decoded file (the data-layout
953    /// message), so this validates them and returns a `FormatError` rather
954    /// than panicking on malformed or hostile input.
955    pub fn new(
956        idx_blk_elmts: u8,
957        data_blk_min_elmts: u8,
958        sup_blk_min_data_ptrs: u8,
959        max_nelmts_bits: u8,
960        max_dblk_page_nelmts_bits: u8,
961    ) -> FormatResult<Self> {
962        let min = data_blk_min_elmts as u64;
963        if min == 0 || !min.is_power_of_two() {
964            return Err(FormatError::InvalidData(format!(
965                "extensible-array data_blk_min_elmts must be a non-zero power \
966                 of two, got {data_blk_min_elmts}"
967            )));
968        }
969        let sup = sup_blk_min_data_ptrs as u64;
970        if sup == 0 || !sup.is_power_of_two() {
971            return Err(FormatError::InvalidData(format!(
972                "extensible-array sup_blk_min_data_ptrs must be a non-zero \
973                 power of two, got {sup_blk_min_data_ptrs}"
974            )));
975        }
976        // `max_nelmts_bits` bounds the array's index space; keep it small
977        // enough that the per-super-block geometry cannot overflow u64.
978        let min_bits = log2_pow2(min);
979        if max_nelmts_bits as u32 > 64 || (max_nelmts_bits as u32) < min_bits {
980            return Err(FormatError::InvalidData(format!(
981                "extensible-array max_nelmts_bits {max_nelmts_bits} is out of \
982                 range for data_blk_min_elmts {data_blk_min_elmts}"
983            )));
984        }
985        if max_dblk_page_nelmts_bits >= 64 {
986            return Err(FormatError::InvalidData(format!(
987                "extensible-array max_dblk_page_nelmts_bits \
988                 {max_dblk_page_nelmts_bits} is too large"
989            )));
990        }
991        let nsblks = 1 + (max_nelmts_bits as usize - min_bits as usize);
992        let iblock_nsblks = 2 * log2_pow2(sup) as usize;
993        if iblock_nsblks > nsblks {
994            return Err(FormatError::InvalidData(
995                "extensible-array index block would hold more super blocks \
996                 than the array contains"
997                    .into(),
998            ));
999        }
1000        let overflow = || {
1001            FormatError::InvalidData(
1002                "extensible-array geometry overflows the 64-bit index space".into(),
1003            )
1004        };
1005        let mut sblk = Vec::with_capacity(nsblks);
1006        let mut start_idx = 0u64;
1007        let mut start_dblk = 0u64;
1008        for u in 0..nsblks {
1009            let ndblks = 1u64 << (u / 2);
1010            let dblk_nelmts = (1u64 << (u as u64).div_ceil(2)) * min;
1011            sblk.push(EaSblkInfo {
1012                ndblks,
1013                dblk_nelmts,
1014                start_idx,
1015                start_dblk,
1016            });
1017            start_idx = start_idx
1018                .checked_add(ndblks.checked_mul(dblk_nelmts).ok_or_else(overflow)?)
1019                .ok_or_else(overflow)?;
1020            start_dblk = start_dblk.checked_add(ndblks).ok_or_else(overflow)?;
1021        }
1022        Ok(Self {
1023            idx_blk_elmts: idx_blk_elmts as u64,
1024            data_blk_min_elmts: min,
1025            dblk_page_nelmts: 1u64 << max_dblk_page_nelmts_bits,
1026            iblock_nsblks,
1027            ndblk_addrs: 2 * (sup_blk_min_data_ptrs as usize - 1),
1028            nsblk_addrs: nsblks - iblock_nsblks,
1029            sblk,
1030        })
1031    }
1032
1033    /// Super-block index containing chunk `idx` (`idx >= idx_blk_elmts`).
1034    pub fn sblk_index(&self, idx: u64) -> usize {
1035        let e = idx - self.idx_blk_elmts;
1036        log2_floor(e / self.data_blk_min_elmts + 1) as usize
1037    }
1038
1039    /// Whether super block `u`'s data blocks are paged (data block larger
1040    /// than one page).
1041    pub fn is_sblk_paged(&self, u: usize) -> bool {
1042        self.sblk[u].dblk_nelmts > self.dblk_page_nelmts
1043    }
1044
1045    /// Number of pages in each data block of super block `u`.
1046    pub fn npages(&self, u: usize) -> u64 {
1047        if self.is_sblk_paged(u) {
1048            self.sblk[u].dblk_nelmts / self.dblk_page_nelmts
1049        } else {
1050            0
1051        }
1052    }
1053
1054    /// Size of the page-init bitmap for one data block of super block `u`.
1055    pub fn dblk_page_init_size(&self, u: usize) -> usize {
1056        (self.npages(u) as usize).div_ceil(8)
1057    }
1058
1059    /// On-disk size of one data block page (`H5EA_DBLK_PAGE_SIZE`).
1060    pub fn dblk_page_size(&self, raw_elmt_size: usize) -> usize {
1061        self.dblk_page_nelmts as usize * raw_elmt_size + 4
1062    }
1063
1064    /// On-disk size of a data block prefix (`H5EA_DBLOCK_PREFIX_SIZE`):
1065    /// magic + version + class + header addr + block offset + checksum.
1066    pub fn dblk_prefix_size(&self, sizeof_addr: u8, max_nelmts_bits: u8) -> usize {
1067        let arr_off = ExtensibleArrayDataBlock::block_offset_size(max_nelmts_bits);
1068        4 + 1 + 1 + sizeof_addr as usize + arr_off + 4
1069    }
1070
1071    /// Locate chunk `idx` within the array.
1072    ///
1073    /// Returns an error when `idx` exceeds the array's capacity (rather than
1074    /// panicking on an out-of-bounds super-block index).
1075    pub fn locate(&self, idx: u64) -> FormatResult<EaLoc> {
1076        if idx < self.idx_blk_elmts {
1077            return Ok(EaLoc::Index { elem: idx as usize });
1078        }
1079        let sblk_idx = self.sblk_index(idx);
1080        let Some(&s) = self.sblk.get(sblk_idx) else {
1081            return Err(FormatError::InvalidData(format!(
1082                "chunk index {idx} exceeds the extensible array's capacity"
1083            )));
1084        };
1085        let elmt = (idx - self.idx_blk_elmts) - s.start_idx;
1086        let local_dblk = elmt / s.dblk_nelmts;
1087        let offset_in_dblk = elmt % s.dblk_nelmts;
1088        let paged = s.dblk_nelmts > self.dblk_page_nelmts;
1089        let path = if sblk_idx < self.iblock_nsblks {
1090            let global_dblk = s.start_dblk + local_dblk;
1091            EaDblkPath::Direct {
1092                idx: global_dblk as usize,
1093            }
1094        } else {
1095            EaDblkPath::ViaSblk {
1096                sblk_off: sblk_idx - self.iblock_nsblks,
1097                local_dblk: local_dblk as usize,
1098                ndblks_in_sblk: s.ndblks as usize,
1099                sblk_block_offset: s.start_idx,
1100            }
1101        };
1102        // libhdf5 stamps the data block's block_offset using the *global*
1103        // data-block index for index-block data blocks, and the local index
1104        // for super-block data blocks (H5EA__lookup_elmt).
1105        let dblk_block_offset = match path {
1106            EaDblkPath::Direct { idx } => s.start_idx + (idx as u64) * s.dblk_nelmts,
1107            EaDblkPath::ViaSblk { .. } => s.start_idx + local_dblk * s.dblk_nelmts,
1108        };
1109        Ok(EaLoc::Dblk(EaChunkLoc {
1110            sblk_idx,
1111            dblk_nelmts: s.dblk_nelmts,
1112            offset_in_dblk,
1113            dblk_block_offset,
1114            paged,
1115            path,
1116        }))
1117    }
1118}
1119
/// In-memory form of an extensible array super block (EASB).
///
/// A super block stores the addresses of its data blocks. It is only needed
/// for super blocks whose data-block addresses are not kept in the index
/// block. Paged data blocks (very large arrays) are not supported.
///
/// Serialized layout, in order:
/// ```text
/// "EASB"(4) + version(1) + class_id(1)
/// + header_addr(sizeof_addr)
/// + block_offset(arr_off_size)
/// + [page-init bitmaps: ndblks * dblk_page_init_size — only if data blocks paged]
/// + data_block_addresses(ndblks * sizeof_addr)
/// + checksum(4)
/// ```
#[derive(Clone, Debug, PartialEq)]
pub struct ExtensibleArraySuperBlock {
    /// Class id byte written right after the version byte.
    pub class_id: u8,
    /// Address of the owning extensible array header.
    pub header_addr: u64,
    /// Value of the block's `block_offset` field.
    pub block_offset: u64,
    /// One address slot per data block of this super block.
    pub dblk_addrs: Vec<u64>,
    /// Raw page-init bitmaps (`ndblks * dblk_page_init_size` bytes); left
    /// empty when the super block's data blocks are not paged.
    pub page_init: Vec<u8>,
}
1145
1146impl ExtensibleArraySuperBlock {
1147    /// Create an empty (non-paged) super block with `ndblks` undefined slots.
1148    pub fn new(class_id: u8, header_addr: u64, block_offset: u64, ndblks: usize) -> Self {
1149        Self {
1150            class_id,
1151            header_addr,
1152            block_offset,
1153            dblk_addrs: vec![UNDEF_ADDR; ndblks],
1154            page_init: Vec::new(),
1155        }
1156    }
1157
1158    pub fn encode(&self, ctx: &FormatContext, max_nelmts_bits: u8) -> Vec<u8> {
1159        let sa = ctx.sizeof_addr as usize;
1160        let bo = ExtensibleArrayDataBlock::block_offset_size(max_nelmts_bits);
1161        let size = 4 + 1 + 1 + sa + bo + self.page_init.len() + self.dblk_addrs.len() * sa + 4;
1162        let mut buf = Vec::with_capacity(size);
1163        buf.extend_from_slice(&EASB_SIGNATURE);
1164        buf.push(EA_VERSION);
1165        buf.push(self.class_id);
1166        buf.extend_from_slice(&self.header_addr.to_le_bytes()[..sa]);
1167        buf.extend_from_slice(&self.block_offset.to_le_bytes()[..bo]);
1168        buf.extend_from_slice(&self.page_init);
1169        for &a in &self.dblk_addrs {
1170            buf.extend_from_slice(&a.to_le_bytes()[..sa]);
1171        }
1172        let cksum = checksum_metadata(&buf);
1173        buf.extend_from_slice(&cksum.to_le_bytes());
1174        debug_assert_eq!(buf.len(), size);
1175        buf
1176    }
1177
1178    /// Decode a super block. `page_init_total` is the total size of the
1179    /// page-init bitmap region (`ndblks * dblk_page_init_size`); pass 0 when
1180    /// the super block's data blocks are not paged.
1181    pub fn decode(
1182        buf: &[u8],
1183        ctx: &FormatContext,
1184        max_nelmts_bits: u8,
1185        ndblks: usize,
1186        page_init_total: usize,
1187    ) -> FormatResult<Self> {
1188        let sa = ctx.sizeof_addr as usize;
1189        let bo = ExtensibleArrayDataBlock::block_offset_size(max_nelmts_bits);
1190        let min_size = ndblks
1191            .saturating_mul(sa)
1192            .saturating_add((10 + sa + bo).saturating_add(page_init_total));
1193        if buf.len() < min_size {
1194            return Err(FormatError::BufferTooShort {
1195                needed: min_size,
1196                available: buf.len(),
1197            });
1198        }
1199        if buf[0..4] != EASB_SIGNATURE {
1200            return Err(FormatError::InvalidSignature);
1201        }
1202        if buf[4] != EA_VERSION {
1203            return Err(FormatError::InvalidVersion(buf[4]));
1204        }
1205        let data_end = min_size - 4;
1206        let stored = u32::from_le_bytes([
1207            buf[data_end],
1208            buf[data_end + 1],
1209            buf[data_end + 2],
1210            buf[data_end + 3],
1211        ]);
1212        let computed = checksum_metadata(&buf[..data_end]);
1213        if stored != computed {
1214            return Err(FormatError::ChecksumMismatch {
1215                expected: stored,
1216                computed,
1217            });
1218        }
1219        let class_id = buf[5];
1220        let mut pos = 6;
1221        let header_addr = read_addr(&buf[pos..], sa);
1222        pos += sa;
1223        let block_offset = read_size(&buf[pos..], bo);
1224        pos += bo;
1225        let page_init = buf[pos..pos + page_init_total].to_vec();
1226        pos += page_init_total;
1227        let mut dblk_addrs = Vec::with_capacity(ndblks);
1228        for _ in 0..ndblks {
1229            dblk_addrs.push(read_addr(&buf[pos..], sa));
1230            pos += sa;
1231        }
1232        Ok(Self {
1233            class_id,
1234            header_addr,
1235            block_offset,
1236            dblk_addrs,
1237            page_init,
1238        })
1239    }
1240}
1241
1242/// Compute nsblk_addrs for the index block: the number of super block
1243/// address slots stored in the EAIB.
1244pub fn compute_nsblk_addrs(
1245    idx_blk_elmts: u8,
1246    data_blk_min_elmts: u8,
1247    sup_blk_min_data_ptrs: u8,
1248    max_nelmts_bits: u8,
1249) -> FormatResult<usize> {
1250    Ok(EaGeometry::new(
1251        idx_blk_elmts,
1252        data_blk_min_elmts,
1253        sup_blk_min_data_ptrs,
1254        max_nelmts_bits,
1255        10,
1256    )?
1257    .nsblk_addrs)
1258}
1259
1260// ======================================================================= tests
1261
#[cfg(test)]
mod tests {
    use super::*;

    /// Format context with 8-byte addresses and sizes.
    fn ctx8() -> FormatContext {
        FormatContext {
            sizeof_addr: 8,
            sizeof_size: 8,
        }
    }

    /// Format context with 4-byte addresses and sizes.
    fn ctx4() -> FormatContext {
        FormatContext {
            sizeof_addr: 4,
            sizeof_size: 4,
        }
    }

    #[test]
    fn header_roundtrip() {
        let ctx = ctx8();
        let mut original = ExtensibleArrayHeader::new_for_chunks(&ctx);
        original.idx_blk_addr = 0x1000;
        original.max_idx_set = 3;
        original.num_elmts_realized = 4;

        let bytes = original.encode(&ctx);
        assert_eq!(bytes.len(), original.encoded_size(&ctx));
        assert!(bytes.starts_with(&EAHD_SIGNATURE));

        let reparsed = ExtensibleArrayHeader::decode(&bytes, &ctx).unwrap();
        assert_eq!(reparsed, original);
    }

    #[test]
    fn header_roundtrip_ctx4() {
        let ctx = ctx4();
        let mut original = ExtensibleArrayHeader::new_for_chunks(&ctx);
        original.raw_elmt_size = 4;
        original.idx_blk_addr = 0x800;

        let bytes = original.encode(&ctx);
        let reparsed = ExtensibleArrayHeader::decode(&bytes, &ctx).unwrap();
        assert_eq!(reparsed, original);
    }

    #[test]
    fn header_bad_signature() {
        let ctx = ctx8();
        let mut hdr = ExtensibleArrayHeader::new_for_chunks(&ctx);
        hdr.idx_blk_addr = 0x1000;
        let mut bytes = hdr.encode(&ctx);
        // Break the first magic byte.
        bytes[0] = b'X';
        assert!(matches!(
            ExtensibleArrayHeader::decode(&bytes, &ctx),
            Err(FormatError::InvalidSignature)
        ));
    }

    #[test]
    fn header_checksum_mismatch() {
        let ctx = ctx8();
        let mut hdr = ExtensibleArrayHeader::new_for_chunks(&ctx);
        hdr.idx_blk_addr = 0x1000;
        let mut bytes = hdr.encode(&ctx);
        // Flip a payload byte so the stored checksum no longer matches.
        bytes[6] ^= 0xFF;
        assert!(matches!(
            ExtensibleArrayHeader::decode(&bytes, &ctx),
            Err(FormatError::ChecksumMismatch { .. })
        ));
    }

    #[test]
    fn index_block_roundtrip() {
        let ctx = ctx8();
        let ndblk = compute_ndblk_addrs(4).unwrap();
        assert_eq!(ndblk, 6);

        let mut original = ExtensibleArrayIndexBlock::new(0x500, 4, ndblk, 0);
        original.elements[0] = 0x1000;
        original.elements[1] = 0x2000;
        original.dblk_addrs[0] = 0x3000;

        let bytes = original.encode(&ctx);
        assert_eq!(bytes.len(), original.encoded_size(&ctx));
        assert!(bytes.starts_with(&EAIB_SIGNATURE));

        let reparsed = ExtensibleArrayIndexBlock::decode(&bytes, &ctx, 4, ndblk, 0).unwrap();
        assert_eq!(reparsed, original);
    }

    #[test]
    fn index_block_roundtrip_ctx4() {
        let ctx = ctx4();
        let original = ExtensibleArrayIndexBlock::new(0x300, 4, 6, 0);
        let bytes = original.encode(&ctx);
        let reparsed = ExtensibleArrayIndexBlock::decode(&bytes, &ctx, 4, 6, 0).unwrap();
        assert_eq!(reparsed, original);
    }

    #[test]
    fn index_block_bad_checksum() {
        let ctx = ctx8();
        let iblk = ExtensibleArrayIndexBlock::new(0x500, 4, 6, 0);
        let mut bytes = iblk.encode(&ctx);
        bytes[8] ^= 0xFF;
        assert!(matches!(
            ExtensibleArrayIndexBlock::decode(&bytes, &ctx, 4, 6, 0),
            Err(FormatError::ChecksumMismatch { .. })
        ));
    }

    #[test]
    fn data_block_roundtrip() {
        let ctx = ctx8();
        let mut original = ExtensibleArrayDataBlock::new(0x500, 4, 16);
        original.elements[0] = 0xA000;
        original.elements[5] = 0xB000;

        let bytes = original.encode(&ctx, 32);
        assert_eq!(bytes.len(), original.encoded_size(&ctx, 32));
        assert!(bytes.starts_with(&EADB_SIGNATURE));

        let reparsed = ExtensibleArrayDataBlock::decode(&bytes, &ctx, 32, 16).unwrap();
        assert_eq!(reparsed, original);
    }

    #[test]
    fn data_block_offset_size() {
        // (max_nelmts_bits, expected block_offset width in bytes)
        for (bits, width) in [(8, 1), (16, 2), (32, 4), (0, 1)] {
            assert_eq!(ExtensibleArrayDataBlock::block_offset_size(bits), width);
        }
    }

    #[test]
    fn compute_ndblk_addrs_default() {
        // sup_blk_min_data_ptrs=4 => ndblk=6
        for (sup, slots) in [(4, 6), (2, 2)] {
            assert_eq!(compute_ndblk_addrs(sup).unwrap(), slots);
        }
        assert!(compute_ndblk_addrs(0).is_err());
    }

    #[test]
    fn compute_nsblk_addrs_default() {
        // Default params: idx_blk_elmts=4, data_blk_min_elmts=16,
        // sup_blk_min_data_ptrs=4, max_nelmts_bits=32
        // Should give nsblk_addrs=25 (matching HDF5 library)
        let slots = compute_nsblk_addrs(4, 16, 4, 32).unwrap();
        assert_eq!(slots, 25);
    }

    #[test]
    fn ea_geometry_rejects_malformed_params() {
        // (data_blk_min_elmts, sup_blk_min_data_ptrs, max_nelmts_bits)
        let malformed = [
            (17, 4, 32), // data_blk_min_elmts not a power of two
            (0, 4, 32),  // data_blk_min_elmts zero
            (16, 0, 32), // sup_blk_min_data_ptrs zero
            (16, 3, 32), // sup_blk_min_data_ptrs not a power of two
            (16, 4, 3),  // max_nelmts_bits smaller than log2(data_blk_min_elmts)
        ];
        for (dmin, sup, bits) in malformed {
            assert!(EaGeometry::new(4, dmin, sup, bits, 10).is_err());
        }
        // Well-formed default params still succeed.
        assert!(EaGeometry::new(4, 16, 4, 32, 10).is_ok());
    }

    #[test]
    fn ea_locate_rejects_out_of_capacity_index() {
        let geometry = EaGeometry::new(4, 16, 4, 32, 10).unwrap();
        // 2^32 elements is the capacity; an index past it must error, not panic.
        assert!(geometry.locate(u64::MAX).is_err());
    }

    #[test]
    fn ea_geometry_matches_libhdf5() {
        // Verified against an h5py/libhdf5-produced EA file (/tmp/ea_ref.h5).
        let g = EaGeometry::new(4, 16, 4, 32, 10).unwrap();
        assert_eq!(g.sblk.len(), 29, "nsblks");
        assert_eq!(g.iblock_nsblks, 4);
        assert_eq!(g.ndblk_addrs, 6);
        assert_eq!(g.nsblk_addrs, 25);

        // Expected layout of the first five super blocks.
        let expected = [
            EaSblkInfo {
                ndblks: 1,
                dblk_nelmts: 16,
                start_idx: 0,
                start_dblk: 0,
            },
            EaSblkInfo {
                ndblks: 1,
                dblk_nelmts: 32,
                start_idx: 16,
                start_dblk: 1,
            },
            EaSblkInfo {
                ndblks: 2,
                dblk_nelmts: 32,
                start_idx: 48,
                start_dblk: 2,
            },
            EaSblkInfo {
                ndblks: 2,
                dblk_nelmts: 64,
                start_idx: 112,
                start_dblk: 4,
            },
            EaSblkInfo {
                ndblks: 4,
                dblk_nelmts: 64,
                start_idx: 240,
                start_dblk: 6,
            },
        ];
        for (u, want) in expected.iter().enumerate() {
            assert_eq!(g.sblk[u], *want, "super block {}", u);
        }

        // chunk 4 (e=0): super block 0, direct data block 0, block_offset 0.
        let EaLoc::Dblk(first) = g.locate(4).unwrap() else {
            panic!("expected data block");
        };
        assert_eq!(first.sblk_idx, 0);
        assert_eq!(first.path, EaDblkPath::Direct { idx: 0 });
        assert_eq!(first.dblk_block_offset, 0);

        // chunk 20 (e=16): super block 1, direct data block 1, block_offset 48.
        let EaLoc::Dblk(second) = g.locate(20).unwrap() else {
            panic!("expected data block");
        };
        assert_eq!(second.path, EaDblkPath::Direct { idx: 1 });
        assert_eq!(second.dblk_block_offset, 48);

        // chunk 244 (e=240): super block 4 -> reached via the index block's
        // super-block address array.
        let EaLoc::Dblk(via) = g.locate(244).unwrap() else {
            panic!("expected data block");
        };
        assert_eq!(via.sblk_idx, 4);
        let EaDblkPath::ViaSblk {
            sblk_off,
            local_dblk,
            sblk_block_offset,
            ..
        } = via.path
        else {
            panic!("expected super-block path");
        };
        assert_eq!((sblk_off, local_dblk, sblk_block_offset), (0, 0, 240));
    }
}