Skip to main content

hdf5_reader/
extensible_array.rs

1//! HDF5 Extensible Array (EA) chunk index.
2//!
3//! This is the default chunk index for datasets with one unlimited dimension
//! and `libver='latest'`. It uses a hierarchy of four block types:
5//!
6//! - `EAHD` — Extensible Array Header
7//! - `EAIB` — Extensible Array Index Block
8//! - `EADB` — Extensible Array Data Block
9//! - `EASB` — Extensible Array Secondary Block
10
11use crate::checksum::jenkins_lookup3;
12use crate::chunk_index::ChunkEntry;
13use crate::error::{Error, Result};
14use crate::io::Cursor;
15use crate::storage::Storage;
16
17const EAHD_SIGNATURE: [u8; 4] = *b"EAHD";
18const EAIB_SIGNATURE: [u8; 4] = *b"EAIB";
19const EADB_SIGNATURE: [u8; 4] = *b"EADB";
20const EASB_SIGNATURE: [u8; 4] = *b"EASB";
21
/// Parsed Extensible Array Header (`EAHD`).
///
/// Holds the creation parameters and index-block pointer consumed by the
/// rest of this module; header statistics other than the element count are
/// discarded during parsing.
#[derive(Debug)]
struct EaHeader {
    /// Client ID; `client_id == 1` selects the filtered entry layout
    /// elsewhere in this module.
    client_id: u8,
    /// Size in bytes of one array element (entry) as stored on disk.
    element_size: u8,
    /// Log2 of the maximum element count; `ceil(bits / 8)` gives the width
    /// of each block's `block_off` field (`sizeof_nelmts`).
    _max_nelmts_bits: u8,
    /// Number of elements stored inline in the index block.
    idx_blk_elmts: u8,
    /// Minimum number of elements per data block.
    data_blk_min_elmts: u8,
    /// Minimum number of data-block pointers per secondary block.
    sec_blk_min_data_ptrs: u8,
    /// Log2 of max elements per data-block page; 0 disables paging.
    max_dblk_page_nelmts_bits: u8,
    /// Realized element count, from the header statistics block.
    _nelmts: u64,
    /// File address of the index block (`EAIB`); may be the undefined offset.
    index_block_address: u64,
}
35
/// Parse the Extensible Array Header.
///
/// On-disk layout (from H5EA_HEADER_SIZE):
/// sig(4) + ver(1) + client_id(1) +
/// element_size(1) + max_nelmts_bits(1) + idx_blk_elmts(1) +
/// data_blk_min_elmts(1) + sec_blk_min_data_ptrs(1) + max_dblk_page_nelmts_bits(1) +
/// 6 statistics fields (each length_size) +
/// index_block_address(offset_size) + checksum(4)
///
/// `address` is the byte offset of the header inside `data`; `offset_size`
/// and `length_size` come from the file's superblock.
///
/// # Errors
///
/// Fails on a bad `EAHD` signature, an unsupported version, a truncated
/// buffer, or a Jenkins lookup3 checksum mismatch.
fn parse_header(data: &[u8], address: u64, offset_size: u8, length_size: u8) -> Result<EaHeader> {
    let mut cursor = Cursor::new(data);
    cursor.set_position(address);

    let sig = cursor.read_bytes(4)?;
    if sig != EAHD_SIGNATURE {
        return Err(Error::InvalidExtensibleArraySignature {
            context: "header signature mismatch",
        });
    }

    // Only version 0 is supported.
    let version = cursor.read_u8()?;
    if version != 0 {
        return Err(Error::Other(format!(
            "unsupported extensible array header version {}",
            version
        )));
    }

    // Creation parameters, one byte each, in on-disk order.
    let client_id = cursor.read_u8()?;
    let element_size = cursor.read_u8()?;
    let max_nelmts_bits = cursor.read_u8()?;
    let idx_blk_elmts = cursor.read_u8()?;
    let data_blk_min_elmts = cursor.read_u8()?;
    let sec_blk_min_data_ptrs = cursor.read_u8()?;
    let max_dblk_page_nelmts_bits = cursor.read_u8()?;

    // Statistics (6 fields, each length_size bytes); only the realized
    // element count is kept.
    let _nsuper_blks = cursor.read_length(length_size)?;
    let _super_blk_size = cursor.read_length(length_size)?;
    let _ndata_blks = cursor.read_length(length_size)?;
    let _data_blk_size = cursor.read_length(length_size)?;
    let _max_idx_set = cursor.read_length(length_size)?;
    let nelmts = cursor.read_length(length_size)?;

    let index_block_address = cursor.read_offset(offset_size)?;

    // Checksum covers every header byte up to (but not including) the
    // stored checksum field itself; slice bounds are safe because the
    // cursor reads above already validated the range.
    let header_end = cursor.position();
    let header_bytes = &data[address as usize..header_end as usize];
    let stored_checksum = cursor.read_u32_le()?;
    let computed = jenkins_lookup3(header_bytes);
    if stored_checksum != computed {
        return Err(Error::ChecksumMismatch {
            expected: stored_checksum,
            actual: computed,
        });
    }

    Ok(EaHeader {
        client_id,
        element_size,
        _max_nelmts_bits: max_nelmts_bits,
        idx_blk_elmts,
        data_blk_min_elmts,
        sec_blk_min_data_ptrs,
        max_dblk_page_nelmts_bits,
        _nelmts: nelmts,
        index_block_address,
    })
}
105
106fn parse_header_storage(
107    storage: &dyn Storage,
108    address: u64,
109    offset_size: u8,
110    length_size: u8,
111) -> Result<EaHeader> {
112    let header_len = 4
113        + 1
114        + 1
115        + 1
116        + 1
117        + 1
118        + 1
119        + 1
120        + 1
121        + 6 * usize::from(length_size)
122        + usize::from(offset_size)
123        + 4;
124    let bytes = storage.read_range(address, header_len)?;
125    parse_header(bytes.as_ref(), 0, offset_size, length_size)
126}
127
128/// Compute the super block layout.
129///
130/// Returns a vec of (elements_per_data_block, num_data_blocks) for each super block.
131/// Stops generating entries once cumulative capacity exceeds `nelmts`.
132fn compute_super_block_layout(header: &EaHeader) -> Vec<(u64, u64)> {
133    let mut layout = Vec::new();
134    let dblk_min = header.data_blk_min_elmts as u64;
135    let sblk_min = header.sec_blk_min_data_ptrs as u64;
136    let nelmts = header._nelmts;
137    let mut cumulative = header.idx_blk_elmts as u64;
138
139    for sb_idx in 0u32..64 {
140        if cumulative >= nelmts {
141            break;
142        }
143        let elmts_per_dblk = dblk_min * (1u64 << (sb_idx / 2));
144        let num_dblks = sblk_min * (1u64 << (sb_idx.div_ceil(2)));
145        layout.push((elmts_per_dblk, num_dblks));
146        cumulative += elmts_per_dblk * num_dblks;
147    }
148
149    layout
150}
151
/// A single raw entry decoded from an extensible array element.
struct EaRawEntry {
    /// File address of the chunk; the undefined offset (`u64::MAX`) marks
    /// an unallocated chunk.
    address: u64,
    /// Stored (possibly filtered) chunk size; 0 for non-filtered entries.
    chunk_size: u64,
    /// Per-chunk filter mask; 0 for non-filtered entries.
    filter_mask: u32,
}
158
159/// Read `count` entries from the cursor.
160fn read_entries(
161    cursor: &mut Cursor<'_>,
162    count: usize,
163    is_filtered: bool,
164    offset_size: u8,
165    entry_size: u8,
166) -> Result<Vec<EaRawEntry>> {
167    let mut entries = Vec::with_capacity(count);
168    for _ in 0..count {
169        let address = cursor.read_offset(offset_size)?;
170        let (chunk_size, filter_mask) = if is_filtered {
171            let chunk_size_len = entry_size
172                .checked_sub(offset_size)
173                .and_then(|remaining| remaining.checked_sub(4))
174                .ok_or_else(|| Error::InvalidData("invalid extensible array entry size".into()))?;
175            let cs = cursor.read_length(chunk_size_len)?;
176            let fm = cursor.read_u32_le()?;
177            (cs, fm)
178        } else {
179            (0, 0)
180        };
181        entries.push(EaRawEntry {
182            address,
183            chunk_size,
184            filter_mask,
185        });
186    }
187    Ok(entries)
188}
189
/// Parse a data block and return its entries.
///
/// `sizeof_nelmts` is `ceil(max_nelmts_bits / 8)` — used for the block_off field.
///
/// Handles both layouts:
/// - non-paged: entries follow the prefix directly, then a checksum;
/// - paged (`num_entries > 2^max_page_bits`): a page bitmap follows the
///   prefix, then one entry run (with its own trailing checksum) per
///   *initialized* page. Uninitialized pages occupy no bytes on disk and
///   are filled in here with undefined-address placeholders.
///
/// Checksums are read to advance the cursor but not verified.
#[allow(clippy::too_many_arguments)]
fn parse_data_block(
    data: &[u8],
    address: u64,
    num_entries: usize,
    is_filtered: bool,
    max_page_bits: u8,
    offset_size: u8,
    entry_size: u8,
    sizeof_nelmts: usize,
) -> Result<Vec<EaRawEntry>> {
    let mut cursor = Cursor::new(data);
    cursor.set_position(address);

    let sig = cursor.read_bytes(4)?;
    if sig != EADB_SIGNATURE {
        return Err(Error::InvalidExtensibleArraySignature {
            context: "data block signature mismatch",
        });
    }

    // Only version 0 is supported.
    let version = cursor.read_u8()?;
    if version != 0 {
        return Err(Error::Other(format!(
            "unsupported extensible array data block version {}",
            version
        )));
    }

    // Client ID and owning-header address are not needed for decoding.
    let _client_id = cursor.read_u8()?;
    let _header_address = cursor.read_offset(offset_size)?;

    // Block offset: sizeof_nelmts bytes indicating this block's element index offset.
    cursor.skip(sizeof_nelmts)?;

    // Paging is used only when nelmts exceeds 2^page_bits.
    let page_nelmts = if max_page_bits > 0 {
        1usize << max_page_bits
    } else {
        0
    };

    if page_nelmts > 0 && num_entries > page_nelmts {
        // Paged data block
        let num_pages = num_entries.div_ceil(page_nelmts);
        let bitmap_bytes = num_pages.div_ceil(8);
        let page_bitmap = cursor.read_bytes(bitmap_bytes)?.to_vec();

        let mut all_entries = Vec::with_capacity(num_entries);
        for page_idx in 0..num_pages {
            // One bit per page, LSB-first within each bitmap byte.
            let byte_idx = page_idx / 8;
            let bit_idx = page_idx % 8;
            let page_initialized =
                byte_idx < page_bitmap.len() && (page_bitmap[byte_idx] & (1 << bit_idx)) != 0;

            // The final page may be only partially filled.
            let entries_in_page = if page_idx == num_pages - 1 {
                let remainder = num_entries % page_nelmts;
                if remainder == 0 {
                    page_nelmts
                } else {
                    remainder
                }
            } else {
                page_nelmts
            };

            if page_initialized {
                let page_entries = read_entries(
                    &mut cursor,
                    entries_in_page,
                    is_filtered,
                    offset_size,
                    entry_size,
                )?;
                // Each stored page ends with its own checksum.
                let _page_checksum = cursor.read_u32_le()?;
                all_entries.extend(page_entries);
            } else {
                // Page never written: no bytes on disk; synthesize
                // undefined-address entries so indices stay aligned.
                for _ in 0..entries_in_page {
                    all_entries.push(EaRawEntry {
                        address: u64::MAX,
                        chunk_size: 0,
                        filter_mask: 0,
                    });
                }
            }
        }
        Ok(all_entries)
    } else {
        // Non-paged data block
        let entries = read_entries(
            &mut cursor,
            num_entries,
            is_filtered,
            offset_size,
            entry_size,
        )?;
        // Trailing block checksum (read, not verified).
        let _checksum = cursor.read_u32_le()?;
        Ok(entries)
    }
}
293
294/// Parse a secondary block and return its data block addresses.
295fn parse_secondary_block(
296    data: &[u8],
297    address: u64,
298    num_dblk_addrs: usize,
299    offset_size: u8,
300    sizeof_nelmts: usize,
301    page_bitmap_bytes: usize,
302) -> Result<Vec<u64>> {
303    let mut cursor = Cursor::new(data);
304    cursor.set_position(address);
305
306    let sig = cursor.read_bytes(4)?;
307    if sig != EASB_SIGNATURE {
308        return Err(Error::InvalidExtensibleArraySignature {
309            context: "secondary block signature mismatch",
310        });
311    }
312
313    let version = cursor.read_u8()?;
314    if version != 0 {
315        return Err(Error::Other(format!(
316            "unsupported extensible array secondary block version {}",
317            version
318        )));
319    }
320
321    let _client_id = cursor.read_u8()?;
322    let _header_address = cursor.read_offset(offset_size)?;
323    cursor.skip(sizeof_nelmts)?;
324
325    if page_bitmap_bytes > 0 {
326        cursor.skip(page_bitmap_bytes)?;
327    }
328
329    let mut addrs = Vec::with_capacity(num_dblk_addrs);
330    for _ in 0..num_dblk_addrs {
331        addrs.push(cursor.read_offset(offset_size)?);
332    }
333
334    // Skip checksum
335    let _checksum = cursor.read_u32_le()?;
336
337    Ok(addrs)
338}
339
340fn parse_secondary_block_storage(
341    storage: &dyn Storage,
342    address: u64,
343    num_dblk_addrs: usize,
344    offset_size: u8,
345    sizeof_nelmts: usize,
346    page_bitmap_bytes: usize,
347) -> Result<Vec<u64>> {
348    let _len = 4
349        + 1
350        + 1
351        + usize::from(offset_size)
352        + sizeof_nelmts
353        + page_bitmap_bytes
354        + num_dblk_addrs * usize::from(offset_size)
355        + 4;
356    let read_len = usize::try_from(storage.len().saturating_sub(address)).map_err(|_| {
357        Error::InvalidData(
358            "extensible array secondary block exceeds platform usize capacity".into(),
359        )
360    })?;
361    let bytes = storage.read_range(address, read_len)?;
362    parse_secondary_block(
363        bytes.as_ref(),
364        0,
365        num_dblk_addrs,
366        offset_size,
367        sizeof_nelmts,
368        page_bitmap_bytes,
369    )
370}
371
372fn read_entry_at(
373    data: &[u8],
374    position: u64,
375    is_filtered: bool,
376    offset_size: u8,
377    entry_size: u8,
378) -> Result<EaRawEntry> {
379    let mut cursor = Cursor::new(data);
380    cursor.set_position(position);
381    let mut entries = read_entries(&mut cursor, 1, is_filtered, offset_size, entry_size)?;
382    entries
383        .pop()
384        .ok_or_else(|| Error::InvalidData("missing extensible array entry".into()))
385}
386
387fn read_entry_at_storage(
388    storage: &dyn Storage,
389    position: u64,
390    is_filtered: bool,
391    offset_size: u8,
392    entry_size: u8,
393) -> Result<EaRawEntry> {
394    let bytes = storage.read_range(position, usize::from(entry_size))?;
395    let mut cursor = Cursor::new(bytes.as_ref());
396    let mut entries = read_entries(&mut cursor, 1, is_filtered, offset_size, entry_size)?;
397    entries
398        .pop()
399        .ok_or_else(|| Error::InvalidData("missing extensible array entry".into()))
400}
401
/// Enumerate the chunks to look up, in row-major order.
///
/// For each chunk in the (inclusive) bound range — or the whole chunk grid
/// when `chunk_bounds` is `None` — yields its row-major linear index into
/// the extensible array together with its element offsets (chunk index ×
/// chunk extent per dimension). A zero-dimensional dataset yields the
/// single target `(0, [])`.
fn linear_target_offsets(
    dataset_shape: &[u64],
    chunk_dims: &[u32],
    chunk_bounds: Option<(&[u64], &[u64])>,
) -> Vec<(usize, Vec<u64>)> {
    let ndim = dataset_shape.len();
    if ndim == 0 {
        return vec![(0, Vec::new())];
    }

    // Number of chunks along each dimension.
    let grid: Vec<u64> = dataset_shape
        .iter()
        .zip(chunk_dims)
        .map(|(&extent, &dim)| extent.div_ceil(u64::from(dim)))
        .collect();

    // Inclusive lower/upper chunk-index bounds for the walk.
    let (lo, hi): (Vec<u64>, Vec<u64>) = if let Some((first, last)) = chunk_bounds {
        (first.to_vec(), last.to_vec())
    } else {
        let last: Vec<u64> = grid.iter().map(|&n| n.saturating_sub(1)).collect();
        (vec![0; ndim], last)
    };

    let mut out = Vec::new();
    let mut pos = lo.clone();
    'walk: loop {
        let linear = pos
            .iter()
            .zip(&grid)
            .fold(0u64, |acc, (&p, &g)| acc * g + p);
        let elem_offsets: Vec<u64> = pos
            .iter()
            .zip(chunk_dims)
            .map(|(&p, &dim)| p * u64::from(dim))
            .collect();
        out.push((linear as usize, elem_offsets));

        // Odometer-style advance: bump the last dimension that can still
        // move, resetting every trailing dimension to its lower bound.
        for dim in (0..ndim).rev() {
            if pos[dim] < hi[dim] {
                pos[dim] += 1;
                for d in (dim + 1)..ndim {
                    pos[d] = lo[d];
                }
                continue 'walk;
            }
        }
        break;
    }

    out
}
460
/// Look up a single entry (`local_idx`) inside one data block without
/// decoding the whole block.
///
/// Mirrors the layout handled by `parse_data_block`: after the common
/// prefix, either the entries follow directly (non-paged) or a page bitmap
/// plus one entry run per *initialized* page follows. Because
/// uninitialized pages occupy no bytes on disk, the byte offset of the
/// target page is found by summing the sizes of the initialized pages
/// before it. An entry in an uninitialized page is returned with the
/// undefined address (`u64::MAX`).
#[allow(clippy::too_many_arguments)]
fn read_data_block_entry(
    data: &[u8],
    address: u64,
    num_entries: usize,
    local_idx: usize,
    is_filtered: bool,
    max_page_bits: u8,
    offset_size: u8,
    entry_size: u8,
    sizeof_nelmts: usize,
) -> Result<EaRawEntry> {
    let mut cursor = Cursor::new(data);
    cursor.set_position(address);

    let sig = cursor.read_bytes(4)?;
    if sig != EADB_SIGNATURE {
        return Err(Error::InvalidExtensibleArraySignature {
            context: "data block signature mismatch",
        });
    }

    // Only version 0 is supported.
    let version = cursor.read_u8()?;
    if version != 0 {
        return Err(Error::Other(format!(
            "unsupported extensible array data block version {}",
            version
        )));
    }

    // Client ID, owning-header address, and block_off are skipped.
    let _client_id = cursor.read_u8()?;
    let _header_address = cursor.read_offset(offset_size)?;
    cursor.skip(sizeof_nelmts)?;

    // Page size in elements; 0 means paging is disabled.
    let page_nelmts = if max_page_bits > 0 {
        1usize << max_page_bits
    } else {
        0
    };

    if page_nelmts > 0 && num_entries > page_nelmts {
        let num_pages = num_entries.div_ceil(page_nelmts);
        let bitmap_bytes = num_pages.div_ceil(8);
        let page_bitmap = cursor.read_bytes(bitmap_bytes)?.to_vec();
        // First byte after the bitmap: start of the page data region.
        let data_start = cursor.position();

        let target_page = local_idx / page_nelmts;
        let within_page = local_idx % page_nelmts;
        // One bit per page, LSB-first within each bitmap byte.
        let byte_idx = target_page / 8;
        let bit_idx = target_page % 8;
        let page_initialized =
            byte_idx < page_bitmap.len() && (page_bitmap[byte_idx] & (1 << bit_idx)) != 0;
        if !page_initialized {
            // Page never written: report the chunk as unallocated.
            return Ok(EaRawEntry {
                address: u64::MAX,
                chunk_size: 0,
                filter_mask: 0,
            });
        }

        // Accumulate the on-disk size of every initialized page before the
        // target; uninitialized pages contribute nothing.
        let mut page_start = data_start;
        for page_idx in 0..target_page {
            // The final page may be only partially filled.
            let entries_in_page = if page_idx == num_pages - 1 {
                let remainder = num_entries % page_nelmts;
                if remainder == 0 {
                    page_nelmts
                } else {
                    remainder
                }
            } else {
                page_nelmts
            };
            let page_byte_idx = page_idx / 8;
            let page_bit_idx = page_idx % 8;
            let initialized = page_byte_idx < page_bitmap.len()
                && (page_bitmap[page_byte_idx] & (1 << page_bit_idx)) != 0;
            if initialized {
                // +4 skips each initialized page's trailing checksum.
                page_start += (entries_in_page * entry_size as usize + 4) as u64;
            }
        }

        let position = page_start + (within_page * entry_size as usize) as u64;
        return read_entry_at(data, position, is_filtered, offset_size, entry_size);
    }

    // Non-paged: entries follow the prefix directly.
    let position = cursor.position() + (local_idx * entry_size as usize) as u64;
    read_entry_at(data, position, is_filtered, offset_size, entry_size)
}
549
/// Storage-backed variant of `read_data_block_entry`: fetches only the
/// block prefix, the page bitmap (when paged), and the single target entry,
/// rather than the whole data block.
///
/// See `read_data_block_entry` for the page-offset arithmetic; the two
/// functions must stay in sync.
#[allow(clippy::too_many_arguments)]
fn read_data_block_entry_storage(
    storage: &dyn Storage,
    address: u64,
    num_entries: usize,
    local_idx: usize,
    is_filtered: bool,
    max_page_bits: u8,
    offset_size: u8,
    entry_size: u8,
    sizeof_nelmts: usize,
) -> Result<EaRawEntry> {
    // Fixed prefix: sig(4) + version(1) + client_id(1) + header address +
    // block_off.
    let header_len = 4 + 1 + 1 + usize::from(offset_size) + sizeof_nelmts;
    let header = storage.read_range(address, header_len)?;
    let mut cursor = Cursor::new(header.as_ref());

    let sig = cursor.read_bytes(4)?;
    if sig != EADB_SIGNATURE {
        return Err(Error::InvalidExtensibleArraySignature {
            context: "data block signature mismatch",
        });
    }

    // Only version 0 is supported.
    let version = cursor.read_u8()?;
    if version != 0 {
        return Err(Error::Other(format!(
            "unsupported extensible array data block version {}",
            version
        )));
    }

    let _client_id = cursor.read_u8()?;
    let _header_address = cursor.read_offset(offset_size)?;
    cursor.skip(sizeof_nelmts)?;

    // Absolute file offset of the first byte after the prefix.
    let base =
        address + u64::try_from(header_len).map_err(|_| Error::OffsetOutOfBounds(address))?;
    // Page size in elements; 0 means paging is disabled.
    let page_nelmts = if max_page_bits > 0 {
        1usize << max_page_bits
    } else {
        0
    };

    if page_nelmts > 0 && num_entries > page_nelmts {
        let num_pages = num_entries.div_ceil(page_nelmts);
        let bitmap_bytes = num_pages.div_ceil(8);
        let page_bitmap = storage.read_range(base, bitmap_bytes)?;
        let data_start = base
            + u64::try_from(bitmap_bytes)
                .map_err(|_| Error::InvalidData("EA bitmap size exceeds u64 capacity".into()))?;

        let target_page = local_idx / page_nelmts;
        let within_page = local_idx % page_nelmts;
        // One bit per page, LSB-first within each bitmap byte.
        let byte_idx = target_page / 8;
        let bit_idx = target_page % 8;
        let page_initialized =
            byte_idx < page_bitmap.len() && (page_bitmap[byte_idx] & (1 << bit_idx)) != 0;
        if !page_initialized {
            // Page never written: report the chunk as unallocated.
            return Ok(EaRawEntry {
                address: u64::MAX,
                chunk_size: 0,
                filter_mask: 0,
            });
        }

        // Accumulate the on-disk size of every initialized page before the
        // target; uninitialized pages occupy no bytes.
        let mut page_start = data_start;
        for page_idx in 0..target_page {
            // The final page may be only partially filled.
            let entries_in_page = if page_idx == num_pages - 1 {
                let remainder = num_entries % page_nelmts;
                if remainder == 0 {
                    page_nelmts
                } else {
                    remainder
                }
            } else {
                page_nelmts
            };
            let page_byte_idx = page_idx / 8;
            let page_bit_idx = page_idx % 8;
            let initialized = page_byte_idx < page_bitmap.len()
                && (page_bitmap[page_byte_idx] & (1 << page_bit_idx)) != 0;
            if initialized {
                // +4 skips each initialized page's trailing checksum.
                page_start += u64::try_from(entries_in_page * usize::from(entry_size) + 4)
                    .map_err(|_| Error::InvalidData("EA page size exceeds u64 capacity".into()))?;
            }
        }

        let position = page_start
            + u64::try_from(within_page * usize::from(entry_size)).map_err(|_| {
                Error::InvalidData("EA page entry offset exceeds u64 capacity".into())
            })?;
        return read_entry_at_storage(storage, position, is_filtered, offset_size, entry_size);
    }

    // Non-paged: entries follow the prefix directly.
    let position = base
        + u64::try_from(local_idx * usize::from(entry_size))
            .map_err(|_| Error::InvalidData("EA entry offset exceeds u64 capacity".into()))?;
    read_entry_at_storage(storage, position, is_filtered, offset_size, entry_size)
}
649
/// Collect chunk entries for a bounded (inclusive) range of chunk indices.
///
/// Instead of decoding the whole array, each target chunk is looked up
/// individually: inline index-block elements are read in place; elements
/// beyond the inline region are located through `sb_layout` (data blocks
/// for super blocks 0-1 are addressed directly from the index block, super
/// blocks 2+ go through secondary blocks, parsed once and cached). Chunks
/// whose address is undefined (unallocated) are skipped.
#[allow(clippy::too_many_arguments)]
fn collect_extensible_array_chunk_entries_bounded(
    data: &[u8],
    header: &EaHeader,
    offset_size: u8,
    dataset_shape: &[u64],
    chunk_dims: &[u32],
    chunk_bounds: (&[u64], &[u64]),
    sb_layout: &[(u64, u64)],
    sizeof_nelmts: usize,
) -> Result<Vec<ChunkEntry>> {
    // Client ID 1 means entries carry chunk size + filter mask.
    let is_filtered = header.client_id == 1;
    let targets = linear_target_offsets(dataset_shape, chunk_dims, Some(chunk_bounds));

    let mut cursor = Cursor::new(data);
    cursor.set_position(header.index_block_address);

    let sig = cursor.read_bytes(4)?;
    if sig != EAIB_SIGNATURE {
        return Err(Error::InvalidExtensibleArraySignature {
            context: "index block signature mismatch",
        });
    }

    // Only version 0 is supported.
    let version = cursor.read_u8()?;
    if version != 0 {
        return Err(Error::Other(format!(
            "unsupported extensible array index block version {}",
            version
        )));
    }

    let _client_id = cursor.read_u8()?;
    let _header_address = cursor.read_offset(offset_size)?;

    // Inline elements: remember where they start, then skip over them so
    // the address arrays that follow can be read; individual inline
    // entries are decoded lazily below.
    let num_inline = header.idx_blk_elmts as usize;
    let inline_start = cursor.position();
    cursor.skip(num_inline * header.element_size as usize)?;

    // Data-block addresses stored directly in the index block
    // (2 * sec_blk_min_data_ptrs of them, covering super blocks 0-1).
    let ndblk_addrs = 2 * header.sec_blk_min_data_ptrs as usize;
    let mut direct_dblk_addrs = Vec::with_capacity(ndblk_addrs);
    for _ in 0..ndblk_addrs {
        direct_dblk_addrs.push(cursor.read_offset(offset_size)?);
    }

    // Secondary-block addresses for the remaining super blocks.
    let nsblks = sb_layout.len();
    let nsblk_addrs = nsblks.saturating_sub(ndblk_addrs);
    let mut sec_block_addrs = Vec::with_capacity(nsblk_addrs);
    for _ in 0..nsblk_addrs {
        sec_block_addrs.push(cursor.read_offset(offset_size)?);
    }

    // Each secondary block is parsed at most once; `Some(Vec::new())`
    // caches an undefined/absent secondary block.
    let mut secondary_block_cache: Vec<Option<Vec<u64>>> = vec![None; sec_block_addrs.len()];
    let mut entries = Vec::new();

    for (linear_idx, offsets) in targets {
        let raw = if linear_idx < num_inline {
            // Element lives inline in the index block.
            read_entry_at(
                data,
                inline_start + (linear_idx * header.element_size as usize) as u64,
                is_filtered,
                offset_size,
                header.element_size,
            )?
        } else {
            // Map the post-inline linear index to
            // (super block, data block, slot within data block).
            let mut relative_idx = (linear_idx - num_inline) as u64;
            let mut sb_idx = None;
            for (candidate_idx, (elmts_per_dblk, num_dblks)) in sb_layout.iter().enumerate() {
                let capacity = elmts_per_dblk * num_dblks;
                if relative_idx < capacity {
                    sb_idx = Some(candidate_idx);
                    break;
                }
                relative_idx -= capacity;
            }

            // Index beyond the computed layout: no such chunk.
            let Some(sb_idx) = sb_idx else {
                continue;
            };
            let (elmts_per_dblk, _) = sb_layout[sb_idx];
            let dblk_idx = (relative_idx / elmts_per_dblk) as usize;
            let local_idx = (relative_idx % elmts_per_dblk) as usize;

            let dblk_addr = if sb_idx < 2 {
                // Super blocks 0-1: data block addresses are stored directly
                // in the index block, laid out consecutively.
                let base = sb_layout[..sb_idx]
                    .iter()
                    .map(|(_, num_dblks)| *num_dblks as usize)
                    .sum::<usize>();
                *direct_dblk_addrs.get(base + dblk_idx).unwrap_or(&u64::MAX)
            } else {
                // Super blocks 2+: resolve through a (cached) secondary block.
                let sec_cache_idx = sb_idx - 2;
                if secondary_block_cache[sec_cache_idx].is_none() {
                    let sec_addr = sec_block_addrs
                        .get(sec_cache_idx)
                        .copied()
                        .unwrap_or(u64::MAX);
                    if Cursor::is_undefined_offset(sec_addr, offset_size) {
                        secondary_block_cache[sec_cache_idx] = Some(Vec::new());
                    } else {
                        let (_, num_dblks) = sb_layout[sb_idx];
                        // Secondary blocks carry a page bitmap only when
                        // their data blocks are paged.
                        let page_bitmap_bytes = if header.max_dblk_page_nelmts_bits > 0
                            && elmts_per_dblk > (1u64 << header.max_dblk_page_nelmts_bits)
                        {
                            let page_nelmts = 1usize << header.max_dblk_page_nelmts_bits;
                            let pages_per_dblk = (elmts_per_dblk as usize).div_ceil(page_nelmts);
                            (num_dblks as usize * pages_per_dblk).div_ceil(8)
                        } else {
                            0
                        };
                        secondary_block_cache[sec_cache_idx] = Some(parse_secondary_block(
                            data,
                            sec_addr,
                            num_dblks as usize,
                            offset_size,
                            sizeof_nelmts,
                            page_bitmap_bytes,
                        )?);
                    }
                }

                secondary_block_cache[sec_cache_idx]
                    .as_ref()
                    .and_then(|addrs| addrs.get(dblk_idx))
                    .copied()
                    .unwrap_or(u64::MAX)
            };

            // Data block never allocated: chunk does not exist.
            if Cursor::is_undefined_offset(dblk_addr, offset_size) {
                continue;
            }

            read_data_block_entry(
                data,
                dblk_addr,
                elmts_per_dblk as usize,
                local_idx,
                is_filtered,
                header.max_dblk_page_nelmts_bits,
                offset_size,
                header.element_size,
                sizeof_nelmts,
            )?
        };

        // Unallocated chunk — no entry emitted.
        if Cursor::is_undefined_offset(raw.address, offset_size) {
            continue;
        }

        entries.push(ChunkEntry {
            address: raw.address,
            size: raw.chunk_size,
            filter_mask: raw.filter_mask,
            offsets,
        });
    }

    Ok(entries)
}
808
809/// Collect chunk entries from an Extensible Array index.
810///
811/// Walks the EAHD → EAIB → (EADB / EASB → EADB) hierarchy and converts
812/// linear entry indices to multi-dimensional chunk offsets.
813pub fn collect_extensible_array_chunk_entries(
814    data: &[u8],
815    header_address: u64,
816    offset_size: u8,
817    length_size: u8,
818    dataset_shape: &[u64],
819    chunk_dims: &[u32],
820    chunk_bounds: Option<(&[u64], &[u64])>,
821) -> Result<Vec<ChunkEntry>> {
822    let header = parse_header(data, header_address, offset_size, length_size)?;
823
824    if Cursor::is_undefined_offset(header.index_block_address, offset_size) {
825        return Ok(Vec::new());
826    }
827
828    let is_filtered = header.client_id == 1;
829    let sb_layout = compute_super_block_layout(&header);
830    let sizeof_nelmts = (header._max_nelmts_bits as usize).div_ceil(8);
831
832    if let Some(bounds) = chunk_bounds {
833        return collect_extensible_array_chunk_entries_bounded(
834            data,
835            &header,
836            offset_size,
837            dataset_shape,
838            chunk_dims,
839            bounds,
840            &sb_layout,
841            sizeof_nelmts,
842        );
843    }
844
845    // Parse the index block.
846    let mut cursor = Cursor::new(data);
847    cursor.set_position(header.index_block_address);
848
849    let sig = cursor.read_bytes(4)?;
850    if sig != EAIB_SIGNATURE {
851        return Err(Error::InvalidExtensibleArraySignature {
852            context: "index block signature mismatch",
853        });
854    }
855
856    let version = cursor.read_u8()?;
857    if version != 0 {
858        return Err(Error::Other(format!(
859            "unsupported extensible array index block version {}",
860            version
861        )));
862    }
863
864    let _client_id = cursor.read_u8()?;
865    let _header_address = cursor.read_offset(offset_size)?;
866
867    // 1. Inline elements (idx_blk_elmts entries stored directly).
868    let num_inline = header.idx_blk_elmts as usize;
869    let inline_entries = read_entries(
870        &mut cursor,
871        num_inline,
872        is_filtered,
873        offset_size,
874        header.element_size,
875    )?;
876
877    // 2. Data block addresses stored directly in the index block.
878    // The number is 2 * sec_blk_min_data_ptrs (from HDF5: EA_IBLOCK_NDBLK_ADDRS).
879    let ndblk_addrs = 2 * header.sec_blk_min_data_ptrs as usize;
880    let mut direct_dblk_addrs = Vec::with_capacity(ndblk_addrs);
881    for _ in 0..ndblk_addrs {
882        direct_dblk_addrs.push(cursor.read_offset(offset_size)?);
883    }
884
885    // 3. Secondary block addresses for super blocks 2+.
886    // nsblk_addrs = max(0, nsblks - ndblk_addrs) where nsblks is the total
887    // number of super blocks needed to cover nelmts.
888    // compute_super_block_layout already stops once capacity >= nelmts,
889    // so sb_layout.len() is the total number of super blocks needed.
890    let nsblks = sb_layout.len();
891
892    let nsblk_addrs = nsblks.saturating_sub(ndblk_addrs);
893    let mut sec_block_addrs = Vec::with_capacity(nsblk_addrs);
894    for _ in 0..nsblk_addrs {
895        sec_block_addrs.push(cursor.read_offset(offset_size)?);
896    }
897
898    // Skip checksum at end of index block
899    let _checksum = cursor.read_u32_le()?;
900
901    // Now collect all entries.
902    let mut all_entries: Vec<EaRawEntry> = Vec::new();
903
904    // Inline entries
905    all_entries.extend(inline_entries);
906
907    // Data blocks from direct addresses (super blocks 0-1)
908    let mut dblk_addr_idx = 0;
909    for sb_idx_iter in 0..2usize.min(nsblks) {
910        if sb_idx_iter >= sb_layout.len() {
911            break;
912        }
913        let (elmts_per_dblk, num_dblks) = sb_layout[sb_idx_iter];
914        for _ in 0..num_dblks {
915            if dblk_addr_idx >= direct_dblk_addrs.len() {
916                break;
917            }
918            let dblk_addr = direct_dblk_addrs[dblk_addr_idx];
919            dblk_addr_idx += 1;
920
921            if Cursor::is_undefined_offset(dblk_addr, offset_size) {
922                for _ in 0..elmts_per_dblk {
923                    all_entries.push(EaRawEntry {
924                        address: u64::MAX,
925                        chunk_size: 0,
926                        filter_mask: 0,
927                    });
928                }
929            } else {
930                let dblk_entries = parse_data_block(
931                    data,
932                    dblk_addr,
933                    elmts_per_dblk as usize,
934                    is_filtered,
935                    header.max_dblk_page_nelmts_bits,
936                    offset_size,
937                    header.element_size,
938                    sizeof_nelmts,
939                )?;
940                all_entries.extend(dblk_entries);
941            }
942        }
943    }
944
945    // Data blocks from super blocks 2+ (via secondary blocks)
946    for (sec_idx, &sec_addr) in sec_block_addrs.iter().enumerate() {
947        let sb_idx_iter = sec_idx + 2;
948        if sb_idx_iter >= sb_layout.len() {
949            break;
950        }
951        let (elmts_per_dblk, num_dblks) = sb_layout[sb_idx_iter];
952
953        if Cursor::is_undefined_offset(sec_addr, offset_size) {
954            for _ in 0..(elmts_per_dblk * num_dblks) {
955                all_entries.push(EaRawEntry {
956                    address: u64::MAX,
957                    chunk_size: 0,
958                    filter_mask: 0,
959                });
960            }
961            continue;
962        }
963
964        // Per HDF5 spec III.H "Extensible Array Secondary Block", the secondary
965        // block contains a page initialization bitmap when data blocks are paged.
966        // Bitmap size = ceil(num_dblks * pages_per_dblk / 8).
967        let page_bitmap_bytes = if header.max_dblk_page_nelmts_bits > 0
968            && elmts_per_dblk > (1u64 << header.max_dblk_page_nelmts_bits)
969        {
970            let page_nelmts = 1usize << header.max_dblk_page_nelmts_bits;
971            let pages_per_dblk = (elmts_per_dblk as usize).div_ceil(page_nelmts);
972            (num_dblks as usize * pages_per_dblk).div_ceil(8)
973        } else {
974            0
975        };
976        let dblk_addrs = parse_secondary_block(
977            data,
978            sec_addr,
979            num_dblks as usize,
980            offset_size,
981            sizeof_nelmts,
982            page_bitmap_bytes,
983        )?;
984
985        for &dblk_addr in &dblk_addrs {
986            if Cursor::is_undefined_offset(dblk_addr, offset_size) {
987                for _ in 0..elmts_per_dblk {
988                    all_entries.push(EaRawEntry {
989                        address: u64::MAX,
990                        chunk_size: 0,
991                        filter_mask: 0,
992                    });
993                }
994            } else {
995                let dblk_entries = parse_data_block(
996                    data,
997                    dblk_addr,
998                    elmts_per_dblk as usize,
999                    is_filtered,
1000                    header.max_dblk_page_nelmts_bits,
1001                    offset_size,
1002                    header.element_size,
1003                    sizeof_nelmts,
1004                )?;
1005                all_entries.extend(dblk_entries);
1006            }
1007        }
1008    }
1009
1010    // Convert linear indices to chunk offsets.
1011    let ndim = dataset_shape.len();
1012    let chunks_per_dim: Vec<u64> = (0..ndim)
1013        .map(|i| dataset_shape[i].div_ceil(chunk_dims[i] as u64))
1014        .collect();
1015
1016    let mut entries = Vec::new();
1017    for (linear_idx, raw) in all_entries.iter().enumerate() {
1018        if Cursor::is_undefined_offset(raw.address, offset_size) {
1019            continue;
1020        }
1021
1022        let mut remaining = linear_idx as u64;
1023        let mut offsets = vec![0u64; ndim];
1024        for d in (0..ndim).rev() {
1025            offsets[d] = (remaining % chunks_per_dim[d]) * chunk_dims[d] as u64;
1026            remaining /= chunks_per_dim[d];
1027        }
1028
1029        if let Some((first_chunk, last_chunk)) = chunk_bounds {
1030            let overlaps = offsets.iter().enumerate().all(|(dim, offset)| {
1031                let chunk_index = *offset / u64::from(chunk_dims[dim]);
1032                chunk_index >= first_chunk[dim] && chunk_index <= last_chunk[dim]
1033            });
1034            if !overlaps {
1035                continue;
1036            }
1037        }
1038
1039        entries.push(ChunkEntry {
1040            address: raw.address,
1041            size: raw.chunk_size,
1042            filter_mask: raw.filter_mask,
1043            offsets,
1044        });
1045    }
1046
1047    Ok(entries)
1048}
1049
/// Collect chunk entries from an Extensible Array index using random-access storage.
///
/// Reads the EA header at `header_address`, then walks the EAIB index block
/// and — where needed — its EADB data blocks and EASB secondary blocks,
/// producing one [`ChunkEntry`] per allocated chunk.
///
/// Two strategies are used:
/// - When `chunk_bounds = Some((first_chunk, last_chunk))` (inclusive,
///   per-dimension chunk indices), only the targeted entries are resolved,
///   reading them individually from `storage` and caching secondary blocks.
/// - Otherwise every data block is parsed, all entries are gathered in linear
///   order, and linear indices are converted to per-dimension chunk offsets.
///
/// # Errors
/// Returns an error on EAIB signature/version mismatches, on short reads from
/// `storage`, or when a computed byte count does not fit in `usize`/`u64`.
pub fn collect_extensible_array_chunk_entries_storage(
    storage: &dyn Storage,
    header_address: u64,
    offset_size: u8,
    length_size: u8,
    dataset_shape: &[u64],
    chunk_dims: &[u32],
    chunk_bounds: Option<(&[u64], &[u64])>,
) -> Result<Vec<ChunkEntry>> {
    let header = parse_header_storage(storage, header_address, offset_size, length_size)?;

    // An undefined index-block address means no chunks have been written yet.
    if Cursor::is_undefined_offset(header.index_block_address, offset_size) {
        return Ok(Vec::new());
    }

    // Client ID 1 = filtered chunks: each entry carries chunk size and filter
    // mask in addition to the address.
    let is_filtered = header.client_id == 1;
    let sb_layout = compute_super_block_layout(&header);
    // Byte width of the "number of elements" field inside data/secondary
    // blocks: ceil(max_nelmts_bits / 8).
    let sizeof_nelmts = (header._max_nelmts_bits as usize).div_ceil(8);

    // ---- Bounded fast path: resolve only the requested chunk entries. ----
    if let Some(bounds) = chunk_bounds {
        // (linear element index, per-dimension byte offsets) for each chunk
        // inside the requested bounds.
        let targets = linear_target_offsets(dataset_shape, chunk_dims, Some(bounds));
        // Expected index-block size: sig(4) + version(1) + client_id(1) +
        // header address + inline elements + direct data-block addresses +
        // secondary-block addresses + checksum(4).
        // NOTE(review): `_index_block_len` is computed but unused — the read
        // below spans to the end of storage instead. Presumably this tolerates
        // layouts where the computed length is off; confirm against the HDF5
        // reference and tighten the read if the formula is trustworthy.
        let _index_block_len = 4
            + 1
            + 1
            + usize::from(offset_size)
            + usize::from(header.idx_blk_elmts) * usize::from(header.element_size)
            + (2 * usize::from(header.sec_blk_min_data_ptrs)) * usize::from(offset_size)
            + sb_layout
                .len()
                .saturating_sub(2 * usize::from(header.sec_blk_min_data_ptrs))
                * usize::from(offset_size)
            + 4;
        let index_block = storage.read_range(
            header.index_block_address,
            usize::try_from(storage.len().saturating_sub(header.index_block_address)).map_err(
                |_| {
                    Error::InvalidData(
                        "extensible array index block exceeds platform usize capacity".into(),
                    )
                },
            )?,
        )?;
        let mut cursor = Cursor::new(index_block.as_ref());
        let sig = cursor.read_bytes(4)?;
        if sig != EAIB_SIGNATURE {
            return Err(Error::InvalidExtensibleArraySignature {
                context: "index block signature mismatch",
            });
        }
        let version = cursor.read_u8()?;
        if version != 0 {
            return Err(Error::Other(format!(
                "unsupported extensible array index block version {}",
                version
            )));
        }
        let _client_id = cursor.read_u8()?;
        let _header_address = cursor.read_offset(offset_size)?;
        // Inline elements are stored directly in the index block; remember
        // where they start so targeted entries can be read in place later.
        let num_inline = header.idx_blk_elmts as usize;
        let inline_start = cursor.position();
        cursor.skip(num_inline * header.element_size as usize)?;

        // Direct data-block addresses (cover super blocks 0-1).
        let ndblk_addrs = 2 * header.sec_blk_min_data_ptrs as usize;
        let mut direct_dblk_addrs = Vec::with_capacity(ndblk_addrs);
        for _ in 0..ndblk_addrs {
            direct_dblk_addrs.push(cursor.read_offset(offset_size)?);
        }

        // Secondary-block addresses for super blocks 2+.
        // NOTE(review): this derives the count as nsblks - ndblk_addrs;
        // verify against the HDF5 reference implementation's nsblk_addrs
        // computation (H5EAiblock.c).
        let nsblks = sb_layout.len();
        let nsblk_addrs = nsblks.saturating_sub(ndblk_addrs);
        let mut sec_block_addrs = Vec::with_capacity(nsblk_addrs);
        for _ in 0..nsblk_addrs {
            sec_block_addrs.push(cursor.read_offset(offset_size)?);
        }

        // Secondary blocks are parsed lazily and cached so that multiple
        // targets in the same super block only read the EASB once.
        let mut secondary_block_cache: Vec<Option<Vec<u64>>> = vec![None; sec_block_addrs.len()];
        let mut entries = Vec::new();

        for (linear_idx, offsets) in targets {
            let raw = if linear_idx < num_inline {
                // Target lives in the inline element area of the index block.
                let inline_offset = inline_start
                    + u64::try_from(linear_idx * usize::from(header.element_size)).map_err(
                        |_| {
                            Error::InvalidData("EA inline entry offset exceeds u64 capacity".into())
                        },
                    )?;
                let position = header.index_block_address + inline_offset;
                read_entry_at_storage(
                    storage,
                    position,
                    is_filtered,
                    offset_size,
                    header.element_size,
                )?
            } else {
                // Locate the super block containing this element by walking
                // the layout and subtracting each super block's capacity.
                let mut relative_idx = (linear_idx - num_inline) as u64;
                let mut sb_idx = None;
                for (candidate_idx, (elmts_per_dblk, num_dblks)) in sb_layout.iter().enumerate() {
                    let capacity = elmts_per_dblk * num_dblks;
                    if relative_idx < capacity {
                        sb_idx = Some(candidate_idx);
                        break;
                    }
                    relative_idx -= capacity;
                }

                // Index beyond the total layout capacity: no such chunk.
                let Some(sb_idx) = sb_idx else {
                    continue;
                };
                let (elmts_per_dblk, _) = sb_layout[sb_idx];
                let dblk_idx = (relative_idx / elmts_per_dblk) as usize;
                let local_idx = (relative_idx % elmts_per_dblk) as usize;

                let dblk_addr = if sb_idx < 2 {
                    // Super blocks 0-1: data-block addresses are stored
                    // directly in the index block, laid out sequentially.
                    let base = sb_layout[..sb_idx]
                        .iter()
                        .map(|(_, num_dblks)| *num_dblks as usize)
                        .sum::<usize>();
                    *direct_dblk_addrs.get(base + dblk_idx).unwrap_or(&u64::MAX)
                } else {
                    // Super blocks 2+: go through the secondary block,
                    // parsing and caching it on first touch.
                    let sec_cache_idx = sb_idx - 2;
                    if secondary_block_cache[sec_cache_idx].is_none() {
                        let sec_addr = sec_block_addrs
                            .get(sec_cache_idx)
                            .copied()
                            .unwrap_or(u64::MAX);
                        if Cursor::is_undefined_offset(sec_addr, offset_size) {
                            // Unallocated secondary block: cache an empty
                            // list so lookups below fall through to u64::MAX.
                            secondary_block_cache[sec_cache_idx] = Some(Vec::new());
                        } else {
                            let (_, num_dblks) = sb_layout[sb_idx];
                            // Page-initialization bitmap is present only when
                            // data blocks are paged (elements exceed one page).
                            let page_bitmap_bytes = if header.max_dblk_page_nelmts_bits > 0
                                && elmts_per_dblk > (1u64 << header.max_dblk_page_nelmts_bits)
                            {
                                let page_nelmts = 1usize << header.max_dblk_page_nelmts_bits;
                                let pages_per_dblk =
                                    (elmts_per_dblk as usize).div_ceil(page_nelmts);
                                (num_dblks as usize * pages_per_dblk).div_ceil(8)
                            } else {
                                0
                            };
                            secondary_block_cache[sec_cache_idx] =
                                Some(parse_secondary_block_storage(
                                    storage,
                                    sec_addr,
                                    num_dblks as usize,
                                    offset_size,
                                    sizeof_nelmts,
                                    page_bitmap_bytes,
                                )?);
                        }
                    }

                    secondary_block_cache[sec_cache_idx]
                        .as_ref()
                        .and_then(|addrs| addrs.get(dblk_idx))
                        .copied()
                        .unwrap_or(u64::MAX)
                };

                // Unallocated data block: the chunk does not exist on disk.
                if Cursor::is_undefined_offset(dblk_addr, offset_size) {
                    continue;
                }

                read_data_block_entry_storage(
                    storage,
                    dblk_addr,
                    elmts_per_dblk as usize,
                    local_idx,
                    is_filtered,
                    header.max_dblk_page_nelmts_bits,
                    offset_size,
                    header.element_size,
                    sizeof_nelmts,
                )?
            };

            // Skip entries whose chunk was never written.
            if Cursor::is_undefined_offset(raw.address, offset_size) {
                continue;
            }

            entries.push(ChunkEntry {
                address: raw.address,
                size: raw.chunk_size,
                filter_mask: raw.filter_mask,
                offsets,
            });
        }

        return Ok(entries);
    }

    // ---- Full scan: parse the whole index and every data block. ----
    // Index-block size (same layout breakdown as in the bounded path above);
    // here the computed length IS used for the read.
    let index_block_len = 4
        + 1
        + 1
        + usize::from(offset_size)
        + usize::from(header.idx_blk_elmts) * usize::from(header.element_size)
        + (2 * usize::from(header.sec_blk_min_data_ptrs)) * usize::from(offset_size)
        + sb_layout
            .len()
            .saturating_sub(2 * usize::from(header.sec_blk_min_data_ptrs))
            * usize::from(offset_size)
        + 4;
    let data = storage.read_range(header.index_block_address, index_block_len)?;
    let mut cursor = Cursor::new(data.as_ref());
    cursor.set_position(0);

    let sig = cursor.read_bytes(4)?;
    if sig != EAIB_SIGNATURE {
        return Err(Error::InvalidExtensibleArraySignature {
            context: "index block signature mismatch",
        });
    }

    let version = cursor.read_u8()?;
    if version != 0 {
        return Err(Error::Other(format!(
            "unsupported extensible array index block version {}",
            version
        )));
    }

    let _client_id = cursor.read_u8()?;
    let _header_address = cursor.read_offset(offset_size)?;

    // Inline entries stored directly in the index block.
    let num_inline = header.idx_blk_elmts as usize;
    let inline_entries = read_entries(
        &mut cursor,
        num_inline,
        is_filtered,
        offset_size,
        header.element_size,
    )?;

    // Direct data-block addresses (super blocks 0-1).
    let ndblk_addrs = 2 * header.sec_blk_min_data_ptrs as usize;
    let mut direct_dblk_addrs = Vec::with_capacity(ndblk_addrs);
    for _ in 0..ndblk_addrs {
        direct_dblk_addrs.push(cursor.read_offset(offset_size)?);
    }

    // Secondary-block addresses (super blocks 2+); see NOTE(review) on the
    // equivalent computation in the bounded path regarding the formula.
    let nsblks = sb_layout.len();
    let nsblk_addrs = nsblks.saturating_sub(ndblk_addrs);
    let mut sec_block_addrs = Vec::with_capacity(nsblk_addrs);
    for _ in 0..nsblk_addrs {
        sec_block_addrs.push(cursor.read_offset(offset_size)?);
    }
    let _checksum = cursor.read_u32_le()?;

    // Accumulate raw entries in linear element order; gaps are represented
    // with address u64::MAX so indices stay aligned with element positions.
    let mut all_entries: Vec<EaRawEntry> = Vec::new();
    all_entries.extend(inline_entries);

    // Data blocks reachable directly from the index block (super blocks 0-1).
    let mut dblk_addr_idx = 0;
    for sb_idx_iter in 0..2usize.min(nsblks) {
        if sb_idx_iter >= sb_layout.len() {
            break;
        }
        let (elmts_per_dblk, num_dblks) = sb_layout[sb_idx_iter];
        for _ in 0..num_dblks {
            if dblk_addr_idx >= direct_dblk_addrs.len() {
                break;
            }
            let dblk_addr = direct_dblk_addrs[dblk_addr_idx];
            dblk_addr_idx += 1;

            if Cursor::is_undefined_offset(dblk_addr, offset_size) {
                // Unallocated data block: emit placeholder entries to keep
                // linear indices aligned.
                for _ in 0..elmts_per_dblk {
                    all_entries.push(EaRawEntry {
                        address: u64::MAX,
                        chunk_size: 0,
                        filter_mask: 0,
                    });
                }
            } else {
                let dblk_entries = {
                    let page_nelmts = if header.max_dblk_page_nelmts_bits > 0 {
                        1usize << header.max_dblk_page_nelmts_bits
                    } else {
                        0
                    };
                    // Expected data-block size: header fields plus either
                    // paged payload (per-page entries + per-page checksum)
                    // or a flat element array + checksum.
                    // NOTE(review): `_dblk_len` is unused — the read below
                    // spans to end of storage instead; confirm the formula
                    // before tightening the read.
                    let _dblk_len = if page_nelmts > 0 && elmts_per_dblk as usize > page_nelmts {
                        let num_pages = (elmts_per_dblk as usize).div_ceil(page_nelmts);
                        let bitmap_bytes = num_pages.div_ceil(8);
                        let mut len =
                            4 + 1 + 1 + usize::from(offset_size) + sizeof_nelmts + bitmap_bytes;
                        for page_idx in 0..num_pages {
                            let entries_in_page = if page_idx == num_pages - 1 {
                                let remainder = elmts_per_dblk as usize % page_nelmts;
                                if remainder == 0 {
                                    page_nelmts
                                } else {
                                    remainder
                                }
                            } else {
                                page_nelmts
                            };
                            len += entries_in_page * usize::from(header.element_size) + 4;
                        }
                        len
                    } else {
                        4 + 1
                            + 1
                            + usize::from(offset_size)
                            + sizeof_nelmts
                            + elmts_per_dblk as usize * usize::from(header.element_size)
                            + 4
                    };
                    let block = storage.read_range(
                        dblk_addr,
                        usize::try_from(storage.len().saturating_sub(dblk_addr)).map_err(|_| {
                            Error::InvalidData(
                                "extensible array data block exceeds platform usize capacity"
                                    .into(),
                            )
                        })?,
                    )?;
                    parse_data_block(
                        block.as_ref(),
                        0,
                        elmts_per_dblk as usize,
                        is_filtered,
                        header.max_dblk_page_nelmts_bits,
                        offset_size,
                        header.element_size,
                        sizeof_nelmts,
                    )?
                };
                all_entries.extend(dblk_entries);
            }
        }
    }

    // Data blocks reachable via secondary blocks (super blocks 2+).
    for (sb_idx_iter, &(elmts_per_dblk, num_dblks)) in sb_layout.iter().enumerate().skip(2) {
        let sec_idx = sb_idx_iter - 2;
        let sec_addr = *sec_block_addrs.get(sec_idx).unwrap_or(&u64::MAX);
        if Cursor::is_undefined_offset(sec_addr, offset_size) {
            // Entire super block unallocated: placeholder entries for all of
            // its elements keep linear indices aligned.
            for _ in 0..(elmts_per_dblk * num_dblks) {
                all_entries.push(EaRawEntry {
                    address: u64::MAX,
                    chunk_size: 0,
                    filter_mask: 0,
                });
            }
            continue;
        }

        // Page-initialization bitmap size (present only when data blocks
        // are paged): ceil(num_dblks * pages_per_dblk / 8).
        let page_bitmap_bytes = if header.max_dblk_page_nelmts_bits > 0
            && elmts_per_dblk > (1u64 << header.max_dblk_page_nelmts_bits)
        {
            let page_nelmts = 1usize << header.max_dblk_page_nelmts_bits;
            let pages_per_dblk = (elmts_per_dblk as usize).div_ceil(page_nelmts);
            (num_dblks as usize * pages_per_dblk).div_ceil(8)
        } else {
            0
        };
        let dblk_addrs = parse_secondary_block_storage(
            storage,
            sec_addr,
            num_dblks as usize,
            offset_size,
            sizeof_nelmts,
            page_bitmap_bytes,
        )?;

        for dblk_addr in dblk_addrs {
            if Cursor::is_undefined_offset(dblk_addr, offset_size) {
                // Unallocated data block: placeholder entries.
                for _ in 0..elmts_per_dblk {
                    all_entries.push(EaRawEntry {
                        address: u64::MAX,
                        chunk_size: 0,
                        filter_mask: 0,
                    });
                }
            } else {
                let page_nelmts = if header.max_dblk_page_nelmts_bits > 0 {
                    1usize << header.max_dblk_page_nelmts_bits
                } else {
                    0
                };
                // NOTE(review): `_dblk_len` unused here as well — same
                // read-to-end pattern as the direct data-block path above.
                let _dblk_len = if page_nelmts > 0 && elmts_per_dblk as usize > page_nelmts {
                    let num_pages = (elmts_per_dblk as usize).div_ceil(page_nelmts);
                    let bitmap_bytes = num_pages.div_ceil(8);
                    let mut len =
                        4 + 1 + 1 + usize::from(offset_size) + sizeof_nelmts + bitmap_bytes;
                    for page_idx in 0..num_pages {
                        let entries_in_page = if page_idx == num_pages - 1 {
                            let remainder = elmts_per_dblk as usize % page_nelmts;
                            if remainder == 0 {
                                page_nelmts
                            } else {
                                remainder
                            }
                        } else {
                            page_nelmts
                        };
                        len += entries_in_page * usize::from(header.element_size) + 4;
                    }
                    len
                } else {
                    4 + 1
                        + 1
                        + usize::from(offset_size)
                        + sizeof_nelmts
                        + elmts_per_dblk as usize * usize::from(header.element_size)
                        + 4
                };
                let block = storage.read_range(
                    dblk_addr,
                    usize::try_from(storage.len().saturating_sub(dblk_addr)).map_err(|_| {
                        Error::InvalidData(
                            "extensible array data block exceeds platform usize capacity".into(),
                        )
                    })?,
                )?;
                let dblk_entries = parse_data_block(
                    block.as_ref(),
                    0,
                    elmts_per_dblk as usize,
                    is_filtered,
                    header.max_dblk_page_nelmts_bits,
                    offset_size,
                    header.element_size,
                    sizeof_nelmts,
                )?;
                all_entries.extend(dblk_entries);
            }
        }
    }

    // Convert linear element indices into per-dimension chunk byte offsets
    // (row-major: last dimension varies fastest).
    let ndim = dataset_shape.len();
    let chunks_per_dim: Vec<u64> = (0..ndim)
        .map(|i| dataset_shape[i].div_ceil(chunk_dims[i] as u64))
        .collect();

    let mut entries = Vec::new();
    for (linear_idx, raw) in all_entries.iter().enumerate() {
        // Placeholder entries (never-written chunks) are dropped here.
        if Cursor::is_undefined_offset(raw.address, offset_size) {
            continue;
        }

        let mut remaining = linear_idx as u64;
        let mut offsets = vec![0u64; ndim];
        for d in (0..ndim).rev() {
            offsets[d] = (remaining % chunks_per_dim[d]) * chunk_dims[d] as u64;
            remaining /= chunks_per_dim[d];
        }

        // chunk_bounds is None on this path (the bounded path returned
        // early), so this filter is currently a no-op kept for symmetry.
        if let Some((first_chunk, last_chunk)) = chunk_bounds {
            let overlaps = offsets.iter().enumerate().all(|(dim, offset)| {
                let chunk_index = *offset / u64::from(chunk_dims[dim]);
                chunk_index >= first_chunk[dim] && chunk_index <= last_chunk[dim]
            });
            if !overlaps {
                continue;
            }
        }

        entries.push(ChunkEntry {
            address: raw.address,
            size: raw.chunk_size,
            filter_mask: raw.filter_mask,
            offsets,
        });
    }

    Ok(entries)
}
1516
#[cfg(test)]
mod tests {
    use super::*;

    /// A buffer whose first four bytes are not `EAHD` must be rejected with
    /// the dedicated signature error.
    #[test]
    fn test_eahd_bad_signature() {
        let mut data = vec![0u8; 64];
        data[..4].copy_from_slice(b"XXXX");
        let result = parse_header(&data, 0, 8, 8);
        assert!(matches!(
            result,
            Err(Error::InvalidExtensibleArraySignature { .. })
        ));
    }

    /// Elements-per-data-block doubles every other super block while the
    /// data-block count doubles on the alternating ones.
    #[test]
    fn test_compute_super_block_layout() {
        let header = EaHeader {
            client_id: 0,
            element_size: 8,
            _max_nelmts_bits: 32,
            idx_blk_elmts: 2,
            data_blk_min_elmts: 2,
            sec_blk_min_data_ptrs: 2,
            max_dblk_page_nelmts_bits: 0,
            _nelmts: 100,
            index_block_address: 0,
        };
        let layout = compute_super_block_layout(&header);
        // (elmts_per_dblk, num_dblks) per super block; element capacities
        // are 4, 8, 16 and 32 respectively.
        let expected = [(2, 2), (2, 4), (4, 4), (4, 8)];
        for (sb, want) in expected.iter().enumerate() {
            assert_eq!(layout[sb], *want, "super block {}", sb);
        }
    }
}