// rustyhdf5_format/extensible_array.rs
//! HDF5 Extensible Array index parsing for chunked datasets (v4 index type 4).
//!
//! Extensible Arrays are used for datasets with exactly one unlimited dimension.
//! Structures: EAHD (header), EAIB (index block), EADB (data block), EASB (super block).
6#[cfg(not(feature = "std"))]
7extern crate alloc;
8
9#[cfg(not(feature = "std"))]
10use alloc::{format, vec, vec::Vec};
11
12use crate::chunked_read::ChunkInfo;
13use crate::error::FormatError;
14
/// Parsed Extensible Array header (signature "EAHD").
///
/// Only the fields needed to traverse the index are retained; the header
/// checksum and the remaining statistics fields are skipped during parsing.
#[derive(Debug, Clone)]
pub struct ExtensibleArrayHeader {
    /// Client ID: 0 = non-filtered chunks, 1 = filtered chunks.
    pub client_id: u8,
    /// Size of each array element in bytes.
    pub element_size: u8,
    /// Max number of elements bits (log2 of the max number of data block elements per page).
    pub max_nelmts_bits: u8,
    /// Number of elements stored inline in the index block.
    pub idx_blk_elmts: u8,
    /// Minimum number of data block elements.
    pub min_dblk_nelmts: u8,
    /// Minimum number of elements in a super block.
    pub super_blk_min_nelmts: u8,
    /// Max number of data block elements bits.
    pub max_dblk_nelmts_bits: u8,
    /// Total number of elements stored (taken from the header statistics).
    pub num_elements: u64,
    /// File address of the index block (EAIB).
    pub index_block_address: u64,
}
37
38fn read_offset(data: &[u8], pos: usize, size: u8) -> Result<u64, FormatError> {
39    let s = size as usize;
40    if pos + s > data.len() {
41        return Err(FormatError::UnexpectedEof {
42            expected: pos + s,
43            available: data.len(),
44        });
45    }
46    let slice = &data[pos..pos + s];
47    Ok(match size {
48        2 => u16::from_le_bytes([slice[0], slice[1]]) as u64,
49        4 => u32::from_le_bytes([slice[0], slice[1], slice[2], slice[3]]) as u64,
50        8 => u64::from_le_bytes([
51            slice[0], slice[1], slice[2], slice[3], slice[4], slice[5], slice[6], slice[7],
52        ]),
53        _ => return Err(FormatError::InvalidOffsetSize(size)),
54    })
55}
56
/// True when `addr` equals the HDF5 "undefined address" sentinel
/// (all bits set) for the given offset size. Unknown sizes report `false`.
fn is_undefined_addr(addr: u64, offset_size: u8) -> bool {
    let sentinel = match offset_size {
        2 => u16::MAX as u64,
        4 => u32::MAX as u64,
        8 => u64::MAX,
        _ => return false,
    };
    addr == sentinel
}
65
/// True when the `size` bytes at `data[pos..]` are all 0xFF (the on-disk
/// undefined-address encoding). An out-of-range span reports `false`.
fn is_undefined(data: &[u8], pos: usize, size: u8) -> bool {
    match data.get(pos..pos + size as usize) {
        Some(bytes) => bytes.iter().all(|&b| b == 0xFF),
        None => false,
    }
}
73
74fn read_variable_length(data: &[u8], size: usize) -> Result<u64, FormatError> {
75    if size > 8 || data.len() < size {
76        return Err(FormatError::ChunkedReadError(
77            "invalid variable-length size".into(),
78        ));
79    }
80    let mut val = 0u64;
81    for (i, &byte) in data.iter().enumerate().take(size) {
82        val |= (byte as u64) << (i * 8);
83    }
84    Ok(val)
85}
86
87impl ExtensibleArrayHeader {
88    /// Parse an Extensible Array header from file data at the given offset.
89    pub fn parse(
90        file_data: &[u8],
91        offset: usize,
92        offset_size: u8,
93        length_size: u8,
94    ) -> Result<Self, FormatError> {
95        // EAHD: signature(4) + version(1) + client_id(1) + element_size(1) +
96        //   max_nelmts_bits(1) + idx_blk_elmts(1) + min_dblk_nelmts(1) +
97        //   super_blk_min_nelmts(1) + max_dblk_nelmts_bits(1) +
98        //   6 stats fields (each length_size) + index_block_address(offset_size) + checksum(4)
99        let min_size = 4 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1
100            + 6 * length_size as usize + offset_size as usize + 4;
101        if offset + min_size > file_data.len() {
102            return Err(FormatError::UnexpectedEof {
103                expected: offset + min_size,
104                available: file_data.len(),
105            });
106        }
107
108        let d = &file_data[offset..];
109        if &d[0..4] != b"EAHD" {
110            return Err(FormatError::ChunkedReadError(
111                "invalid Extensible Array header signature".into(),
112            ));
113        }
114
115        let version = d[4];
116        if version != 0 {
117            return Err(FormatError::ChunkedReadError(
118                format!("unsupported Extensible Array header version: {version}"),
119            ));
120        }
121
122        let client_id = d[5];
123        let element_size = d[6];
124        let max_nelmts_bits = d[7];
125        let idx_blk_elmts = d[8];
126        let min_dblk_nelmts = d[9];
127        let super_blk_min_nelmts = d[10];
128        let max_dblk_nelmts_bits = d[11];
129
130        let mut pos = 12;
131        // 6 stats fields: [0] unknown, [1] unknown, [2] nsuper_blks_created,
132        // [3] super_blk_size, [4] nelmts, [5] max_idx_set
133        // We only need nelmts (field[4]) and skip the rest.
134        let ls = length_size as usize;
135        pos += 4 * ls; // skip first 4 stats fields
136        let num_elements = read_offset(d, pos, length_size)?;
137        pos += ls; // skip nelmts
138        pos += ls; // skip max_idx_set (6th stats field)
139        let index_block_address = read_offset(d, pos, offset_size)?;
140
141        Ok(ExtensibleArrayHeader {
142            client_id,
143            element_size,
144            max_nelmts_bits,
145            idx_blk_elmts,
146            min_dblk_nelmts,
147            super_blk_min_nelmts,
148            max_dblk_nelmts_bits,
149            num_elements,
150            index_block_address,
151        })
152    }
153
154    /// Compute the size of this header in bytes (for write support).
155    pub fn serialized_size(offset_size: u8, length_size: u8) -> usize {
156        4 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1
157            + 6 * length_size as usize + offset_size as usize + 4
158    }
159}
160
161/// Read a single element from the extensible array element data.
162/// Returns (chunk_info, bytes_consumed) or None if unallocated.
163#[allow(clippy::too_many_arguments)]
164fn read_element(
165    data: &[u8],
166    pos: usize,
167    client_id: u8,
168    element_size: u8,
169    offset_size: u8,
170    chunk_byte_size: u64,
171    linear_index: usize,
172    num_chunks_per_dim: &[u64],
173    chunk_dimensions: &[u32],
174) -> Result<(Option<ChunkInfo>, usize), FormatError> {
175    let os = offset_size as usize;
176
177    if client_id == 0 {
178        // Non-filtered: just address
179        if pos + os > data.len() {
180            return Err(FormatError::UnexpectedEof {
181                expected: pos + os,
182                available: data.len(),
183            });
184        }
185        if is_undefined(data, pos, offset_size) {
186            return Ok((None, os));
187        }
188        let address = read_offset(data, pos, offset_size)?;
189        let offsets = index_to_chunk_offsets(linear_index, num_chunks_per_dim, chunk_dimensions);
190        Ok((
191            Some(ChunkInfo {
192                chunk_size: chunk_byte_size as u32,
193                filter_mask: 0,
194                offsets,
195                address,
196            }),
197            os,
198        ))
199    } else {
200        // Filtered: address + compressed_size + filter_mask
201        let chunk_size_bytes = element_size as usize - os - 4;
202        let elem_total = os + chunk_size_bytes + 4;
203        if pos + elem_total > data.len() {
204            return Err(FormatError::UnexpectedEof {
205                expected: pos + elem_total,
206                available: data.len(),
207            });
208        }
209        if is_undefined(data, pos, offset_size) {
210            return Ok((None, elem_total));
211        }
212        let address = read_offset(data, pos, offset_size)?;
213        let chunk_size = read_variable_length(&data[pos + os..], chunk_size_bytes)?;
214        let fm_off = pos + os + chunk_size_bytes;
215        let filter_mask = u32::from_le_bytes([
216            data[fm_off],
217            data[fm_off + 1],
218            data[fm_off + 2],
219            data[fm_off + 3],
220        ]);
221        let offsets = index_to_chunk_offsets(linear_index, num_chunks_per_dim, chunk_dimensions);
222        Ok((
223            Some(ChunkInfo {
224                chunk_size: chunk_size as u32,
225                filter_mask,
226                offsets,
227                address,
228            }),
229            elem_total,
230        ))
231    }
232}
233
/// Map a linear (row-major) chunk index to per-dimension chunk offsets in
/// dataset element space: the last dimension varies fastest, and each offset
/// is the chunk index along that dimension times the chunk extent.
///
/// Assumes both slices have equal length and every chunk count is non-zero.
fn index_to_chunk_offsets(
    index: usize,
    num_chunks_per_dim: &[u64],
    chunk_dimensions: &[u32],
) -> Vec<u64> {
    let mut rest = index as u64;
    // Peel off the fastest-varying dimension first, then restore order.
    let mut offsets: Vec<u64> = num_chunks_per_dim
        .iter()
        .zip(chunk_dimensions)
        .rev()
        .map(|(&nchunks, &extent)| {
            let along = rest % nchunks;
            rest /= nchunks;
            along * extent as u64
        })
        .collect();
    offsets.reverse();
    offsets
}
251
252/// Collect elements from a data block at the given offset.
253#[allow(clippy::too_many_arguments)]
254fn read_data_block_elements(
255    file_data: &[u8],
256    db_offset: usize,
257    nelmts: usize,
258    header: &ExtensibleArrayHeader,
259    offset_size: u8,
260    chunk_byte_size: u64,
261    start_index: usize,
262    num_chunks_per_dim: &[u64],
263    chunk_dimensions: &[u32],
264) -> Result<Vec<ChunkInfo>, FormatError> {
265    // AEDB: signature(4) + version(1) + client_id(1) + header_address(offset_size)
266    let db_header_size = 4 + 1 + 1 + offset_size as usize;
267    if db_offset + db_header_size > file_data.len() {
268        return Err(FormatError::UnexpectedEof {
269            expected: db_offset + db_header_size,
270            available: file_data.len(),
271        });
272    }
273
274    let d = &file_data[db_offset..];
275    if &d[0..4] != b"EADB" {
276        return Err(FormatError::ChunkedReadError(
277            "invalid Extensible Array data block signature".into(),
278        ));
279    }
280    // Skip version(1) + client_id(1) + header_address(offset_size) + block_offset
281    // Block offset is encoded in ceil(max_nelmts_bits/8) bytes
282    let blk_off_size = (header.max_nelmts_bits as usize).div_ceil(8);
283    let mut pos = db_offset + db_header_size + blk_off_size;
284
285    // Check if paged
286    let page_nelmts = 1usize << header.max_nelmts_bits;
287    let is_paged = nelmts > page_nelmts;
288
289    let mut chunks = Vec::new();
290
291    if !is_paged {
292        for i in 0..nelmts {
293            let (info, consumed) = read_element(
294                file_data,
295                pos,
296                header.client_id,
297                header.element_size,
298                offset_size,
299                chunk_byte_size,
300                start_index + i,
301                num_chunks_per_dim,
302                chunk_dimensions,
303            )?;
304            if let Some(ci) = info {
305                chunks.push(ci);
306            }
307            pos += consumed;
308        }
309    } else {
310        // Paged: elements are split into pages of page_nelmts.
311        // After the data block header comes a page bitmap, then each page
312        // has page_nelmts elements followed by a 4-byte checksum.
313        let npages = nelmts.div_ceil(page_nelmts);
314        // Page bitmap: ceil(npages / 8) bytes
315        let bitmap_size = npages.div_ceil(8);
316        // Read bitmap
317        if pos + bitmap_size > file_data.len() {
318            return Err(FormatError::UnexpectedEof {
319                expected: pos + bitmap_size,
320                available: file_data.len(),
321            });
322        }
323        let bitmap = &file_data[pos..pos + bitmap_size];
324        pos += bitmap_size;
325
326        let elem_bytes = if header.client_id == 0 {
327            offset_size as usize
328        } else {
329            header.element_size as usize
330        };
331
332        let mut global_idx = start_index;
333        for page_idx in 0..npages {
334            let byte_idx = page_idx / 8;
335            let bit_idx = page_idx % 8;
336            let page_has_data = (bitmap[byte_idx] >> bit_idx) & 1 != 0;
337
338            let elems_this_page = if page_idx == npages - 1 {
339                let remainder = nelmts % page_nelmts;
340                if remainder == 0 { page_nelmts } else { remainder }
341            } else {
342                page_nelmts
343            };
344
345            if page_has_data {
346                for i in 0..elems_this_page {
347                    let (info, consumed) = read_element(
348                        file_data,
349                        pos,
350                        header.client_id,
351                        header.element_size,
352                        offset_size,
353                        chunk_byte_size,
354                        global_idx + i,
355                        num_chunks_per_dim,
356                        chunk_dimensions,
357                    )?;
358                    if let Some(ci) = info {
359                        chunks.push(ci);
360                    }
361                    pos += consumed;
362                }
363                // Skip page checksum (4 bytes)
364                pos += 4;
365            } else {
366                // Empty page: skip all elements + checksum
367                pos += elems_this_page * elem_bytes + 4;
368            }
369            global_idx += elems_this_page;
370        }
371    }
372
373    Ok(chunks)
374}
375
/// Read chunk records from an Extensible Array.
///
/// Traverses EAHD -> EAIB -> EADB/EASB to collect all allocated chunks, in
/// logical element order: inline elements stored in the index block first,
/// then the direct data blocks listed in the index block, then the super
/// blocks.
///
/// NOTE(review): assumes `dataset_dims.len() >= chunk_dimensions.len()` and
/// that every dataset dimension is non-zero (a zero extent would put a 0 in
/// `num_chunks_per_dim` and divide by zero in `index_to_chunk_offsets`) —
/// confirm callers guarantee this.
#[allow(clippy::too_many_arguments)]
pub fn read_extensible_array_chunks(
    file_data: &[u8],
    header: &ExtensibleArrayHeader,
    dataset_dims: &[u64],
    chunk_dimensions: &[u32],
    element_size: u32,
    offset_size: u8,
    _length_size: u8,
) -> Result<Vec<ChunkInfo>, FormatError> {
    let rank = chunk_dimensions.len();
    let os = offset_size as usize;

    // Number of chunks along each dimension: ceil(dataset extent / chunk extent).
    let mut num_chunks_per_dim = Vec::with_capacity(rank);
    for d in 0..rank {
        let ds_dim = dataset_dims[d];
        let ch_dim = chunk_dimensions[d] as u64;
        num_chunks_per_dim.push(ds_dim.div_ceil(ch_dim));
    }

    // Uncompressed chunk size in bytes (reported for non-filtered elements).
    let chunk_byte_size: u64 = chunk_dimensions.iter().map(|&d| d as u64).product::<u64>()
        * element_size as u64;

    // Parse index block (signature "EAIB")
    let ib_offset = header.index_block_address as usize;
    let ib_header_size = 4 + 1 + 1 + offset_size as usize; // sig + ver + client + hdr_addr
    if ib_offset + ib_header_size > file_data.len() {
        return Err(FormatError::UnexpectedEof {
            expected: ib_offset + ib_header_size,
            available: file_data.len(),
        });
    }

    let ib = &file_data[ib_offset..];
    if &ib[0..4] != b"EAIB" {
        return Err(FormatError::ChunkedReadError(
            "invalid Extensible Array index block signature".into(),
        ));
    }
    // Skip version(1) + client_id(1) + header_address(offset_size)
    let mut pos = ib_offset + ib_header_size;

    let mut chunks = Vec::new();
    let mut global_index = 0usize;
    let total_elements = header.num_elements as usize;

    // 1. Read inline elements stored directly in the index block.
    let n_inline = header.idx_blk_elmts as usize;
    for i in 0..n_inline {
        if global_index + i >= total_elements {
            break;
        }
        let (info, consumed) = read_element(
            file_data,
            pos,
            header.client_id,
            header.element_size,
            offset_size,
            chunk_byte_size,
            global_index + i,
            &num_chunks_per_dim,
            chunk_dimensions,
        )?;
        if let Some(ci) = info {
            chunks.push(ci);
        }
        pos += consumed;
    }
    global_index += n_inline.min(total_elements);

    // If all elements were inline, we're done.
    if global_index >= total_elements {
        return Ok(chunks);
    }

    // 2. Direct data blocks: the first `super_blk_min_nelmts` super block
    // levels have their data blocks listed directly in the index block.
    let min_dblk = header.min_dblk_nelmts as usize;
    let sblk_min = header.super_blk_min_nelmts as usize;

    // Compute the element capacity of each direct data block. Level L has
    // 2^L data blocks; the per-block element count starts at min_dblk and
    // doubles AFTER each level > 0 is emitted.
    // NOTE(review): confirm this doubling schedule against the HDF5 H5EA
    // super block layout; the in-file tests pin the current behavior.
    let mut n_direct_dblks = 0usize;
    let mut dblk_sizes: Vec<usize> = Vec::new();
    {
        let mut nelmts = min_dblk;
        for sb_level in 0..sblk_min {
            let ndblks = 1usize << sb_level;
            for _ in 0..ndblks {
                dblk_sizes.push(nelmts);
                n_direct_dblks += 1;
            }
            if sb_level > 0 {
                nelmts *= 2;
            }
        }
    }

    // Read direct data block addresses from the index block.
    let mut dblk_addrs: Vec<u64> = Vec::with_capacity(n_direct_dblks);
    for _ in 0..n_direct_dblks {
        if pos + os > file_data.len() {
            break;
        }
        let addr = read_offset(file_data, pos, offset_size)?;
        dblk_addrs.push(addr);
        pos += os;
    }

    // Read elements from direct data blocks; undefined addresses mark
    // unallocated blocks and only advance the logical index.
    for (i, &addr) in dblk_addrs.iter().enumerate() {
        if i >= dblk_sizes.len() {
            break;
        }
        let nelmts = dblk_sizes[i];
        if is_undefined_addr(addr, offset_size) {
            global_index += nelmts;
            continue;
        }
        let block_chunks = read_data_block_elements(
            file_data,
            addr as usize,
            nelmts,
            header,
            offset_size,
            chunk_byte_size,
            global_index,
            &num_chunks_per_dim,
            chunk_dimensions,
        )?;
        chunks.extend(block_chunks);
        global_index += nelmts;
    }

    // 3. Remaining elements live in super blocks.
    let total_in_ib_and_direct: usize = n_inline + dblk_sizes.iter().sum::<usize>();
    if total_elements <= total_in_ib_and_direct {
        return Ok(chunks);
    }
    let remaining_elements = total_elements - total_in_ib_and_direct;

    // Compute the super block layout: (data block count, elements per data
    // block) for each super block, until the remaining elements are covered.
    let mut sb_addrs: Vec<u64> = Vec::new();
    let mut sb_infos: Vec<(usize, usize)> = Vec::new();
    {
        let mut covered = 0usize;
        let mut sb_level = sblk_min;
        // Start from the per-block size reached at the end of the direct
        // levels (doubles once per level > 0).
        let mut nelmts_per_dblk = min_dblk;
        for lev in 0..sblk_min {
            if lev > 0 {
                nelmts_per_dblk *= 2;
            }
        }

        // Each successive super block doubles the per-block element count.
        // NOTE(review): confirm against the HDF5 H5EA growth schedule for
        // deep arrays.
        while covered < remaining_elements {
            let ndblks = 1usize << sb_level;
            nelmts_per_dblk *= 2;
            let total_in_sb = ndblks * nelmts_per_dblk;
            sb_infos.push((ndblks, nelmts_per_dblk));
            covered += total_in_sb;
            sb_level += 1;
        }
    }

    // Read super block addresses from the index block.
    for _ in 0..sb_infos.len() {
        if pos + os > file_data.len() {
            break;
        }
        let addr = read_offset(file_data, pos, offset_size)?;
        sb_addrs.push(addr);
        pos += os;
    }

    // Process each super block; undefined addresses only advance the index.
    for (sb_idx, &sb_addr) in sb_addrs.iter().enumerate() {
        let (ndblks, nelmts_per_dblk) = sb_infos[sb_idx];
        if is_undefined_addr(sb_addr, offset_size) {
            global_index += ndblks * nelmts_per_dblk;
            continue;
        }
        let sb_chunks = read_super_block(
            file_data,
            sb_addr as usize,
            ndblks,
            nelmts_per_dblk,
            header,
            offset_size,
            chunk_byte_size,
            global_index,
            &num_chunks_per_dim,
            chunk_dimensions,
        )?;
        chunks.extend(sb_chunks);
        global_index += ndblks * nelmts_per_dblk;
    }

    Ok(chunks)
}
577
578/// Read a super block (AESB) and its data blocks.
579#[allow(clippy::too_many_arguments)]
580fn read_super_block(
581    file_data: &[u8],
582    sb_offset: usize,
583    ndblks: usize,
584    nelmts_per_dblk: usize,
585    header: &ExtensibleArrayHeader,
586    offset_size: u8,
587    chunk_byte_size: u64,
588    start_index: usize,
589    num_chunks_per_dim: &[u64],
590    chunk_dimensions: &[u32],
591) -> Result<Vec<ChunkInfo>, FormatError> {
592    let os = offset_size as usize;
593
594    // AESB: signature(4) + version(1) + client_id(1) + header_address(offset_size)
595    let sb_header_size = 4 + 1 + 1 + os;
596    if sb_offset + sb_header_size > file_data.len() {
597        return Err(FormatError::UnexpectedEof {
598            expected: sb_offset + sb_header_size,
599            available: file_data.len(),
600        });
601    }
602
603    if &file_data[sb_offset..sb_offset + 4] != b"EASB" {
604        return Err(FormatError::ChunkedReadError(
605            "invalid Extensible Array super block signature".into(),
606        ));
607    }
608
609    let mut pos = sb_offset + sb_header_size;
610
611    // Read data block addresses
612    let mut dblk_addrs: Vec<u64> = Vec::with_capacity(ndblks);
613    for _ in 0..ndblks {
614        if pos + os > file_data.len() {
615            return Err(FormatError::UnexpectedEof {
616                expected: pos + os,
617                available: file_data.len(),
618            });
619        }
620        let addr = read_offset(file_data, pos, offset_size)?;
621        dblk_addrs.push(addr);
622        pos += os;
623    }
624
625    let mut chunks = Vec::new();
626    let mut global_idx = start_index;
627
628    for &addr in &dblk_addrs {
629        if is_undefined_addr(addr, offset_size) {
630            global_idx += nelmts_per_dblk;
631            continue;
632        }
633        let block_chunks = read_data_block_elements(
634            file_data,
635            addr as usize,
636            nelmts_per_dblk,
637            header,
638            offset_size,
639            chunk_byte_size,
640            global_idx,
641            num_chunks_per_dim,
642            chunk_dimensions,
643        )?;
644        chunks.extend(block_chunks);
645        global_idx += nelmts_per_dblk;
646    }
647
648    Ok(chunks)
649}
650
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn index_to_offsets_1d() {
        let num_chunks = vec![5u64];
        let chunk_dims = vec![20u32];
        assert_eq!(index_to_chunk_offsets(0, &num_chunks, &chunk_dims), vec![0]);
        assert_eq!(index_to_chunk_offsets(1, &num_chunks, &chunk_dims), vec![20]);
        assert_eq!(index_to_chunk_offsets(4, &num_chunks, &chunk_dims), vec![80]);
    }

    #[test]
    fn index_to_offsets_2d() {
        let num_chunks = vec![3u64, 2];
        let chunk_dims = vec![4u32, 3];
        assert_eq!(index_to_chunk_offsets(0, &num_chunks, &chunk_dims), vec![0, 0]);
        assert_eq!(index_to_chunk_offsets(1, &num_chunks, &chunk_dims), vec![0, 3]);
        assert_eq!(index_to_chunk_offsets(2, &num_chunks, &chunk_dims), vec![4, 0]);
    }

    #[test]
    fn parse_header_valid() {
        let os: u8 = 8;
        let ls: u8 = 8;
        let mut buf = vec![0u8; 256];
        buf[0..4].copy_from_slice(b"EAHD");
        buf[4] = 0; // version
        buf[5] = 0; // client_id = non-filtered
        buf[6] = 8; // element_size
        buf[7] = 10; // max_nelmts_bits
        buf[8] = 2; // idx_blk_elmts
        buf[9] = 4; // min_dblk_nelmts
        buf[10] = 2; // super_blk_min_nelmts
        buf[11] = 8; // max_dblk_nelmts_bits
        // 6 stats fields (each 8 bytes); parse() reads stat[4] as num_elements
        buf[12..20].copy_from_slice(&0u64.to_le_bytes()); // stat[0]
        buf[20..28].copy_from_slice(&0u64.to_le_bytes()); // stat[1]
        buf[28..36].copy_from_slice(&0u64.to_le_bytes()); // stat[2]
        buf[36..44].copy_from_slice(&0u64.to_le_bytes()); // stat[3]
        buf[44..52].copy_from_slice(&5u64.to_le_bytes()); // stat[4] = num_elements
        buf[52..60].copy_from_slice(&0u64.to_le_bytes()); // stat[5]
        buf[60..68].copy_from_slice(&0x1000u64.to_le_bytes()); // index_block_address

        let hdr = ExtensibleArrayHeader::parse(&buf, 0, os, ls).unwrap();
        assert_eq!(hdr.client_id, 0);
        assert_eq!(hdr.element_size, 8);
        assert_eq!(hdr.idx_blk_elmts, 2);
        assert_eq!(hdr.min_dblk_nelmts, 4);
        assert_eq!(hdr.num_elements, 5);
        assert_eq!(hdr.index_block_address, 0x1000);
    }

    #[test]
    fn parse_header_invalid_signature() {
        let mut buf = vec![0u8; 256];
        buf[0..4].copy_from_slice(b"XXXX");
        let result = ExtensibleArrayHeader::parse(&buf, 0, 8, 8);
        assert!(result.is_err());
    }

    #[test]
    fn parse_header_invalid_version() {
        let mut buf = vec![0u8; 256];
        buf[0..4].copy_from_slice(b"EAHD");
        buf[4] = 1; // only version 0 is supported
        let result = ExtensibleArrayHeader::parse(&buf, 0, 8, 8);
        assert!(result.is_err());
    }

    /// Build a synthetic Extensible Array with only inline elements (simplest case).
    /// All chunks fit in the index block.
    #[test]
    fn read_inline_only() {
        let os: u8 = 8;
        let ls: u8 = 8;
        let osv = os as usize;
        let num_chunks = 2usize;
        let chunk_byte_size = 20u64 * 8; // 20 elements × 8 bytes

        let mut file_data = vec![0u8; 0x3000];

        // EAHD at offset 0x100, EAIB at 0x200
        let aehd_offset = 0x100usize;
        let aeib_offset = 0x200usize;

        // Build EAHD
        file_data[aehd_offset..aehd_offset + 4].copy_from_slice(b"EAHD");
        file_data[aehd_offset + 4] = 0; // version
        file_data[aehd_offset + 5] = 0; // client_id = non-filtered
        file_data[aehd_offset + 6] = osv as u8; // element_size
        file_data[aehd_offset + 7] = 10; // max_nelmts_bits
        file_data[aehd_offset + 8] = num_chunks as u8; // idx_blk_elmts (all inline)
        file_data[aehd_offset + 9] = 4; // min_dblk_nelmts
        file_data[aehd_offset + 10] = 2; // super_blk_min_nelmts
        file_data[aehd_offset + 11] = 8; // max_dblk_nelmts_bits
        // 6 stats fields (each 8 bytes), nelmts at stat[4] (offset 12 + 4*8 = 44)
        file_data[aehd_offset + 44..aehd_offset + 52]
            .copy_from_slice(&(num_chunks as u64).to_le_bytes());
        // index_block_address at offset 12 + 6*8 = 60
        file_data[aehd_offset + 60..aehd_offset + 68]
            .copy_from_slice(&(aeib_offset as u64).to_le_bytes());
        // checksum (4 bytes at +68) — not validated

        // Build EAIB at aeib_offset
        file_data[aeib_offset..aeib_offset + 4].copy_from_slice(b"EAIB");
        file_data[aeib_offset + 4] = 0; // version
        file_data[aeib_offset + 5] = 0; // client_id
        file_data[aeib_offset + 6..aeib_offset + 14]
            .copy_from_slice(&(aehd_offset as u64).to_le_bytes());

        // Inline elements: one address per chunk, right after the IB header
        let elem_start = aeib_offset + 6 + osv;
        let base_addr = 0x1000u64;
        for i in 0..num_chunks {
            let addr = base_addr + i as u64 * chunk_byte_size;
            let p = elem_start + i * osv;
            file_data[p..p + osv].copy_from_slice(&addr.to_le_bytes());
        }

        let header = ExtensibleArrayHeader::parse(&file_data, aehd_offset, os, ls).unwrap();
        let ds_dims = vec![40u64]; // 2 chunks × 20 elements
        let chunk_dims = vec![20u32];
        let chunks = read_extensible_array_chunks(
            &file_data,
            &header,
            &ds_dims,
            &chunk_dims,
            8,
            os,
            ls,
        )
        .unwrap();

        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].address, base_addr);
        assert_eq!(chunks[0].offsets, vec![0]);
        assert_eq!(chunks[0].chunk_size, chunk_byte_size as u32);
        assert_eq!(chunks[1].address, base_addr + chunk_byte_size);
        assert_eq!(chunks[1].offsets, vec![20]);
    }

    /// Build a synthetic EA with inline elements + one direct data block.
    #[test]
    fn read_inline_plus_data_blocks() {
        let os: u8 = 8;
        let ls: u8 = 8;
        let osv = os as usize;
        let chunk_byte_size = 10u64 * 8; // 10 elements × 8 bytes
        let idx_blk_elmts = 2u8;
        let min_dblk_nelmts = 2u8;
        let sblk_min = 2u8;
        let total_chunks = 4usize; // 2 inline + 2 in the first direct data block

        let mut file_data = vec![0u8; 0x5000];
        let aehd_offset = 0x100usize;
        let aeib_offset = 0x200usize;
        let aedb_offset = 0x300usize;

        // EAHD
        file_data[aehd_offset..aehd_offset + 4].copy_from_slice(b"EAHD");
        file_data[aehd_offset + 4] = 0;
        file_data[aehd_offset + 5] = 0; // client_id
        file_data[aehd_offset + 6] = osv as u8; // element_size
        file_data[aehd_offset + 7] = 10;
        file_data[aehd_offset + 8] = idx_blk_elmts;
        file_data[aehd_offset + 9] = min_dblk_nelmts;
        file_data[aehd_offset + 10] = sblk_min;
        file_data[aehd_offset + 11] = 8;
        // 6 stats fields (each 8 bytes), nelmts at stat[4] (offset 12 + 4*8 = 44)
        file_data[aehd_offset + 44..aehd_offset + 52]
            .copy_from_slice(&(total_chunks as u64).to_le_bytes());
        // idx_blk_addr at offset 12 + 6*8 = 60
        file_data[aehd_offset + 60..aehd_offset + 68]
            .copy_from_slice(&(aeib_offset as u64).to_le_bytes());

        // EAIB
        file_data[aeib_offset..aeib_offset + 4].copy_from_slice(b"EAIB");
        file_data[aeib_offset + 4] = 0;
        file_data[aeib_offset + 5] = 0;
        file_data[aeib_offset + 6..aeib_offset + 14]
            .copy_from_slice(&(aehd_offset as u64).to_le_bytes());

        let mut pos = aeib_offset + 6 + osv;

        // Inline elements (2 chunks)
        let base_addr = 0x1000u64;
        for i in 0..idx_blk_elmts as usize {
            let addr = base_addr + i as u64 * chunk_byte_size;
            file_data[pos..pos + osv].copy_from_slice(&addr.to_le_bytes());
            pos += osv;
        }

        // Direct data-block addresses in the index block.
        // With sblk_min = 2, the index block lists the data blocks of the
        // first two "super block levels" directly:
        //   level 0: 2^0 = 1 data block of min_dblk = 2 elements
        //   level 1: 2^1 = 2 data blocks of 2 elements each
        //     (the per-block size doubles only AFTER a level > 0 is emitted)
        // => 3 direct data blocks of 2 elements each (capacity 6).
        // Only 2 elements remain after the inline ones, so just the first
        // data block is populated; the other two are marked undefined.
        let n_direct_dblks = 3;
        file_data[pos..pos + osv].copy_from_slice(&(aedb_offset as u64).to_le_bytes());
        pos += osv;
        // Remaining dblk addresses - undefined (all 0xFF)
        for _ in 1..n_direct_dblks {
            file_data[pos..pos + osv].copy_from_slice(&u64::MAX.to_le_bytes());
            pos += osv;
        }

        // EADB at aedb_offset (min_dblk_nelmts elements)
        file_data[aedb_offset..aedb_offset + 4].copy_from_slice(b"EADB");
        file_data[aedb_offset + 4] = 0;
        file_data[aedb_offset + 5] = 0;
        file_data[aedb_offset + 6..aedb_offset + 14]
            .copy_from_slice(&(aehd_offset as u64).to_le_bytes());
        // block_offset: ceil(max_nelmts_bits/8) = ceil(10/8) = 2 bytes
        // block_offset = 0 for first data block
        let blk_off_size = (10usize).div_ceil(8); // max_nelmts_bits=10
        let mut dbpos = aedb_offset + 6 + osv + blk_off_size;
        for i in 0..min_dblk_nelmts as usize {
            let addr = base_addr + (idx_blk_elmts as u64 + i as u64) * chunk_byte_size;
            file_data[dbpos..dbpos + osv].copy_from_slice(&addr.to_le_bytes());
            dbpos += osv;
        }

        let header = ExtensibleArrayHeader::parse(&file_data, aehd_offset, os, ls).unwrap();
        let ds_dims = vec![40u64];
        let chunk_dims = vec![10u32];
        let chunks = read_extensible_array_chunks(
            &file_data, &header, &ds_dims, &chunk_dims, 8, os, ls,
        )
        .unwrap();

        assert_eq!(chunks.len(), 4);
        for (i, c) in chunks.iter().enumerate() {
            assert_eq!(c.address, base_addr + i as u64 * chunk_byte_size);
            assert_eq!(c.offsets, vec![i as u64 * 10]);
        }
    }

    /// Test serialized_size computation.
    #[test]
    fn header_serialized_size() {
        // 12 fixed + 6*8 stats + 8 addr + 4 checksum = 72
        assert_eq!(ExtensibleArrayHeader::serialized_size(8, 8), 72);
        // 12 fixed + 6*4 stats + 4 addr + 4 checksum = 44
        assert_eq!(ExtensibleArrayHeader::serialized_size(4, 4), 44);
    }

    /// Verify read_element for unallocated (all-0xFF) slots.
    #[test]
    fn read_element_unallocated() {
        let data = vec![0xFFu8; 16];
        let num_chunks = vec![5u64];
        let chunk_dims = vec![10u32];
        let (info, consumed) = read_element(
            &data, 0, 0, 8, 8, 80, 0, &num_chunks, &chunk_dims,
        )
        .unwrap();
        assert!(info.is_none());
        assert_eq!(consumed, 8);
    }

    /// Verify filtered element reading (address + compressed size + filter mask).
    #[test]
    fn read_element_filtered() {
        let os: u8 = 8;
        let chunk_size_bytes = 4usize;
        let elem_size = os as usize + chunk_size_bytes + 4;
        let mut data = vec![0u8; elem_size + 16];
        // Address
        data[0..8].copy_from_slice(&0x2000u64.to_le_bytes());
        // Compressed size (4 bytes LE)
        data[8..12].copy_from_slice(&120u32.to_le_bytes());
        // Filter mask
        data[12..16].copy_from_slice(&0u32.to_le_bytes());

        let num_chunks = vec![5u64];
        let chunk_dims = vec![10u32];
        let (info, consumed) = read_element(
            &data, 0, 1, elem_size as u8, os, 80, 2, &num_chunks, &chunk_dims,
        )
        .unwrap();
        let ci = info.unwrap();
        assert_eq!(ci.address, 0x2000);
        assert_eq!(ci.chunk_size, 120);
        assert_eq!(ci.filter_mask, 0);
        assert_eq!(ci.offsets, vec![20]);
        assert_eq!(consumed, elem_size);
    }
}